Merge branch 'main' into memory_usage3

This commit is contained in:
Spencer
2026-04-15 16:15:59 -04:00
committed by GitHub
24 changed files with 2246 additions and 748 deletions
@@ -11,12 +11,12 @@ exports[`Initial Theme Selection > should default to a dark theme when terminal
│ ● 4. Default Dark (Matches terminal) │ 3 a, b = 0, 1 │ │
│ 5. Dracula Dark │ 4 for _ in range(n): │ │
│ 6. GitHub Dark │ 5 a, b = b, a + b │ │
│ 7. Holiday Dark │ 6 return a │ │
│ 8. Shades Of Purple Dark │ │ │
│ 9. Solarized Dark │ 1 - print("Hello, " + name) │ │
│ 10. Tokyo Night Dark │ 1 + print(f"Hello, {name}!") │ │
│ 11. ANSI Light │ │ │
│ 12. Ayu Light └─────────────────────────────────────────────────┘ │
│ 7. GitHub Dark Colorblind Dark │ 6 return a │ │
│ 8. Holiday Dark │ │ │
│ 9. Shades Of Purple Dark │ 1 - print("Hello, " + name) │ │
│ 10. Solarized Dark │ 1 + print(f"Hello, {name}!") │ │
│ 11. Tokyo Night Dark │ │ │
│ 12. ANSI Light └─────────────────────────────────────────────────┘ │
│ ▼ │
│ │
│ (Use Enter to select, Tab to configure scope, Esc to close) │
@@ -34,14 +34,14 @@ exports[`Initial Theme Selection > should default to a light theme when terminal
│ 2. Ayu Light │ 1 # function │ │
│ ● 3. Default Light │ 2 def fibonacci(n): │ │
│ 4. GitHub Light │ 3 a, b = 0, 1 │ │
│ 5. Google Code Light │ 4 for _ in range(n): │ │
│ 6. Solarized Light │ 5 a, b = b, a + b │ │
│ 7. Xcode Light │ 6 return a │ │
│ 8. ANSI Dark (Incompatible) │ │ │
│ 9. Atom One Dark (Incompatible) │ 1 - print("Hello, " + name) │ │
│ 10. Ayu Dark (Incompatible) │ 1 + print(f"Hello, {name}!") │ │
│ 11. Default Dark (Incompatible) │ │ │
│ 12. Dracula Dark (Incompatible) └─────────────────────────────────────────────────┘ │
│ 5. GitHub Light Colorblind Light (Mat… │ 4 for _ in range(n): │ │
│ 6. Google Code Light │ 5 a, b = b, a + b │ │
│ 7. Solarized Light │ 6 return a │ │
│ 8. Xcode Light │ │ │
│ 9. ANSI Dark (Incompatible) │ 1 - print("Hello, " + name) │ │
│ 10. Atom One Dark (Incompatible) │ 1 + print(f"Hello, {name}!") │ │
│ 11. Ayu Dark (Incompatible) │ │ │
│ 12. Default Dark (Incompatible) └─────────────────────────────────────────────────┘ │
│ ▼ │
│ │
│ (Use Enter to select, Tab to configure scope, Esc to close) │
@@ -61,12 +61,12 @@ exports[`Initial Theme Selection > should use the theme from settings even if te
│ 4. Default Dark (Matches terminal) │ 3 a, b = 0, 1 │ │
│ 5. Dracula Dark │ 4 for _ in range(n): │ │
│ 6. GitHub Dark │ 5 a, b = b, a + b │ │
│ 7. Holiday Dark │ 6 return a │ │
│ 8. Shades Of Purple Dark │ │ │
│ 9. Solarized Dark │ 1 - print("Hello, " + name) │ │
│ 10. Tokyo Night Dark │ 1 + print(f"Hello, {name}!") │ │
│ 11. ANSI Light │ │ │
│ 12. Ayu Light └─────────────────────────────────────────────────┘ │
│ 7. GitHub Dark Colorblind Dark │ 6 return a │ │
│ 8. Holiday Dark │ │ │
│ 9. Shades Of Purple Dark │ 1 - print("Hello, " + name) │ │
│ 10. Solarized Dark │ 1 + print(f"Hello, {name}!") │ │
│ 11. Tokyo Night Dark │ │ │
│ 12. ANSI Light └─────────────────────────────────────────────────┘ │
│ ▼ │
│ │
│ (Use Enter to select, Tab to configure scope, Esc to close) │
@@ -100,12 +100,12 @@ exports[`ThemeDialog Snapshots > should render correctly in theme selection mode
│ 4. Default Dark │ 3 a, b = 0, 1 │ │
│ 5. Dracula Dark │ 4 for _ in range(n): │ │
│ 6. GitHub Dark │ 5 a, b = b, a + b │ │
│ 7. Holiday Dark │ 6 return a │ │
│ 8. Shades Of Purple Dark │ │ │
│ 9. Solarized Dark │ 1 - print("Hello, " + name) │ │
│ 10. Tokyo Night Dark │ 1 + print(f"Hello, {name}!") │ │
│ 11. ANSI Light │ │ │
│ 12. Ayu Light └─────────────────────────────────────────────────┘ │
│ 7. GitHub Dark Colorblind Dark │ 6 return a │ │
│ 8. Holiday Dark │ │ │
│ 9. Shades Of Purple Dark │ 1 - print("Hello, " + name) │ │
│ 10. Solarized Dark │ 1 + print(f"Hello, {name}!") │ │
│ 11. Tokyo Night Dark │ │ │
│ 12. ANSI Light └─────────────────────────────────────────────────┘ │
│ ▼ │
│ │
│ (Use Enter to select, Tab to configure scope, Esc to close) │
@@ -125,12 +125,12 @@ exports[`ThemeDialog Snapshots > should render correctly in theme selection mode
│ 4. Default Dark │ 3 a, b = 0, 1 │ │
│ 5. Dracula Dark │ 4 for _ in range(n): │ │
│ 6. GitHub Dark │ 5 a, b = b, a + b │ │
│ 7. Holiday Dark │ 6 return a │ │
│ 8. Shades Of Purple Dark │ │ │
│ 9. Solarized Dark │ 1 - print("Hello, " + name) │ │
│ 10. Tokyo Night Dark │ 1 + print(f"Hello, {name}!") │ │
│ 11. ANSI Light │ │ │
│ 12. Ayu Light └─────────────────────────────────────────────────┘ │
│ 7. GitHub Dark Colorblind Dark │ 6 return a │ │
│ 8. Holiday Dark │ │ │
│ 9. Shades Of Purple Dark │ 1 - print("Hello, " + name) │ │
│ 10. Solarized Dark │ 1 + print(f"Hello, {name}!") │ │
│ 11. Tokyo Night Dark │ │ │
│ 12. ANSI Light └─────────────────────────────────────────────────┘ │
│ ▼ │
│ ╭─────────────────────────────────────────────────╮ │
│ │ DEVELOPER TOOLS (Not visible to users) │ │
@@ -858,11 +858,81 @@ describe('useSlashCommandProcessor', () => {
});
describe('Lifecycle', () => {
/**
 * Happy path for listener cleanup: the IDE client resolves while the hook is
 * still mounted, so the hook registers a status listener and must remove that
 * exact listener when it unmounts.
 */
it('removes the IDE status listener on unmount after async initialization', async () => {
  // Recording stand-ins for the two IdeClient methods the hook touches.
  const addStatusChangeListener = vi.fn();
  const removeStatusChangeListener = vi.fn();
  const fakeIdeClient = { addStatusChangeListener, removeStatusChangeListener };

  // Hand out a promise that the test settles manually, so the test decides
  // when the hook's pending `getInstance()` call completes.
  let settleIdeClient: ((client: typeof fakeIdeClient) => void) | undefined;
  mockIdeClientGetInstance.mockImplementation(
    () =>
      new Promise((resolve) => {
        settleIdeClient = resolve;
      }),
  );

  const hook = await setupProcessorHook();

  // Finish initialization while the hook is still mounted.
  await act(async () => {
    settleIdeClient?.(fakeIdeClient);
  });

  hook.unmount();
  // Already unmounted above; clear the shared handle.
  // NOTE(review): presumably shared teardown reads `unmountHook` — confirm.
  unmountHook = undefined;

  expect(addStatusChangeListener).toHaveBeenCalledTimes(1);
  expect(removeStatusChangeListener).toHaveBeenCalledTimes(1);
  // The listener that was removed must be the one that was registered.
  expect(removeStatusChangeListener).toHaveBeenCalledWith(
    addStatusChangeListener.mock.calls[0]?.[0],
  );
});
/**
 * Race-condition case: the hook unmounts while `IdeClient.getInstance()` is
 * still pending. When the client finally resolves, the hook must neither
 * register nor remove a status listener on it.
 */
it('does not register an IDE status listener if unmounted before async initialization resolves', async () => {
  // Captures the resolver of the promise handed to the hook so the test
  // controls when (and whether) the IDE client becomes available.
  let resolveIdeClient:
    | ((client: {
        addStatusChangeListener: (listener: () => void) => void;
        removeStatusChangeListener: (listener: () => void) => void;
      }) => void)
    | undefined;
  const addStatusChangeListener = vi.fn();
  const removeStatusChangeListener = vi.fn();
  mockIdeClientGetInstance.mockImplementation(
    () =>
      new Promise((resolve) => {
        resolveIdeClient = resolve;
      }),
  );

  const result = await setupProcessorHook();

  // Guard against a vacuous pass: if the hook never asked for the IDE client,
  // the resolver stays undefined and the negative assertions below would
  // succeed without exercising the race at all.
  expect(resolveIdeClient).toBeDefined();

  // Unmount first, then let the pending initialization complete.
  result.unmount();
  // Already unmounted above; clear the shared handle.
  // NOTE(review): presumably shared teardown reads `unmountHook` — confirm.
  unmountHook = undefined;

  await act(async () => {
    resolveIdeClient?.({
      addStatusChangeListener,
      removeStatusChangeListener,
    });
  });

  expect(addStatusChangeListener).not.toHaveBeenCalled();
  expect(removeStatusChangeListener).not.toHaveBeenCalled();
});
/**
 * Unmounting the hook must abort in-flight command loading exactly once.
 */
it('should abort command loading when the hook unmounts', async () => {
  // This patches a global prototype, so it is restored in `finally` below;
  // otherwise the spy (and its call history) could leak into later tests.
  // NOTE(review): harmless if the suite already restores mocks globally.
  const abortSpy = vi.spyOn(AbortController.prototype, 'abort');
  try {
    const { unmount } = await setupProcessorHook();

    unmount();
    // Already unmounted above; clear the shared handle.
    unmountHook = undefined;

    expect(abortSpy).toHaveBeenCalledTimes(1);
  } finally {
    abortSpy.mockRestore();
  }
});
@@ -281,10 +281,16 @@ export const useSlashCommandProcessor = (
// Status-change callback: reloads the slash commands. It is registered on the
// IDE client below once that client becomes available.
const listener = () => {
reloadCommands();
};
// Tracks whether this setup is still live. The teardown function further down
// sets it to false, which lets the async block below detect that it resolved
// too late.
let isActive = true;
// The client the listener was actually registered on; stays undefined until
// (and unless) registration happens, so teardown can skip removal safely.
let activeIdeClient: IdeClient | undefined;
// Registration is asynchronous because IdeClient.getInstance() is awaited.
// NOTE(review): a rejection from getInstance() is not caught in this block.
// eslint-disable-next-line @typescript-eslint/no-floating-promises
(async () => {
const ideClient = await IdeClient.getInstance();
// Teardown may have run while we were awaiting; in that case do not register,
// because nothing would ever remove the listener again.
if (!isActive) {
return;
}
activeIdeClient = ideClient;
ideClient.addStatusChangeListener(listener);
})();
@@ -307,11 +313,8 @@ export const useSlashCommandProcessor = (
coreEvents.on('extensionsStopping', extensionEventListener);
return () => {
// eslint-disable-next-line @typescript-eslint/no-floating-promises
(async () => {
const ideClient = await IdeClient.getInstance();
ideClient.removeStatusChangeListener(listener);
})();
isActive = false;
activeIdeClient?.removeStatusChangeListener(listener);
removeMCPStatusChangeListener(listener);
coreEvents.off('extensionsStarting', extensionEventListener);
coreEvents.off('extensionsStopping', extensionEventListener);
@@ -0,0 +1,147 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { type ColorsTheme, Theme } from '../../theme.js';
import { interpolateColor } from '../../color-utils.js';
/**
 * Color palette for the "GitHub Dark Colorblind" theme.
 *
 * The accents avoid a red/green pairing: `AccentGreen` holds a light blue
 * (the same value as `LightBlue` and `AccentCyan`) and `AccentRed` holds an
 * orange. The diff backgrounds are a dark blue tint and a dark warm tint.
 * NOTE(review): presumably chosen for red-green color vision deficiency —
 * confirm against the upstream GitHub palette before changing any value.
 */
const githubDarkColorblindColors: ColorsTheme = {
type: 'dark',
Background: '#0d1117',
Foreground: '#e6edf3',
LightBlue: '#a5d6ff',
AccentBlue: '#79c0ff',
AccentPurple: '#d2a8ff',
AccentCyan: '#a5d6ff',
AccentGreen: '#a5d6ff',
AccentYellow: '#d29922',
AccentRed: '#f0883e',
DiffAdded: '#0d161f',
DiffRemoved: '#1d150e',
Comment: '#7d8590',
Gray: '#7d8590',
// Derived from the Gray and Background values above with a 0.5 factor
// (presumably the midpoint blend — see interpolateColor).
DarkGray: interpolateColor('#7d8590', '#0d1117', 0.5),
GradientColors: ['#58a6ff', '#f0883e'],
};
/**
 * Built-in "GitHub Dark Colorblind" theme.
 *
 * Combines the theme name and type with a highlight.js style map (keyed by
 * `hljs-*` selector) and the `githubDarkColorblindColors` palette. Every
 * color in the map comes from that palette, except the two diff backgrounds
 * near the end.
 */
export const GitHubDarkColorblind: Theme = new Theme(
'GitHub Dark Colorblind',
'dark',
{
// Base style for the whole highlighted block.
hljs: {
display: 'block',
overflowX: 'auto',
padding: '0.5em',
color: githubDarkColorblindColors.Foreground,
background: githubDarkColorblindColors.Background,
},
'hljs-comment': {
color: githubDarkColorblindColors.Comment,
fontStyle: 'italic',
},
'hljs-quote': {
color: githubDarkColorblindColors.Comment,
fontStyle: 'italic',
},
'hljs-keyword': {
color: githubDarkColorblindColors.AccentRed,
fontWeight: 'bold',
},
'hljs-selector-tag': {
color: githubDarkColorblindColors.AccentRed,
fontWeight: 'bold',
},
'hljs-subst': {
color: githubDarkColorblindColors.Foreground,
},
'hljs-number': {
color: githubDarkColorblindColors.LightBlue,
},
'hljs-literal': {
color: githubDarkColorblindColors.LightBlue,
},
'hljs-variable': {
color: githubDarkColorblindColors.Foreground,
},
'hljs-template-variable': {
color: githubDarkColorblindColors.Foreground,
},
'hljs-tag .hljs-attr': {
color: githubDarkColorblindColors.AccentYellow,
},
'hljs-string': {
color: githubDarkColorblindColors.AccentCyan,
},
'hljs-doctag': {
color: githubDarkColorblindColors.AccentCyan,
},
'hljs-title': {
color: githubDarkColorblindColors.AccentPurple,
fontWeight: 'bold',
},
'hljs-section': {
color: githubDarkColorblindColors.AccentPurple,
fontWeight: 'bold',
},
'hljs-selector-id': {
color: githubDarkColorblindColors.AccentPurple,
fontWeight: 'bold',
},
'hljs-type': {
color: githubDarkColorblindColors.AccentGreen,
fontWeight: 'bold',
},
'hljs-class .hljs-title': {
color: githubDarkColorblindColors.AccentGreen,
fontWeight: 'bold',
},
'hljs-tag': {
color: githubDarkColorblindColors.AccentGreen,
},
'hljs-name': {
color: githubDarkColorblindColors.AccentGreen,
},
'hljs-attribute': {
color: githubDarkColorblindColors.LightBlue,
},
'hljs-regexp': {
color: githubDarkColorblindColors.AccentCyan,
},
'hljs-link': {
color: githubDarkColorblindColors.AccentCyan,
},
'hljs-symbol': {
color: githubDarkColorblindColors.AccentPurple,
},
'hljs-bullet': {
color: githubDarkColorblindColors.AccentPurple,
},
'hljs-built_in': {
color: githubDarkColorblindColors.LightBlue,
},
'hljs-builtin-name': {
color: githubDarkColorblindColors.LightBlue,
},
'hljs-meta': {
color: githubDarkColorblindColors.LightBlue,
fontWeight: 'bold',
},
// Diff backgrounds are literal values here; they differ from the palette's
// DiffRemoved / DiffAdded entries.
'hljs-deletion': {
background: '#682d0f',
color: githubDarkColorblindColors.AccentRed,
},
'hljs-addition': {
background: '#0c2d6b',
color: githubDarkColorblindColors.AccentGreen,
},
'hljs-emphasis': {
fontStyle: 'italic',
},
'hljs-strong': {
fontWeight: 'bold',
},
},
githubDarkColorblindColors,
);
@@ -0,0 +1,147 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { type ColorsTheme, Theme } from '../../theme.js';
import { interpolateColor } from '../../color-utils.js';
/**
 * Color palette for the "GitHub Light Colorblind" theme.
 *
 * The accents avoid a red/green pairing: `AccentGreen` holds a blue and
 * `AccentRed` holds an orange. `LightBlue` and `AccentCyan` share one dark
 * blue value. The diff backgrounds are a pale blue and a pale orange.
 * NOTE(review): presumably chosen for red-green color vision deficiency —
 * confirm against the upstream GitHub palette before changing any value.
 */
const githubLightColorblindColors: ColorsTheme = {
type: 'light',
Background: '#ffffff',
Foreground: '#1f2328',
LightBlue: '#0a3069',
AccentBlue: '#0550ae',
AccentPurple: '#8250df',
AccentCyan: '#0a3069',
AccentGreen: '#0969da',
AccentYellow: '#9a6700',
AccentRed: '#bc4c00',
DiffAdded: '#ddf4ff',
DiffRemoved: '#fff1e5',
Comment: '#656d76',
Gray: '#656d76',
// Derived from the Gray and Background values above with a 0.5 factor
// (presumably the midpoint blend — see interpolateColor).
DarkGray: interpolateColor('#656d76', '#ffffff', 0.5),
GradientColors: ['#0969da', '#bc4c00'],
};
/**
 * Built-in "GitHub Light Colorblind" theme.
 *
 * Combines the theme name and type with a highlight.js style map (keyed by
 * `hljs-*` selector) and the `githubLightColorblindColors` palette. Every
 * color in the map comes from that palette, except the two diff backgrounds
 * near the end, which are written as literals.
 */
export const GitHubLightColorblind: Theme = new Theme(
'GitHub Light Colorblind',
'light',
{
// Base style for the whole highlighted block.
hljs: {
display: 'block',
overflowX: 'auto',
padding: '0.5em',
color: githubLightColorblindColors.Foreground,
background: githubLightColorblindColors.Background,
},
'hljs-comment': {
color: githubLightColorblindColors.Comment,
fontStyle: 'italic',
},
'hljs-quote': {
color: githubLightColorblindColors.Comment,
fontStyle: 'italic',
},
'hljs-keyword': {
color: githubLightColorblindColors.AccentRed,
fontWeight: 'bold',
},
'hljs-selector-tag': {
color: githubLightColorblindColors.AccentRed,
fontWeight: 'bold',
},
'hljs-subst': {
color: githubLightColorblindColors.Foreground,
},
'hljs-number': {
color: githubLightColorblindColors.LightBlue,
},
'hljs-literal': {
color: githubLightColorblindColors.LightBlue,
},
'hljs-variable': {
color: githubLightColorblindColors.Foreground,
},
'hljs-template-variable': {
color: githubLightColorblindColors.Foreground,
},
'hljs-tag .hljs-attr': {
color: githubLightColorblindColors.AccentYellow,
},
'hljs-string': {
color: githubLightColorblindColors.AccentCyan,
},
'hljs-doctag': {
color: githubLightColorblindColors.AccentCyan,
},
'hljs-title': {
color: githubLightColorblindColors.AccentPurple,
fontWeight: 'bold',
},
'hljs-section': {
color: githubLightColorblindColors.AccentPurple,
fontWeight: 'bold',
},
'hljs-selector-id': {
color: githubLightColorblindColors.AccentPurple,
fontWeight: 'bold',
},
'hljs-type': {
color: githubLightColorblindColors.AccentGreen,
fontWeight: 'bold',
},
'hljs-class .hljs-title': {
color: githubLightColorblindColors.AccentGreen,
fontWeight: 'bold',
},
'hljs-tag': {
color: githubLightColorblindColors.AccentGreen,
},
'hljs-name': {
color: githubLightColorblindColors.AccentGreen,
},
'hljs-attribute': {
color: githubLightColorblindColors.LightBlue,
},
'hljs-regexp': {
color: githubLightColorblindColors.AccentCyan,
},
'hljs-link': {
color: githubLightColorblindColors.AccentCyan,
},
'hljs-symbol': {
color: githubLightColorblindColors.AccentPurple,
},
'hljs-bullet': {
color: githubLightColorblindColors.AccentPurple,
},
'hljs-built_in': {
color: githubLightColorblindColors.LightBlue,
},
'hljs-builtin-name': {
color: githubLightColorblindColors.LightBlue,
},
'hljs-meta': {
color: githubLightColorblindColors.LightBlue,
fontWeight: 'bold',
},
// These literals currently equal the palette's DiffRemoved / DiffAdded
// values; keep them in sync if the palette changes.
'hljs-deletion': {
background: '#fff1e5',
color: githubLightColorblindColors.AccentRed,
},
'hljs-addition': {
background: '#ddf4ff',
color: githubLightColorblindColors.AccentGreen,
},
'hljs-emphasis': {
fontStyle: 'italic',
},
'hljs-strong': {
fontWeight: 'bold',
},
},
githubLightColorblindColors,
);
@@ -10,6 +10,8 @@ import { AtomOneDark } from './builtin/dark/atom-one-dark.js';
import { Dracula } from './builtin/dark/dracula-dark.js';
import { GitHubDark } from './builtin/dark/github-dark.js';
import { GitHubLight } from './builtin/light/github-light.js';
import { GitHubDarkColorblind } from './builtin/dark/github-dark-colorblind.js';
import { GitHubLightColorblind } from './builtin/light/github-light-colorblind.js';
import { GoogleCode } from './builtin/light/googlecode-light.js';
import { Holiday } from './builtin/dark/holiday-dark.js';
import { DefaultLight } from './builtin/light/default-light.js';
@@ -79,6 +81,8 @@ class ThemeManager {
DefaultDark,
GitHubDark,
GitHubLight,
GitHubDarkColorblind,
GitHubLightColorblind,
GoogleCode,
Holiday,
ShadesOfPurple,
@@ -0,0 +1,90 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect, it } from 'vitest';
import { SkillExtractionAgent } from './skill-extraction-agent.js';
import {
EDIT_TOOL_NAME,
GLOB_TOOL_NAME,
GREP_TOOL_NAME,
LS_TOOL_NAME,
READ_FILE_TOOL_NAME,
WRITE_FILE_TOOL_NAME,
} from '../tools/tool-names.js';
import { PREVIEW_GEMINI_FLASH_MODEL } from '../config/models.js';
/**
 * Contract tests for the skill-extraction agent definition. They pin the
 * agent's metadata, model and tool list, and check that the system prompt and
 * query contain the wording that makes "no skill" the default outcome.
 */
describe('SkillExtractionAgent', () => {
  // Fixture inputs. The query test checks that the session index and the
  // existing-skills summary are echoed verbatim.
  const fixture = {
    skillsDir: '/tmp/skills',
    sessionIndex:
      '[NEW] Debug login flow (12 user msgs) — /tmp/chats/session-1.json',
    existingSkillsSummary:
      '## Workspace Skills (.gemini/skills — do NOT duplicate)\n- **existing-skill**: Existing description',
  };

  const agent = SkillExtractionAgent(
    fixture.skillsDir,
    fixture.sessionIndex,
    fixture.existingSkillsSummary,
  );

  /** Asserts each fragment occurs in `text`, in order; fails on the first miss. */
  const expectFragments = (
    text: unknown,
    fragments: readonly string[],
  ): void => {
    for (const fragment of fragments) {
      expect(text).toContain(fragment);
    }
  };

  it('should expose expected metadata, model, and tools', () => {
    expect(agent.kind).toBe('local');
    expect(agent.name).toBe('confucius');
    expect(agent.displayName).toBe('Skill Extractor');
    expect(agent.modelConfig.model).toBe(PREVIEW_GEMINI_FLASH_MODEL);

    // Only requires that these tools are present; extra tools are allowed.
    const requiredTools = [
      READ_FILE_TOOL_NAME,
      WRITE_FILE_TOOL_NAME,
      EDIT_TOOL_NAME,
      LS_TOOL_NAME,
      GLOB_TOOL_NAME,
      GREP_TOOL_NAME,
    ];
    expect(agent.toolConfig?.tools).toEqual(
      expect.arrayContaining(requiredTools),
    );
  });

  it('should default to no skill unless recurrence and durability are proven', () => {
    expectFragments(agent.promptConfig.systemPrompt, [
      'Default to NO SKILL.',
      'strong evidence this will recur for future agents in this repo/workflow',
      'broader than a single incident',
      'A skill MUST meet ALL of these criteria:',
      'Future agents in this repo/workflow are likely to need it',
    ]);
  });

  it('should explicitly reject one-off incidents and single-session preferences', () => {
    expectFragments(agent.promptConfig.systemPrompt, [
      'Single-session preferences',
      'One-off incidents',
      'Output-style preferences',
      'cannot survive renaming the specific',
    ]);
  });

  it('should warn that session summaries are user-intent summaries, not workflow evidence', () => {
    // A missing query is treated as an empty string so the checks below fail
    // with a readable message.
    const query = agent.promptConfig.query ?? '';

    expectFragments(query, [
      fixture.existingSkillsSummary,
      fixture.sessionIndex,
      'The summary is a user-intent summary, not a workflow summary.',
      'The session summaries describe user intent, not workflow details.',
      'Only write a skill if the evidence shows a durable, recurring workflow',
      'If recurrence or future reuse is unclear, create no skill and explain why.',
    ]);
  });
});
@@ -36,7 +36,7 @@ function buildSystemPrompt(skillsDir: string): string {
'- solve similar tasks with fewer tool calls and fewer reasoning tokens',
'- reuse proven workflows and verification checklists',
'- avoid known failure modes and landmines',
'- anticipate user preferences without being reminded',
'- capture durable workflow constraints that future agents are likely to encounter again',
'',
'============================================================',
'SAFETY AND HYGIENE (STRICT)',
@@ -59,6 +59,10 @@ function buildSystemPrompt(skillsDir: string): string {
'1. "Is this something a competent agent would NOT already know?" If no, STOP.',
'2. "Does an existing skill (listed below) already cover this?" If yes, STOP.',
'3. "Can I write a concrete, step-by-step procedure?" If no, STOP.',
'4. "Is there strong evidence this will recur for future agents in this repo/workflow?" If no, STOP.',
'5. "Is this broader than a single incident (one bug, one ticket, one branch, one date, one exact error)?" If no, STOP.',
'',
'Default to NO SKILL.',
'',
'Do NOT create skills for:',
'',
@@ -67,6 +71,10 @@ function buildSystemPrompt(skillsDir: string): string {
'- **Pure Q&A**: The user asked "how does X work?" and got an answer. No procedure.',
'- **Brainstorming/design**: Discussion of how to build something, without a validated',
' implementation that produced a reusable procedure.',
'- **Single-session preferences**: User-specific style/output preferences or workflow',
' preferences mentioned only once.',
'- **One-off incidents**: Debugging or incident response tied to a single bug, ticket,',
' branch, date, or exact error string.',
'- **Anything already covered by an existing skill** (global, workspace, builtin, or',
' previously extracted). Check the "Existing Skills" section carefully.',
'',
@@ -74,31 +82,40 @@ function buildSystemPrompt(skillsDir: string): string {
'WHAT COUNTS AS A SKILL',
'============================================================',
'',
'A skill MUST meet BOTH of these criteria:',
'A skill MUST meet ALL of these criteria:',
'',
'1. **Procedural and concrete**: It can be expressed as numbered steps with specific',
' commands, paths, or code patterns. If you can only write vague guidance, it is NOT',
' a skill. "Be careful with X" is advice, not a skill.',
'',
'2. **Non-obvious and project-specific**: A competent agent would NOT already know this.',
' It encodes project-specific knowledge, non-obvious ordering constraints, or',
' hard-won failure shields that cannot be inferred from the codebase alone.',
'2. **Durable and reusable**: Future agents in this repo/workflow are likely to need it',
' again. If it only solved one incident, it is NOT a skill.',
'',
'Confidence tiers (prefer higher tiers):',
'3. **Evidence-backed and project-specific**: It encodes project-specific knowledge,',
' repeated operational constraints, or hard-won failure shields supported by session',
' evidence. Do not assume something is non-obvious just because it sounds detailed.',
'',
'**High confidence** — create the skill:',
'- The same workflow appeared in multiple sessions (cross-session repetition)',
'- A multi-step procedure was validated (tests passed, user confirmed success)',
'Confidence tiers:',
'',
'**Medium confidence** — create the skill if it is clearly project-specific:',
'- A project-specific build/test/deploy/release procedure was established',
'- A non-obvious ordering constraint or prerequisite was discovered',
'- A failure mode was hit and a concrete fix was found and verified',
'**High confidence** — create the skill only when recurrence/durability is clear:',
'- The same workflow appeared in multiple sessions (cross-session repetition), OR it is',
' a stable recurring repo workflow (for example setup/build/test/deploy/release) with a',
' clear future trigger',
'- The workflow was validated (tests passed, user confirmed success, or the same fix',
' worked repeatedly)',
'- The skill can be named without referencing a specific incident, bug, branch, or date',
'',
'**Medium confidence** — usually do NOT create the skill yet:',
'- A project-specific procedure appeared once and seems useful, but recurrence is not yet',
' clear',
'- A verified fix exists, but it is still tied to one incident',
'- A user correction changed the approach once, but durability is uncertain',
'',
'**Low confidence** — do NOT create the skill:',
'- A one-off debugging session with no reusable procedure',
'- Generic workflows any agent could figure out from the codebase',
'- A code review or investigation with no durable takeaway',
'- Output-style preferences that do not materially change procedure',
'',
'Aim for 0-2 skills per run. Quality over quantity.',
'',
@@ -117,8 +134,10 @@ function buildSystemPrompt(skillsDir: string): string {
'',
'What to look for:',
'',
'- User corrections: "No, do it this way" -> preference signal',
'- User corrections that change procedure in a durable way, especially when repeated',
' across sessions',
'- Repeated patterns across sessions: same commands, same file paths, same workflow',
'- Stable recurring repo lifecycle workflows with clear future triggers',
'- Failed attempts followed by successful ones -> failure shield',
'- Multi-step procedures that were validated (tests passed, user confirmed)',
'- User interruptions: "Stop, you need to X first" -> ordering constraint',
@@ -129,6 +148,8 @@ function buildSystemPrompt(skillsDir: string): string {
'- Tool outputs that are just data (file contents, search results)',
'- Speculative plans that were never executed',
"- Temporary context (current branch name, today's date, specific error IDs)",
'- Similar session summaries without matching workflow evidence',
'- One-off artifact names: bug IDs, branch names, timestamps, exact incident strings',
'',
'============================================================',
'SKILL FORMAT',
@@ -214,7 +235,10 @@ function buildSystemPrompt(skillsDir: string): string {
'- Keep scopes distinct. Avoid overlapping "do-everything" skills.',
'- Every skill MUST have: triggers, procedure, at least one pitfall or verification step.',
'- If you cannot write a reliable procedure (too many unknowns), do NOT create the skill.',
'- Do not create skills for generic advice that any competent agent would already know.',
'- If the candidate is tied to one incident or cannot survive renaming the specific',
' bug/ticket, do NOT create it.',
'- Do not create skills for generic advice, output-style preferences, or ephemeral',
' choices that any competent agent would already know or adapt to on the fly.',
'- Prefer fewer, higher-quality skills. 0-2 skills per run is typical. 3+ is unusual.',
'',
'============================================================',
@@ -224,17 +248,23 @@ function buildSystemPrompt(skillsDir: string): string {
`1. Use list_directory on ${skillsDir} to see existing skills.`,
'2. If skills exist, read their SKILL.md files to understand what is already captured.',
'3. Scan the session index provided in the query. Look for [NEW] sessions whose summaries',
' suggest workflows that ALSO appear in other sessions (either [NEW] or [old]).',
'4. Apply the minimum signal gate. If no repeated patterns are visible, report that and finish.',
' hint at workflows that ALSO appear in other sessions (either [NEW] or [old]) or at a',
' stable recurring repo workflow. Remember: summary similarity alone is NOT enough.',
'4. Apply the minimum signal gate. If recurrence or durability is not visible, report that',
' no skill should be created and finish.',
'5. For promising patterns, use read_file on the session file paths to inspect the full',
' conversation. Confirm the workflow was actually repeated and validated.',
'6. For each confirmed skill, verify it meets ALL criteria (repeatable, procedural, high-leverage).',
' conversation. Confirm the workflow was actually repeated and validated. Read at least',
' two sessions unless the candidate is clearly a stable recurring repo lifecycle workflow.',
'6. For each candidate, verify it meets ALL criteria. Before writing, make sure you can',
' state: future trigger, evidence sessions, recurrence signal, validation signal, and',
' why it is not generic.',
'7. Write new SKILL.md files or update existing ones in your directory using write_file.',
' For skills that live OUTSIDE your directory, write a .patch file instead (see UPDATING EXISTING SKILLS).',
'8. Write COMPLETE files — never partially update a SKILL.md.',
'',
'IMPORTANT: Do NOT read every session. Only read sessions whose summaries suggest a',
'repeated pattern worth investigating. Most runs should read 0-3 sessions and create 0 skills.',
'repeated pattern or a stable recurring repo workflow worth investigating. Most runs',
'should read 0-3 sessions and create 0 skills.',
'Do not explore the codebase. Work only with the session index, session files, and the skills directory.',
].join('\n');
}
@@ -301,6 +331,9 @@ export const SkillExtractionAgent = (
'Below is an index of past conversation sessions. Each line shows:',
'[NEW] or [old] status, a 1-line summary, message count, and the file path.',
'',
'The summary is a user-intent summary, not a workflow summary.',
'Matching summary text alone is never enough evidence for a reusable skill.',
'',
'[NEW] = not yet processed for skill extraction (focus on these)',
'[old] = previously processed (read only if a [NEW] session hints at a repeated pattern)',
'',
@@ -319,7 +352,7 @@ export const SkillExtractionAgent = (
return {
systemPrompt: buildSystemPrompt(skillsDir),
query: `${initialContext}\n\nAnalyze the session index above. Read sessions that suggest repeated workflows using read_file. Extract reusable skills to ${skillsDir}/.`,
query: `${initialContext}\n\nAnalyze the session index above. The session summaries describe user intent, not workflow details. Read sessions that suggest repeated workflows using read_file. Only write a skill if the evidence shows a durable, recurring workflow or a stable recurring repo procedure. If recurrence or future reuse is unclear, create no skill and explain why.`,
};
},
runConfig: {
+195 -8
View File
@@ -148,7 +148,7 @@ describe('createContentGenerator', () => {
);
expect(GoogleGenAI).toHaveBeenCalledWith({
apiKey: 'test-api-key',
vertexai: undefined,
vertexai: false,
httpOptions: expect.objectContaining({
headers: expect.objectContaining({
'User-Agent': expect.stringMatching(
@@ -365,7 +365,7 @@ describe('createContentGenerator', () => {
expect(GoogleGenAI).toHaveBeenCalledWith({
apiKey: 'test-api-key',
vertexai: undefined,
vertexai: false,
httpOptions: expect.objectContaining({
headers: expect.objectContaining({
'User-Agent': expect.any(String),
@@ -409,7 +409,7 @@ describe('createContentGenerator', () => {
expect(GoogleGenAI).toHaveBeenCalledWith({
apiKey: 'test-api-key',
vertexai: undefined,
vertexai: false,
httpOptions: expect.objectContaining({
headers: expect.objectContaining({
'User-Agent': expect.any(String),
@@ -443,7 +443,7 @@ describe('createContentGenerator', () => {
expect(GoogleGenAI).toHaveBeenCalledWith({
apiKey: 'test-api-key',
vertexai: undefined,
vertexai: false,
httpOptions: expect.objectContaining({
headers: expect.objectContaining({
'User-Agent': expect.any(String),
@@ -481,7 +481,7 @@ describe('createContentGenerator', () => {
);
expect(GoogleGenAI).toHaveBeenCalledWith({
apiKey: 'test-api-key',
vertexai: undefined,
vertexai: false,
httpOptions: expect.objectContaining({
headers: {
'User-Agent': expect.any(String),
@@ -517,7 +517,7 @@ describe('createContentGenerator', () => {
expect(GoogleGenAI).toHaveBeenCalledWith({
apiKey: 'test-api-key',
vertexai: undefined,
vertexai: false,
httpOptions: expect.objectContaining({
headers: expect.objectContaining({
'User-Agent': expect.any(String),
@@ -550,7 +550,7 @@ describe('createContentGenerator', () => {
expect(GoogleGenAI).toHaveBeenCalledWith({
apiKey: 'test-api-key',
vertexai: undefined,
vertexai: false,
httpOptions: expect.objectContaining({
headers: expect.objectContaining({
'User-Agent': expect.any(String),
@@ -589,7 +589,7 @@ describe('createContentGenerator', () => {
expect(GoogleGenAI).toHaveBeenCalledWith({
apiKey: 'test-api-key',
vertexai: undefined,
vertexai: false,
httpOptions: expect.objectContaining({
headers: expect.objectContaining({
'User-Agent': expect.any(String),
@@ -638,6 +638,193 @@ describe('createContentGenerator', () => {
apiVersion: 'v1alpha',
});
});
/**
 * With Gemini API-key auth, the GOOGLE_GEMINI_BASE_URL environment variable
 * must reach the GoogleGenAI constructor as `httpOptions.baseUrl`.
 */
it('should pass baseUrl to GoogleGenAI when GOOGLE_GEMINI_BASE_URL is set', async () => {
  // Environment under test: a custom Gemini endpoint plus an API key.
  vi.stubEnv('GOOGLE_GEMINI_BASE_URL', 'https://gemini.test.local');
  vi.stubEnv('GEMINI_API_KEY', 'test-api-key');

  // Make `new GoogleGenAI(...)` return an inert stub.
  const generatorStub = { models: {} } as unknown as GoogleGenAI;
  vi.mocked(GoogleGenAI).mockImplementation(() => generatorStub as never);

  // Minimal Config double exposing only the getters stubbed here.
  const configStub = {
    getModel: vi.fn().mockReturnValue('gemini-pro'),
    getProxy: vi.fn().mockReturnValue(undefined),
    getUsageStatisticsEnabled: () => false,
    getClientName: vi.fn().mockReturnValue(undefined),
  } as unknown as Config;

  const generatorConfig = await createContentGeneratorConfig(
    configStub,
    AuthType.USE_GEMINI,
  );
  await createContentGenerator(generatorConfig, configStub);

  const expectedOptions = expect.objectContaining({
    apiKey: 'test-api-key',
    vertexai: false,
    httpOptions: expect.objectContaining({
      baseUrl: 'https://gemini.test.local',
    }),
  });
  expect(GoogleGenAI).toHaveBeenCalledWith(expectedOptions);
});
/**
 * With Vertex AI auth, the GOOGLE_VERTEX_BASE_URL environment variable must
 * reach the GoogleGenAI constructor as `httpOptions.baseUrl`, with
 * `vertexai: true` and no API key.
 */
it('should pass baseUrl to GoogleGenAI when GOOGLE_VERTEX_BASE_URL is set', async () => {
  // Environment under test: a custom Vertex endpoint plus project/location.
  vi.stubEnv('GOOGLE_VERTEX_BASE_URL', 'https://vertex.test.local');
  vi.stubEnv('GOOGLE_CLOUD_PROJECT', 'my-project');
  vi.stubEnv('GOOGLE_CLOUD_LOCATION', 'us-central1');

  // Make `new GoogleGenAI(...)` return an inert stub.
  const generatorStub = { models: {} } as unknown as GoogleGenAI;
  vi.mocked(GoogleGenAI).mockImplementation(() => generatorStub as never);

  // Minimal Config double exposing only the getters stubbed here.
  const configStub = {
    getModel: vi.fn().mockReturnValue('gemini-pro'),
    getProxy: vi.fn().mockReturnValue(undefined),
    getUsageStatisticsEnabled: () => false,
    getClientName: vi.fn().mockReturnValue(undefined),
  } as unknown as Config;

  const generatorConfig = await createContentGeneratorConfig(
    configStub,
    AuthType.USE_VERTEX_AI,
  );
  await createContentGenerator(generatorConfig, configStub);

  const expectedOptions = expect.objectContaining({
    apiKey: undefined,
    vertexai: true,
    httpOptions: expect.objectContaining({
      baseUrl: 'https://vertex.test.local',
    }),
  });
  expect(GoogleGenAI).toHaveBeenCalledWith(expectedOptions);
});
it('should prefer GOOGLE_VERTEX_BASE_URL when authType is USE_VERTEX_AI without inferred vertex credentials', async () => {
  // Both overrides are present; the auth type decides which one is used.
  vi.stubEnv('GOOGLE_GEMINI_BASE_URL', 'https://gemini.test.local');
  vi.stubEnv('GOOGLE_VERTEX_BASE_URL', 'https://vertex.test.local');

  // The GoogleGenAI constructor is mocked so only its arguments are inspected.
  const fakeClient = { models: {} } as unknown as GoogleGenAI;
  vi.mocked(GoogleGenAI).mockImplementation(() => fakeClient as never);

  const cliConfig = {
    getModel: vi.fn().mockReturnValue('gemini-pro'),
    getProxy: vi.fn().mockReturnValue(undefined),
    getUsageStatisticsEnabled: () => false,
    getClientName: vi.fn().mockReturnValue(undefined),
  } as unknown as Config;

  // The generator config is built by hand and carries only the auth type:
  // no apiKey and no explicit vertexai flag.
  const generatorConfig = { authType: AuthType.USE_VERTEX_AI };
  await createContentGenerator(generatorConfig, cliConfig);

  const expectedHttpOptions = expect.objectContaining({
    baseUrl: 'https://vertex.test.local',
  });
  expect(GoogleGenAI).toHaveBeenCalledWith(
    expect.objectContaining({
      apiKey: undefined,
      vertexai: true,
      httpOptions: expectedHttpOptions,
    }),
  );
});
it('should prefer an explicit baseUrl over GOOGLE_GEMINI_BASE_URL', async () => {
  // The env override is set, but an explicit URL is also supplied below.
  vi.stubEnv('GOOGLE_GEMINI_BASE_URL', 'https://env.test.local');
  vi.stubEnv('GEMINI_API_KEY', 'test-api-key');

  // The GoogleGenAI constructor is mocked so only its arguments are inspected.
  const fakeClient = { models: {} } as unknown as GoogleGenAI;
  vi.mocked(GoogleGenAI).mockImplementation(() => fakeClient as never);

  const cliConfig = {
    getModel: vi.fn().mockReturnValue('gemini-pro'),
    getProxy: vi.fn().mockReturnValue(undefined),
    getUsageStatisticsEnabled: () => false,
    getClientName: vi.fn().mockReturnValue(undefined),
  } as unknown as Config;

  // The explicit URL is passed as the fourth argument; the third is left undefined.
  const generatorConfig = await createContentGeneratorConfig(
    cliConfig,
    AuthType.USE_GEMINI,
    undefined,
    'https://explicit.test.local',
  );
  await createContentGenerator(generatorConfig, cliConfig);

  // The explicit value, not the env value, must reach the SDK.
  const expectedHttpOptions = expect.objectContaining({
    baseUrl: 'https://explicit.test.local',
  });
  expect(GoogleGenAI).toHaveBeenCalledWith(
    expect.objectContaining({ httpOptions: expectedHttpOptions }),
  );
});
it('should allow localhost baseUrl overrides over http', async () => {
  // The GoogleGenAI constructor is mocked so only its arguments are inspected.
  const fakeClient = { models: {} } as unknown as GoogleGenAI;
  vi.mocked(GoogleGenAI).mockImplementation(() => fakeClient as never);

  const cliConfig = {
    getModel: vi.fn().mockReturnValue('gemini-pro'),
    getProxy: vi.fn().mockReturnValue(undefined),
    getUsageStatisticsEnabled: () => false,
    getClientName: vi.fn().mockReturnValue(undefined),
  } as unknown as Config;

  // Plain-HTTP loopback address supplied directly on the generator config.
  const generatorConfig = {
    apiKey: 'test-api-key',
    authType: AuthType.USE_GEMINI,
    baseUrl: 'http://127.0.0.1:8080',
  };
  await createContentGenerator(generatorConfig, cliConfig);

  const expectedHttpOptions = expect.objectContaining({
    baseUrl: 'http://127.0.0.1:8080',
  });
  expect(GoogleGenAI).toHaveBeenCalledWith(
    expect.objectContaining({ httpOptions: expectedHttpOptions }),
  );
});
it('should reject invalid custom baseUrl values', async () => {
  // A string that cannot be parsed as a URL at all.
  const generatorConfig = {
    apiKey: 'test-api-key',
    authType: AuthType.USE_GEMINI,
    baseUrl: 'not-a-url',
  };

  // `mockConfig` comes from the enclosing scope of this test.
  const creation = createContentGenerator(generatorConfig, mockConfig);

  await expect(creation).rejects.toThrow('Invalid custom base URL: not-a-url');
});
it('should reject non-https remote custom baseUrl values', async () => {
  // Well-formed URL, but plain HTTP to a non-local host.
  const generatorConfig = {
    apiKey: 'test-api-key',
    authType: AuthType.USE_GEMINI,
    baseUrl: 'http://example.com',
  };

  // `mockConfig` comes from the enclosing scope of this test.
  const creation = createContentGenerator(generatorConfig, mockConfig);

  await expect(creation).rejects.toThrow(
    'Custom base URL must use HTTPS unless it is localhost.',
  );
});
});
describe('createContentGeneratorConfig', () => {
+32 -3
View File
@@ -101,6 +101,21 @@ export type ContentGeneratorConfig = {
customHeaders?: Record<string, string>;
};
/**
 * Hostnames that are exempt from the HTTPS requirement.
 * The IPv6 loopback is listed with brackets because `URL.hostname` reports
 * IPv6 literals in bracketed form.
 */
const LOCAL_HOSTNAMES = ['localhost', '127.0.0.1', '[::1]'];

/**
 * Validates a user-supplied base URL for the API client.
 *
 * Accepted values are any parseable `https:` URL, or an `http:` URL whose
 * hostname is one of {@link LOCAL_HOSTNAMES}.
 *
 * @param baseUrl - The candidate base URL string.
 * @throws Error `Invalid custom base URL: <value>` when the string cannot be
 *   parsed as a URL.
 * @throws Error `Custom base URL must use HTTPS unless it is localhost.` when
 *   a non-HTTPS URL points at a non-local host.
 * @throws Error when a local host is addressed with a scheme other than
 *   `http:` or `https:` (for example `ftp:` or `ws:`).
 */
function validateBaseUrl(baseUrl: string): void {
  let url: URL;
  try {
    url = new URL(baseUrl);
  } catch {
    throw new Error(`Invalid custom base URL: ${baseUrl}`);
  }

  if (url.protocol === 'https:') {
    return;
  }

  if (!LOCAL_HOSTNAMES.includes(url.hostname)) {
    throw new Error('Custom base URL must use HTTPS unless it is localhost.');
  }

  // The localhost exemption only relaxes HTTPS to plain HTTP; it must not let
  // arbitrary schemes through just because the host is local.
  if (url.protocol !== 'http:') {
    throw new Error(`Custom base URL must use HTTP or HTTPS: ${baseUrl}`);
  }
}
export async function createContentGeneratorConfig(
config: Config,
authType: AuthType | undefined,
@@ -273,18 +288,32 @@ export async function createContentGenerator(
'x-gemini-api-privileged-user-id': `${installationId}`,
};
}
let baseUrl = config.baseUrl;
if (!baseUrl) {
const envBaseUrl =
config.authType === AuthType.USE_VERTEX_AI
? process.env['GOOGLE_VERTEX_BASE_URL']
: process.env['GOOGLE_GEMINI_BASE_URL'];
if (envBaseUrl) {
validateBaseUrl(envBaseUrl);
baseUrl = envBaseUrl;
}
} else {
validateBaseUrl(baseUrl);
}
const httpOptions: {
baseUrl?: string;
headers: Record<string, string>;
} = { headers };
if (config.baseUrl) {
httpOptions.baseUrl = config.baseUrl;
if (baseUrl) {
httpOptions.baseUrl = baseUrl;
}
const googleGenAI = new GoogleGenAI({
apiKey: config.apiKey === '' ? undefined : config.apiKey,
vertexai: config.vertexai,
vertexai: config.vertexai ?? config.authType === AuthType.USE_VERTEX_AI,
httpOptions,
...(apiVersionEnv && { apiVersion: apiVersionEnv }),
});
@@ -17,13 +17,18 @@ import {
_resetGlobalMemoryMonitorForTests,
} from './memory-monitor.js';
import type { Config } from '../config/config.js';
import { recordMemoryUsage, isPerformanceMonitoringActive } from './metrics.js';
import {
recordMemoryUsage,
recordCpuUsage,
isPerformanceMonitoringActive,
} from './metrics.js';
import { HighWaterMarkTracker } from './high-water-mark-tracker.js';
import { RateLimiter } from './rate-limiter.js';
// Mock dependencies
vi.mock('./metrics.js', () => ({
recordMemoryUsage: vi.fn(),
recordCpuUsage: vi.fn(),
isPerformanceMonitoringActive: vi.fn(),
MemoryMetricType: {
HEAP_USED: 'heap_used',
@@ -50,6 +55,7 @@ vi.mock('node:process', () => ({
}));
const mockRecordMemoryUsage = vi.mocked(recordMemoryUsage);
const mockRecordCpuUsage = vi.mocked(recordCpuUsage);
const mockIsPerformanceMonitoringActive = vi.mocked(
isPerformanceMonitoringActive,
);
@@ -192,6 +198,13 @@ describe('MemoryMonitor', () => {
component: 'test_context',
},
);
expect(mockRecordCpuUsage).toHaveBeenCalledWith(
mockConfig,
expect.any(Number),
{
component: 'test_context',
},
);
});
it('should not record metrics when performance monitoring is inactive', () => {
@@ -12,6 +12,7 @@ import { isUserActive } from './activity-detector.js';
import { HighWaterMarkTracker } from './high-water-mark-tracker.js';
import {
recordMemoryUsage,
recordCpuUsage,
MemoryMetricType,
isPerformanceMonitoringActive,
} from './metrics.js';
@@ -37,6 +38,7 @@ export class MemoryMonitor {
private intervalId: NodeJS.Timeout | null = null;
private isRunning = false;
private lastSnapshot: MemorySnapshot | null = null;
private lastCpuUsage: NodeJS.CpuUsage | null = null;
private monitoringInterval: number = 10000;
private highWaterMarkTracker: HighWaterMarkTracker;
private rateLimiter: RateLimiter;
@@ -191,6 +193,13 @@ export class MemoryMonitor {
memory_type: MemoryMetricType.RSS,
component: context,
});
// Record delta CPU usage (in microseconds)
const cpuUsage = process.cpuUsage(this.lastCpuUsage ?? undefined);
this.lastCpuUsage = process.cpuUsage();
recordCpuUsage(config, cpuUsage.user + cpuUsage.system, {
component: context,
});
}
this.lastSnapshot = snapshot;
+3 -1
View File
@@ -147,7 +147,9 @@ export class PerfTestHarness {
throw new Error(`No active timer found for label "${label}"`);
}
const wallClockMs = performance.now() - timer.startTime;
// Round wall-clock time to nearest 0.1 ms
const wallClockMs =
Math.round((performance.now() - timer.startTime) * 10) / 10;
const cpuDelta = process.cpuUsage(timer.startCpuUsage);
this.activeTimers.delete(label);
+28 -6
View File
@@ -193,6 +193,28 @@ export function checkModelOutputContent(
return isValid;
}
/**
 * One data point of a {@link TelemetryMetric}, as read back from the parsed
 * telemetry log. Every field is optional, so consumers must check for
 * presence before use.
 */
export interface MetricDataPoint {
/** Free-form attributes attached to this data point. */
attributes?: Record<string, unknown>;
/** Aggregated numeric values; each aggregate is individually optional. */
value?: {
sum?: number;
min?: number;
max?: number;
count?: number;
};
// NOTE(review): startTime is typed as a two-number tuple while endTime is a
// string. Confirm both against the actual telemetry log output.
startTime?: [number, number];
endTime?: string;
}
/**
 * A single metric entry as it appears under `scopeMetrics[].metrics` in a
 * parsed telemetry log: a descriptor identifying the metric plus its
 * recorded data points.
 */
export interface TelemetryMetric {
/** Identifies the metric; only `name` is required. */
descriptor: {
name: string;
type?: string;
description?: string;
unit?: string;
};
/** Data points recorded for this metric. */
dataPoints: MetricDataPoint[];
}
export interface ParsedLog {
attributes?: {
'event.name'?: string;
@@ -213,11 +235,7 @@ export interface ParsedLog {
prompt_id?: string;
};
scopeMetrics?: {
metrics: {
descriptor: {
name: string;
};
}[];
metrics: TelemetryMetric[];
}[];
}
@@ -1297,6 +1315,10 @@ export class TestRig {
return logs;
}
/**
 * Returns the parsed telemetry log entries by delegating to the private
 * `_readAndParseTelemetryLog` helper.
 */
readTelemetryLogs(): ParsedLog[] {
  const parsedLogs = this._readAndParseTelemetryLog();
  return parsedLogs;
}
private _readAndParseTelemetryLog(): ParsedLog[] {
// Telemetry is always written to the test directory
const logFilePath = join(this.homeDir!, 'telemetry.log');
@@ -1450,7 +1472,7 @@ export class TestRig {
);
}
readMetric(metricName: string): Record<string, unknown> | null {
readMetric(metricName: string): TelemetryMetric | null {
const logs = this._readAndParseTelemetryLog();
for (const logData of logs) {
if (logData.scopeMetrics) {