diff --git a/docs/get-started/configuration.md b/docs/get-started/configuration.md index eba48e8d74..71e5e25b87 100644 --- a/docs/get-started/configuration.md +++ b/docs/get-started/configuration.md @@ -468,6 +468,19 @@ their corresponding top-level category object in your `settings.json` file. } } }, + "flash-lite-helper": { + "extends": "base", + "modelConfig": { + "model": "gemini-2.5-flash-lite", + "generateContentConfig": { + "temperature": 0.2, + "maxOutputTokens": 120, + "thinkingConfig": { + "thinkingBudget": 0 + } + } + } + }, "edit-corrector": { "extends": "base", "modelConfig": { diff --git a/evals/app-test-helper.ts b/evals/app-test-helper.ts new file mode 100644 index 0000000000..8e61ed619a --- /dev/null +++ b/evals/app-test-helper.ts @@ -0,0 +1,76 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { it } from 'vitest'; +import { AppRig } from '../packages/cli/src/test-utils/AppRig.js'; +import type { EvalPolicy } from './test-helper.js'; +import fs from 'node:fs'; +import path from 'node:path'; +import { DEFAULT_GEMINI_MODEL } from '@google/gemini-cli-core'; + +export interface AppEvalCase { + name: string; + configOverrides?: any; + prompt: string; + timeout?: number; + files?: Record; + setup?: (rig: AppRig) => Promise; + assert: (rig: AppRig, output: string) => Promise; +} + +/** + * A helper for running behavioral evaluations using the in-process AppRig. + * This matches the API of evalTest in test-helper.ts as closely as possible. 
/**
 * Registers a single vitest case that exercises the in-process AppRig.
 *
 * The case builds a rig (defaulting the model to DEFAULT_GEMINI_MODEL unless
 * `configOverrides` replaces it), seeds `evalCase.files` into the rig's test
 * directory, runs the optional `setup` hook, renders the app, waits for the
 * idle prompt, sends `evalCase.prompt`, and finally hands control to
 * `evalCase.assert`. The rig is always unmounted, even when a step throws.
 *
 * @param policy Eval policy; 'USUALLY_PASSES' cases are registered as skipped
 *   unless the RUN_EVALS environment variable is set.
 * @param evalCase Description of the scenario to run.
 */
export function appEvalTest(policy: EvalPolicy, evalCase: AppEvalCase) {
  const runCase = async () => {
    const rig = new AppRig({
      configOverrides: {
        model: DEFAULT_GEMINI_MODEL,
        ...evalCase.configOverrides,
      },
    });

    try {
      await rig.initialize();

      // Seed the workspace before anything is rendered.
      const seedFiles = evalCase.files;
      if (seedFiles) {
        const workspaceRoot = rig.getTestDir();
        for (const [relativePath, contents] of Object.entries(seedFiles)) {
          const target = path.join(workspaceRoot, relativePath);
          fs.mkdirSync(path.dirname(target), { recursive: true });
          fs.writeFileSync(target, contents);
        }
      }

      // Case-specific preparation (e.g. breakpoints) happens before rendering.
      if (evalCase.setup) {
        await evalCase.setup(rig);
      }

      rig.render();

      // The prompt is only sent once the app shows its ready state.
      await rig.waitForIdle();
      await rig.sendMessage(evalCase.prompt);

      // Interaction-heavy cases do their own waiting/steering inside assert.
      await evalCase.assert(rig, rig.getStaticOutput());
    } finally {
      await rig.unmount();
    }
  };

  const shouldSkip = policy === 'USUALLY_PASSES' && !process.env['RUN_EVALS'];
  if (shouldSkip) {
    it.skip(evalCase.name, runCase);
    return;
  }

  // The vitest timeout is the case timeout (default 60s) plus 10s of headroom.
  const caseTimeout = evalCase.timeout ?? 60000;
  it(evalCase.name, runCase, caseTimeout + 10000);
}
knock/i); + expect(output).not.toContain('Line 6'); + }, + }); + + appEvalTest('ALWAYS_PASSES', { + name: 'Suggestive Hint: Model incorporates user guidance mid-stream', + configOverrides: { + excludeTools: ['run_shell_command', 'ls', 'google_web_search'], + }, + files: {}, + prompt: 'Create a file called "hw.js" with a JS hello world.', + setup: async (rig) => { + // Pause on write_file to inject a suggestive hint + rig.setBreakpoint(['write_file']); + }, + assert: async (rig) => { + // Wait for the model to start creating the first file + await rig.waitForPendingConfirmation('write_file', 30000); + + await rig.addUserHint( + 'Next, create a file called "hw.py" with a python hello world.', + ); + + // Resolve and wait for the model to complete both tasks + await rig.resolveAwaitedTool(); + await rig.waitForPendingConfirmation('write_file', 30000); + await rig.resolveAwaitedTool(); + await rig.waitForIdle(60000); + + const testDir = rig.getTestDir(); + const hwJs = path.join(testDir, 'hw.js'); + const hwPy = path.join(testDir, 'hw.py'); + + expect(fs.existsSync(hwJs), 'hw.js should exist').toBe(true); + expect(fs.existsSync(hwPy), 'hw.py should exist').toBe(true); + }, + }); +}); diff --git a/evals/vitest.config.ts b/evals/vitest.config.ts index 2c59682f16..50733a999c 100644 --- a/evals/vitest.config.ts +++ b/evals/vitest.config.ts @@ -5,8 +5,15 @@ */ import { defineConfig } from 'vitest/config'; +import { fileURLToPath } from 'node:url'; +import * as path from 'node:path'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); export default defineConfig({ + resolve: { + conditions: ['test'], + }, test: { testTimeout: 300000, // 5 minutes reporters: ['default', 'json'], @@ -14,5 +21,16 @@ export default defineConfig({ json: 'evals/logs/report.json', }, include: ['**/*.eval.ts'], + environment: 'node', + globals: true, + alias: { + react: path.resolve(__dirname, '../node_modules/react'), + }, + setupFiles: [path.resolve(__dirname, 
'../packages/cli/test-setup.ts')], + server: { + deps: { + inline: [/@google\/gemini-cli-core/], + }, + }, }, }); diff --git a/packages/cli/src/integration-tests/modelSteering.test.tsx b/packages/cli/src/integration-tests/modelSteering.test.tsx new file mode 100644 index 0000000000..b477326e50 --- /dev/null +++ b/packages/cli/src/integration-tests/modelSteering.test.tsx @@ -0,0 +1,81 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, afterEach } from 'vitest'; +import { AppRig } from '../test-utils/AppRig.js'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { PolicyDecision } from '@google/gemini-cli-core'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + +describe('Model Steering Integration', () => { + let rig: AppRig | undefined; + + afterEach(async () => { + await rig?.unmount(); + }); + + it('should steer the model using a hint during a tool turn', async () => { + const fakeResponsesPath = path.join( + __dirname, + '../test-utils/fixtures/steering.responses', + ); + rig = new AppRig({ fakeResponsesPath }); + await rig.initialize(); + rig.render(); + await rig.waitForIdle(); + + rig.setToolPolicy('list_directory', PolicyDecision.ASK_USER); + rig.setToolPolicy('read_file', PolicyDecision.ASK_USER); + + rig.setMockCommands([ + { + command: /list_directory/, + result: { + output: 'file1.txt\nfile2.js\nfile3.md', + exitCode: 0, + }, + }, + { + command: /read_file file1.txt/, + result: { + output: 'This is file1.txt content.', + exitCode: 0, + }, + }, + ]); + + // Start a long task + await rig.type('Start long task'); + await rig.pressEnter(); + + // Wait for the model to call 'list_directory' (Confirming state) + await rig.waitForOutput('ReadFolder'); + + // Injected a hint while the model is in a tool turn + await rig.addUserHint('focus on .txt'); + + // Resolve list_directory (Proceed) + await rig.resolveTool('ReadFolder'); + + // Wait for 
the model to process the hint and output the next action + // Based on steering.responses, it should first acknowledge the hint + await rig.waitForOutput('ACK: I will focus on .txt files now.'); + + // Then it should proceed with the next action + await rig.waitForOutput( + /Since you want me to focus on .txt files,[\s\S]*I will read file1.txt/, + ); + await rig.waitForOutput('ReadFile'); + + // Resolve read_file (Proceed) + await rig.resolveTool('ReadFile'); + + // Wait for final completion + await rig.waitForOutput('Task complete.'); + }); +}); diff --git a/packages/cli/src/test-utils/AppRig.test.tsx b/packages/cli/src/test-utils/AppRig.test.tsx new file mode 100644 index 0000000000..e1db3112a2 --- /dev/null +++ b/packages/cli/src/test-utils/AppRig.test.tsx @@ -0,0 +1,80 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, afterEach, expect } from 'vitest'; +import { AppRig } from './AppRig.js'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { debugLogger } from '@google/gemini-cli-core'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + +describe('AppRig', () => { + let rig: AppRig | undefined; + + afterEach(async () => { + await rig?.unmount(); + }); + + it('should handle deterministic tool turns with breakpoints', async () => { + const fakeResponsesPath = path.join( + __dirname, + 'fixtures', + 'steering.responses', + ); + rig = new AppRig({ fakeResponsesPath }); + await rig.initialize(); + rig.render(); + await rig.waitForIdle(); + + // Set breakpoints on the canonical tool names + rig.setBreakpoint('list_directory'); + rig.setBreakpoint('read_file'); + + // Start a task + debugLogger.log('[Test] Sending message: Start long task'); + await rig.sendMessage('Start long task'); + + // Wait for the first breakpoint (list_directory) + const pending1 = await rig.waitForPendingConfirmation('list_directory'); + 
expect(pending1.toolName).toBe('list_directory'); + + // Injected a hint + await rig.addUserHint('focus on .txt'); + + // Resolve and wait for the NEXT breakpoint (read_file) + // resolveTool will automatically remove the breakpoint policy for list_directory + await rig.resolveTool('list_directory'); + + const pending2 = await rig.waitForPendingConfirmation('read_file'); + expect(pending2.toolName).toBe('read_file'); + + // Resolve and finish. Also removes read_file breakpoint. + await rig.resolveTool('read_file'); + await rig.waitForOutput('Task complete.', 100000); + }); + + it('should render the app and handle a simple message', async () => { + const fakeResponsesPath = path.join( + __dirname, + 'fixtures', + 'simple.responses', + ); + rig = new AppRig({ fakeResponsesPath }); + await rig.initialize(); + rig.render(); + + // Wait for initial render + await rig.waitForIdle(); + + // Type a message + await rig.type('Hello'); + await rig.pressEnter(); + + // Wait for model response + await rig.waitForOutput('Hello! 
How can I help you today?'); + }); +}); diff --git a/packages/cli/src/test-utils/AppRig.tsx b/packages/cli/src/test-utils/AppRig.tsx new file mode 100644 index 0000000000..04237a6bec --- /dev/null +++ b/packages/cli/src/test-utils/AppRig.tsx @@ -0,0 +1,569 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { vi } from 'vitest'; +import { act } from 'react'; +import stripAnsi from 'strip-ansi'; +import os from 'node:os'; +import path from 'node:path'; +import fs from 'node:fs'; +import { AppContainer } from '../ui/AppContainer.js'; +import { renderWithProviders } from './render.js'; +import { + makeFakeConfig, + type Config, + type ConfigParameters, + ExtensionLoader, + AuthType, + ApprovalMode, + createPolicyEngineConfig, + PolicyDecision, + ToolConfirmationOutcome, + MessageBusType, + type ToolCallsUpdateMessage, + coreEvents, + ideContextStore, + createContentGenerator, + startupProfiler, + IdeClient, + debugLogger, +} from '@google/gemini-cli-core'; +import { + type MockShellCommand, + MockShellExecutionService, +} from './MockShellExecutionService.js'; +import { createMockSettings } from './settings.js'; +import { type LoadedSettings } from '../config/settings.js'; +import { AuthState } from '../ui/types.js'; + +// Mock core functions globally for tests using AppRig. 
vi.mock('@google/gemini-cli-core', async (importOriginal) => {
  const original =
    await importOriginal<typeof import('@google/gemini-cli-core')>();
  const { MockShellExecutionService: MockService } = await import(
    './MockShellExecutionService.js'
  );
  // Register the real execution logic so MockShellExecutionService can fall back to it
  MockService.setOriginalImplementation(original.ShellExecutionService.execute);

  return {
    ...original,
    ShellExecutionService: MockService,
  };
});

// Mock useAuthCommand to bypass authentication flows in tests
vi.mock('../ui/auth/useAuth.js', () => ({
  useAuthCommand: () => ({
    authState: AuthState.Authenticated,
    setAuthState: vi.fn(),
    authError: null,
    onAuthError: vi.fn(),
    apiKeyDefaultValue: 'test-api-key',
    reloadApiKey: vi.fn().mockResolvedValue('test-api-key'),
  }),
  validateAuthMethodWithSettings: () => null,
}));

// A minimal mock ExtensionManager to satisfy AppContainer's forceful cast
class MockExtensionManager extends ExtensionLoader {
  getExtensions = vi.fn().mockReturnValue([]);
  setRequestConsent = vi.fn();
  setRequestSetting = vi.fn();
}

/** Options accepted by the AppRig constructor. */
export interface AppRigOptions {
  /** Path to a recorded-responses fixture; when set, the rig runs without a live model. */
  fakeResponsesPath?: string;
  /** Render width passed to renderWithProviders (defaults to 120). */
  terminalWidth?: number;
  /** Terminal height placed in the UI state (defaults to 40). */
  terminalHeight?: number;
  /** Values spread last into the ConfigParameters built by initialize(). */
  configOverrides?: Partial<ConfigParameters>;
}

/** A tool call that the rig has observed in the 'awaiting_approval' state. */
export interface PendingConfirmation {
  toolName: string;
  toolDisplayName?: string;
  correlationId: string;
}

/**
 * In-process test harness that renders AppContainer against a fake Config and
 * exposes helpers for typing input, waiting on output, and pausing/resolving
 * tool confirmations.
 *
 * Typical use: `new AppRig(opts)` -> `initialize()` -> `render()` ->
 * interaction helpers -> `unmount()`.
 */
export class AppRig {
  private renderResult: ReturnType<typeof renderWithProviders> | undefined;
  private config: Config | undefined;
  private settings: LoadedSettings | undefined;
  private testDir: string;
  private sessionId: string;

  // Keyed by correlationId.
  private pendingConfirmations = new Map<string, PendingConfirmation>();
  // Tools whose ASK_USER rule was added by setBreakpoint().
  private breakpointTools = new Set<string | undefined>();
  private lastAwaitedConfirmation: PendingConfirmation | undefined;

  constructor(private options: AppRigOptions = {}) {
    this.testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gemini-app-rig-'));
    this.sessionId = `test-session-${Math.random().toString(36).slice(2, 9)}`;
  }

  /**
   * Tests a RegExp against text starting from index 0.
   *
   * The rig polls the same RegExp object repeatedly; without resetting
   * `lastIndex`, a pattern carrying the `g` or `y` flag resumes from the end
   * of its previous match and reports false negatives on later polls.
   */
  private static testPattern(pattern: RegExp, text: string): boolean {
    pattern.lastIndex = 0;
    return pattern.test(text);
  }

  /**
   * Stubs the environment, builds settings, the policy engine config and the
   * fake Config, subscribes to tool-call updates, and initializes the Config
   * (including a manual refreshAuth call).
   */
  async initialize() {
    this.setupEnvironment();
    this.settings = this.createRigSettings();

    const approvalMode =
      this.options.configOverrides?.approvalMode ?? ApprovalMode.DEFAULT;
    const policyEngineConfig = await createPolicyEngineConfig(
      this.settings.merged,
      approvalMode,
    );

    const configParams: ConfigParameters = {
      sessionId: this.sessionId,
      targetDir: this.testDir,
      cwd: this.testDir,
      debugMode: false,
      model: 'test-model',
      fakeResponses: this.options.fakeResponsesPath,
      interactive: true,
      approvalMode,
      policyEngineConfig,
      enableEventDrivenScheduler: true,
      extensionLoader: new MockExtensionManager(),
      excludeTools: this.options.configOverrides?.excludeTools,
      // Caller overrides win over every default above.
      ...this.options.configOverrides,
    };
    this.config = makeFakeConfig(configParams);

    if (this.options.fakeResponsesPath) {
      this.stubRefreshAuth();
    }

    this.setupMessageBusListeners();

    await act(async () => {
      await this.config!.initialize();
      // Since we mocked useAuthCommand, we must manually trigger the first
      // refreshAuth to ensure contentGenerator is initialized.
      await this.config!.refreshAuth(AuthType.USE_GEMINI);
    });
  }

  /**
   * Stubs environment variables and selects the shell-mock passthrough mode.
   *
   * @throws Error when no fake responses are configured and GEMINI_API_KEY is
   *   not set in the environment.
   */
  private setupEnvironment() {
    // Stub environment variables to avoid interference from developer's machine
    vi.stubEnv('GEMINI_CLI_HOME', this.testDir);
    if (this.options.fakeResponsesPath) {
      vi.stubEnv('GEMINI_API_KEY', 'test-api-key');
      MockShellExecutionService.setPassthrough(false);
    } else {
      if (!process.env['GEMINI_API_KEY']) {
        throw new Error(
          'GEMINI_API_KEY must be set in the environment for live model tests.',
        );
      }
      // For live tests, we allow falling through to the real shell service if no mock matches
      MockShellExecutionService.setPassthrough(true);
    }
    vi.stubEnv('GEMINI_DEFAULT_AUTH_TYPE', AuthType.USE_GEMINI);
  }

  /** Builds mock settings whose user scope and merged view carry the same values. */
  private createRigSettings(): LoadedSettings {
    // A factory (rather than one shared literal) keeps the user scope and the
    // merged view as separate objects, exactly as two inline literals would.
    const buildSettings = () => ({
      security: {
        auth: {
          selectedType: AuthType.USE_GEMINI,
          useExternal: true,
        },
        folderTrust: {
          enabled: true,
        },
      },
      ide: {
        enabled: false,
        hasSeenNudge: true,
      },
    });

    return createMockSettings({
      user: {
        path: path.join(this.testDir, '.gemini', 'user_settings.json'),
        settings: buildSettings(),
        originalSettings: {},
      },
      merged: buildSettings(),
    });
  }

  /**
   * Replaces `refreshAuth` on the config with a version that builds the
   * content generator and BaseLlmClient directly. Only installed when fake
   * responses are in use (see initialize()).
   */
  private stubRefreshAuth() {
    // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion, @typescript-eslint/no-explicit-any
    const gcConfig = this.config as any;
    gcConfig.refreshAuth = async (authMethod: AuthType) => {
      gcConfig.modelAvailabilityService.reset();

      const newContentGeneratorConfig = {
        authType: authMethod,
        proxy: gcConfig.getProxy(),
        apiKey: process.env['GEMINI_API_KEY'] || 'test-api-key',
      };

      gcConfig.contentGenerator = await createContentGenerator(
        newContentGeneratorConfig,
        this.config!,
        gcConfig.getSessionId(),
      );
      gcConfig.contentGeneratorConfig = newContentGeneratorConfig;

      // Initialize BaseLlmClient now that the ContentGenerator is available
      const { BaseLlmClient } = await import('@google/gemini-cli-core');
      gcConfig.baseLlmClient = new BaseLlmClient(
        gcConfig.contentGenerator,
        this.config!,
      );
    };
  }

  /**
   * Mirrors TOOL_CALLS_UPDATE messages into `pendingConfirmations`: calls in
   * 'awaiting_approval' with a correlationId are recorded once; any call in
   * another status clears the first recorded entry with the same tool name.
   */
  private setupMessageBusListeners() {
    if (!this.config) return;
    const messageBus = this.config.getMessageBus();

    messageBus.subscribe(
      MessageBusType.TOOL_CALLS_UPDATE,
      (message: ToolCallsUpdateMessage) => {
        for (const call of message.toolCalls) {
          if (call.status === 'awaiting_approval' && call.correlationId) {
            const details = call.confirmationDetails;
            const title = 'title' in details ? details.title : '';
            const toolDisplayName =
              call.tool?.displayName || title.replace(/^Confirm:\s*/, '');
            if (!this.pendingConfirmations.has(call.correlationId)) {
              this.pendingConfirmations.set(call.correlationId, {
                toolName: call.request.name,
                toolDisplayName,
                correlationId: call.correlationId,
              });
            }
          } else if (call.status !== 'awaiting_approval') {
            // NOTE(review): entries are cleared by tool name, so with two
            // pending calls to the same tool the first one found is removed.
            for (const [
              correlationId,
              pending,
            ] of this.pendingConfirmations.entries()) {
              if (pending.toolName === call.request.name) {
                this.pendingConfirmations.delete(correlationId);
                break;
              }
            }
          }
        }
      },
    );
  }

  /**
   * Renders AppContainer through renderWithProviders.
   *
   * @throws Error when initialize() has not been called.
   */
  render() {
    if (!this.config || !this.settings)
      throw new Error('AppRig not initialized');

    act(() => {
      // NOTE(review): the JSX element passed as the first argument was lost
      // from the reviewed text; the AppContainer props below are reconstructed
      // and must be confirmed against the original file.
      this.renderResult = renderWithProviders(
        <AppContainer
          config={this.config!}
          version="test-version"
          initializationResult={{
            authError: null,
            themeError: null,
            shouldOpenAuthDialog: false,
            geminiMdFileCount: 0,
          }}
        />,
        {
          config: this.config!,
          settings: this.settings!,
          width: this.options.terminalWidth ?? 120,
          useAlternateBuffer: false,
          uiState: {
            terminalHeight: this.options.terminalHeight ?? 40,
          },
        },
      );
    });
  }

  /** Replaces the shell commands answered by MockShellExecutionService. */
  setMockCommands(commands: MockShellCommand[]) {
    MockShellExecutionService.setMockCommands(commands);
  }

  /**
   * Adds a policy rule tagged with source 'AppRig Override'.
   *
   * @throws Error when initialize() has not been called.
   */
  setToolPolicy(
    toolName: string | undefined,
    decision: PolicyDecision,
    priority = 10,
  ) {
    if (!this.config) throw new Error('AppRig not initialized');
    this.config.getPolicyEngine().addRule({
      toolName,
      decision,
      priority,
      source: 'AppRig Override',
    });
  }

  /**
   * Adds a priority-100 ASK_USER rule for each given tool and remembers it so
   * resolveTool() can remove the rule once the tool is resolved.
   */
  setBreakpoint(toolName: string | string[] | undefined) {
    if (Array.isArray(toolName)) {
      for (const name of toolName) {
        this.setBreakpoint(name);
      }
    } else {
      this.setToolPolicy(toolName, PolicyDecision.ASK_USER, 100);
      this.breakpointTools.add(toolName);
    }
  }

  /**
   * Removes the rules for a tool that came from `source` and forgets any
   * breakpoint recorded for it.
   *
   * @throws Error when initialize() has not been called.
   */
  removeToolPolicy(toolName?: string, source = 'AppRig Override') {
    if (!this.config) throw new Error('AppRig not initialized');
    this.config
      .getPolicyEngine()
      // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
      .removeRulesForTool(toolName as string, source);
    this.breakpointTools.delete(toolName);
  }

  /** Returns the temporary directory created for this rig. */
  getTestDir(): string {
    return this.testDir;
  }

  /** Returns a snapshot array of the currently recorded pending confirmations. */
  getPendingConfirmations() {
    return Array.from(this.pendingConfirmations.values());
  }

  /**
   * Polls `predicate` until it returns a truthy value.
   *
   * @param predicate Condition to poll; may be async.
   * @param options.timeout Milliseconds before giving up (default 30000).
   * @param options.interval Milliseconds slept (inside act) between polls (default 100).
   * @param options.message Error text, or a function producing it. A function
   *   is evaluated only when the timeout fires, so it can describe the state
   *   at failure time rather than at call time.
   * @throws Error with the resolved message once the timeout has elapsed.
   */
  private async waitUntil(
    predicate: () => boolean | Promise<boolean>,
    options: {
      timeout?: number;
      interval?: number;
      message?: string | (() => string);
    } = {},
  ) {
    const {
      timeout = 30000,
      interval = 100,
      message = 'Condition timed out',
    } = options;
    const start = Date.now();

    while (true) {
      if (await predicate()) return;

      if (Date.now() - start > timeout) {
        throw new Error(typeof message === 'function' ? message() : message);
      }

      await act(async () => {
        await new Promise((resolve) => setTimeout(resolve, interval));
      });
    }
  }

  /**
   * Waits for a pending confirmation and remembers it for resolveAwaitedTool().
   *
   * @param toolNameOrDisplayName Exact tool name or display name, a RegExp
   *   tested against both, or omitted to accept any pending confirmation.
   * @param timeout Milliseconds to wait (default 30000).
   * @returns The first matching pending confirmation.
   * @throws Error listing the tools pending at timeout when nothing matched.
   */
  async waitForPendingConfirmation(
    toolNameOrDisplayName?: string | RegExp,
    timeout = 30000,
  ): Promise<PendingConfirmation> {
    const matches = (p: PendingConfirmation) => {
      if (!toolNameOrDisplayName) return true;
      if (typeof toolNameOrDisplayName === 'string') {
        return (
          p.toolName === toolNameOrDisplayName ||
          p.toolDisplayName === toolNameOrDisplayName
        );
      }
      return (
        AppRig.testPattern(toolNameOrDisplayName, p.toolName) ||
        AppRig.testPattern(toolNameOrDisplayName, p.toolDisplayName || '')
      );
    };

    let matched: PendingConfirmation | undefined;
    await this.waitUntil(
      () => {
        matched = this.getPendingConfirmations().find(matches);
        return !!matched;
      },
      {
        timeout,
        // Built lazily so the list reflects what is pending when the wait fails.
        message: () =>
          `Timed out waiting for pending confirmation: ${toolNameOrDisplayName || 'any'}. Current pending: ${this.getPendingConfirmations()
            .map((p) => p.toolName)
            .join(', ')}`,
      },
    );

    this.lastAwaitedConfirmation = matched;
    return matched!;
  }

  /**
   * Publishes a confirmation response for a pending tool call.
   *
   * @param toolNameOrDisplayName A PendingConfirmation to resolve directly, or
   *   a name/RegExp that is first awaited via waitForPendingConfirmation().
   * @param outcome Outcome to publish; `confirmed` is false only for Cancel.
   * @throws Error when initialize() has not been called.
   */
  async resolveTool(
    toolNameOrDisplayName: string | RegExp | PendingConfirmation,
    outcome: ToolConfirmationOutcome = ToolConfirmationOutcome.ProceedOnce,
  ): Promise<void> {
    if (!this.config) throw new Error('AppRig not initialized');
    const messageBus = this.config.getMessageBus();

    let pending: PendingConfirmation;
    if (
      typeof toolNameOrDisplayName === 'object' &&
      'correlationId' in toolNameOrDisplayName
    ) {
      pending = toolNameOrDisplayName;
    } else {
      pending = await this.waitForPendingConfirmation(toolNameOrDisplayName);
    }

    await act(async () => {
      this.pendingConfirmations.delete(pending.correlationId);

      // A breakpoint is one-shot: drop its rule before letting the call proceed.
      if (this.breakpointTools.has(pending.toolName)) {
        this.removeToolPolicy(pending.toolName);
      }

      // eslint-disable-next-line @typescript-eslint/no-floating-promises
      messageBus.publish({
        type: MessageBusType.TOOL_CONFIRMATION_RESPONSE,
        correlationId: pending.correlationId,
        confirmed: outcome !== ToolConfirmationOutcome.Cancel,
        outcome,
      });
    });

    // Short settle delay after publishing the response.
    await act(async () => {
      await new Promise((resolve) => setTimeout(resolve, 100));
    });
  }

  /**
   * Resolves the confirmation most recently returned by
   * waitForPendingConfirmation() and then forgets it.
   *
   * @throws Error when no confirmation has been awaited.
   */
  async resolveAwaitedTool(
    outcome: ToolConfirmationOutcome = ToolConfirmationOutcome.ProceedOnce,
  ): Promise<void> {
    if (!this.lastAwaitedConfirmation) {
      throw new Error('No tool has been awaited yet');
    }
    await this.resolveTool(this.lastAwaitedConfirmation, outcome);
    this.lastAwaitedConfirmation = undefined;
  }

  /**
   * Forwards a hint to `config.addUserHint` inside act().
   *
   * @throws Error when initialize() has not been called.
   */
  async addUserHint(hint: string) {
    if (!this.config) throw new Error('AppRig not initialized');
    await act(async () => {
      this.config!.addUserHint(hint);
    });
  }

  /**
   * Returns the initialized Config.
   *
   * @throws Error when initialize() has not been called.
   */
  getConfig(): Config {
    if (!this.config) throw new Error('AppRig not initialized');
    return this.config;
  }

  /**
   * Writes text to the rendered app's stdin, then waits 50ms inside act().
   *
   * @throws Error when render() has not been called.
   */
  async type(text: string) {
    if (!this.renderResult) throw new Error('AppRig not initialized');
    await act(async () => {
      this.renderResult!.stdin.write(text);
    });
    await act(async () => {
      await new Promise((resolve) => setTimeout(resolve, 50));
    });
  }

  /** Sends a carriage return. */
  async pressEnter() {
    await this.type('\r');
  }

  /**
   * Sends a raw key sequence. Writing a key is the same operation as typing
   * text, so this delegates to type() instead of repeating it.
   */
  async pressKey(key: string) {
    await this.type(key);
  }

  /** Latest rendered frame with ANSI codes stripped ('' before render()). */
  get lastFrame() {
    if (!this.renderResult) return '';
    return stripAnsi(this.renderResult.lastFrame() || '');
  }

  /** Latest stdout frame with ANSI codes stripped ('' before render()). */
  getStaticOutput() {
    if (!this.renderResult) return '';
    return stripAnsi(this.renderResult.stdout.lastFrame() || '');
  }

  /**
   * Waits until the latest frame contains `pattern` (substring for strings,
   * RegExp test otherwise).
   *
   * @throws Error containing the frame shown at timeout when nothing matched.
   */
  async waitForOutput(pattern: string | RegExp, timeout = 30000) {
    await this.waitUntil(
      () => {
        const frame = this.lastFrame;
        return typeof pattern === 'string'
          ? frame.includes(pattern)
          : AppRig.testPattern(pattern, frame);
      },
      {
        timeout,
        // Built lazily so the error shows the frame at failure time.
        message: () =>
          `Timed out waiting for output: ${pattern}\nLast frame:\n${this.lastFrame}`,
      },
    );
  }

  /** Waits until the frame shows the 'Type your message' prompt text. */
  async waitForIdle(timeout = 20000) {
    await this.waitForOutput('Type your message', timeout);
  }

  /** Types text and presses Enter. */
  async sendMessage(text: string) {
    await this.type(text);
    await this.pressEnter();
  }

  /**
   * Unmounts the rendered app and resets the global state the rig touched:
   * env stubs, core event listeners, shell mock, IDE context, the IdeClient
   * singleton promise, the startup profiler, vitest mocks, and the temp dir.
   *
   * Global cleanup runs in `finally`, so a failure while unmounting cannot
   * leak stubs or the temporary directory into later tests.
   */
  async unmount() {
    try {
      // Poison the chat recording service to prevent late writes to the test directory
      if (this.config) {
        const recordingService = this.config
          .getGeminiClient()
          ?.getChatRecordingService();
        if (recordingService) {
          // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-type-assertion
          (recordingService as any).conversationFile = null;
        }
      }

      if (this.renderResult) {
        this.renderResult.unmount();
      }

      await act(async () => {
        await new Promise((resolve) => setTimeout(resolve, 500));
      });
    } finally {
      vi.unstubAllEnvs();

      coreEvents.removeAllListeners();
      coreEvents.drainBacklogs();
      MockShellExecutionService.reset();
      ideContextStore.clear();
      // Forcefully clear IdeClient singleton promise
      // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-type-assertion
      (IdeClient as any).instancePromise = null;
      startupProfiler.clear();
      vi.clearAllMocks();

      this.config = undefined;
      this.renderResult = undefined;

      if (this.testDir && fs.existsSync(this.testDir)) {
        try {
          fs.rmSync(this.testDir, { recursive: true, force: true });
        } catch (e) {
          // Best effort: a leftover temp dir must not fail the test.
          debugLogger.warn(
            `Failed to cleanup test directory ${this.testDir}:`,
            e,
          );
        }
      }
    }
  }
}
/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { vi } from 'vitest';
import type {
  ShellExecutionHandle,
  ShellExecutionResult,
  ShellOutputEvent,
  ShellExecutionConfig,
} from '@google/gemini-cli-core';

/** A canned answer for shell commands that match `command`. */
export interface MockShellCommand {
  /** Exact command string, or a RegExp tested against the command. */
  command: string | RegExp;
  /** Fields merged over the default successful result. */
  result: Partial<ShellExecutionResult>;
  /** Output events replayed to the caller before the result is returned. */
  events?: ShellOutputEvent[];
}

type ShellExecutionServiceExecute = (
  commandToExecute: string,
  cwd: string,
  onOutputEvent: (event: ShellOutputEvent) => void,
  abortSignal: AbortSignal,
  shouldUseNodePty: boolean,
  shellExecutionConfig: ShellExecutionConfig,
) => Promise<ShellExecutionHandle>;

/**
 * Static test double for the shell execution service.
 *
 * `execute` answers from the registered mock commands; when nothing matches it
 * either forwards to the registered original implementation (passthrough
 * enabled) or reports exit code 127 with a "Command not found" output.
 */
export class MockShellExecutionService {
  private static mockCommands: MockShellCommand[] = [];
  private static originalExecute: ShellExecutionServiceExecute | undefined;
  private static passthroughEnabled = false;

  static writeToPty = vi.fn();
  static isPtyActive = vi.fn(() => false);
  static onExit = vi.fn(() => () => {});
  static kill = vi.fn();
  static background = vi.fn();
  static subscribe = vi.fn(() => () => {});
  static resizePty = vi.fn();
  static scrollPty = vi.fn();

  /**
   * Registers the original implementation to allow falling back to real shell execution.
   */
  static setOriginalImplementation(
    implementation: ShellExecutionServiceExecute,
  ) {
    this.originalExecute = implementation;
  }

  /**
   * Enables or disables passthrough to the real implementation when no mock matches.
   */
  static setPassthrough(enabled: boolean) {
    this.passthroughEnabled = enabled;
  }

  /** Replaces the list of mocked commands. */
  static setMockCommands(commands: MockShellCommand[]) {
    this.mockCommands = commands;
  }

  /**
   * Clears mocked commands, disables passthrough, and clears the call history
   * of every `vi.fn()` static. `mockClear` keeps each mock's implementation,
   * so the default return values are unaffected. The registered original
   * implementation is kept.
   */
  static reset() {
    this.mockCommands = [];
    this.passthroughEnabled = false;
    this.writeToPty.mockClear();
    this.isPtyActive.mockClear();
    this.onExit.mockClear();
    this.kill.mockClear();
    this.background.mockClear();
    this.subscribe.mockClear();
    this.resizePty.mockClear();
    this.scrollPty.mockClear();
  }

  /**
   * Resolves a command against the mock list.
   *
   * @returns A handle whose `result` promise is already resolved for mocked
   *   and unmatched commands, or whatever the original implementation returns
   *   when passthrough applies.
   */
  static async execute(
    commandToExecute: string,
    cwd: string,
    onOutputEvent: (event: ShellOutputEvent) => void,
    abortSignal: AbortSignal,
    shouldUseNodePty: boolean,
    shellExecutionConfig: ShellExecutionConfig,
  ): Promise<ShellExecutionHandle> {
    const mock = this.mockCommands.find((m) => {
      if (typeof m.command === 'string') {
        return m.command === commandToExecute;
      }
      // The same RegExp object is reused across calls; a `g`/`y` pattern would
      // otherwise resume from its previous match and miss the command.
      m.command.lastIndex = 0;
      return m.command.test(commandToExecute);
    });

    const pid = Math.floor(Math.random() * 10000);

    if (mock) {
      if (mock.events) {
        for (const event of mock.events) {
          onOutputEvent(event);
        }
      }

      const result: ShellExecutionResult = {
        rawOutput: Buffer.from(mock.result.output || ''),
        output: mock.result.output || '',
        exitCode: mock.result.exitCode ?? 0,
        signal: mock.result.signal ?? null,
        error: mock.result.error ?? null,
        aborted: false,
        pid,
        executionMethod: 'none',
        // Explicit fields in the mock win over the defaults above.
        ...mock.result,
      };

      return {
        pid,
        result: Promise.resolve(result),
      };
    }

    if (this.passthroughEnabled && this.originalExecute) {
      return this.originalExecute(
        commandToExecute,
        cwd,
        onOutputEvent,
        abortSignal,
        shouldUseNodePty,
        shellExecutionConfig,
      );
    }

    // No mock and no passthrough: mimic a shell's "command not found" (127).
    return {
      pid,
      result: Promise.resolve({
        rawOutput: Buffer.from(''),
        output: `Command not found: ${commandToExecute}`,
        exitCode: 127,
        signal: null,
        error: null,
        aborted: false,
        pid,
        executionMethod: 'none',
      }),
    };
  }
}
How can I help you today?"}],"role":"model"},"finishReason":"STOP"}]}]} diff --git a/packages/cli/src/test-utils/fixtures/steering.responses b/packages/cli/src/test-utils/fixtures/steering.responses new file mode 100644 index 0000000000..66407f819e --- /dev/null +++ b/packages/cli/src/test-utils/fixtures/steering.responses @@ -0,0 +1,4 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"Starting a long task. First, I'll list the files."},{"functionCall":{"name":"list_directory","args":{"dir_path":"."}}}]},"finishReason":"STOP"}]}]} +{"method":"generateContent","response":{"candidates":[{"content":{"role":"model","parts":[{"text":"ACK: I will focus on .txt files now."}]},"finishReason":"STOP"}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I see the files. Since you want me to focus on .txt files, I will read file1.txt."},{"functionCall":{"name":"read_file","args":{"file_path":"file1.txt"}}}]},"finishReason":"STOP"}]}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I have read file1.txt. 
Task complete."}]},"finishReason":"STOP"}]}]} diff --git a/packages/cli/src/test-utils/render.tsx b/packages/cli/src/test-utils/render.tsx index 0c8eac325e..3812d7699d 100644 --- a/packages/cli/src/test-utils/render.tsx +++ b/packages/cli/src/test-utils/render.tsx @@ -33,6 +33,7 @@ import { makeFakeConfig, type Config } from '@google/gemini-cli-core'; import { FakePersistentState } from './persistentStateFake.js'; import { AppContext, type AppState } from '../ui/contexts/AppContext.js'; import { createMockSettings } from './settings.js'; +import { SessionStatsProvider } from '../ui/contexts/SessionContext.js'; export const persistentStateMock = new FakePersistentState(); @@ -160,6 +161,8 @@ const baseMockUiState = { proQuotaRequest: null, validationRequest: null, }, + hintMode: false, + hintBuffer: '', }; export const mockAppState: AppState = { @@ -209,6 +212,10 @@ const mockUIActions: UIActions = { setActiveBackgroundShellPid: vi.fn(), setIsBackgroundShellListOpen: vi.fn(), setAuthContext: vi.fn(), + onHintInput: vi.fn(), + onHintBackspace: vi.fn(), + onHintClear: vi.fn(), + onHintSubmit: vi.fn(), handleRestart: vi.fn(), handleNewAgentsSelect: vi.fn(), }; @@ -306,39 +313,43 @@ export const renderWithProviders = ( - - - - + + + - - - - - - {component} - - - - - - - - - + + + + + + + {component} + + + + + + + + + + diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index c18b9f24e8..5c1a531445 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -94,6 +94,10 @@ import { basename } from 'node:path'; import { computeTerminalTitle } from '../utils/windowTitle.js'; import { useTextBuffer } from './components/shared/text-buffer.js'; import { useLogger } from './hooks/useLogger.js'; +import { + buildUserSteeringHintPrompt, + generateSteeringAckMessage, +} from '@google/gemini-cli-core'; import { useGeminiStream } from './hooks/useGeminiStream.js'; import { type BackgroundShell } from 
'./hooks/shellCommandProcessor.js'; import { useVim } from './hooks/vim.js'; @@ -603,6 +607,7 @@ export const AppContainer = (props: AppContainerProps) => { apiKeyDefaultValue, reloadApiKey, } = useAuthCommand(settings, config, initializationResult.authError); + const [authContext, setAuthContext] = useState<{ requiresRestart?: boolean }>( {}, ); @@ -963,6 +968,19 @@ Logging in with Google... Restarting Gemini CLI to continue. } }, [pendingRestorePrompt, inputHistory, historyManager.history]); + const lastProcessedHintIndexRef = useRef(-1); + + const consumePendingHints = useCallback(() => { + const userHints = config.getUserHintsAfter( + lastProcessedHintIndexRef.current, + ); + if (userHints.length === 0) { + return null; + } + lastProcessedHintIndexRef.current = config.getLatestHintIndex(); + return userHints.join('\n'); + }, [config]); + const { streamingState, submitQuery, @@ -1001,6 +1019,7 @@ Logging in with Google... Restarting Gemini CLI to continue. terminalWidth, terminalHeight, embeddedShellFocused, + consumePendingHints, ); toggleBackgroundShellRef.current = toggleBackgroundShell; @@ -1103,10 +1122,38 @@ Logging in with Google... Restarting Gemini CLI to continue. ], ); + const handleHintSubmit = useCallback( + (hint: string) => { + const trimmed = hint.trim(); + if (!trimmed) { + return; + } + config.addUserHint(trimmed); + // Render hints with a distinct style. 
+ historyManager.addItem({ + type: 'hint', + text: trimmed, + } as Omit); + }, + [config, historyManager], + ); + const handleFinalSubmit = useCallback( async (submittedValue: string) => { const isSlash = isSlashCommand(submittedValue.trim()); const isIdle = streamingState === StreamingState.Idle; + const isAgentRunning = + streamingState === StreamingState.Responding || + isToolExecuting([ + ...pendingSlashCommandHistoryItems, + ...pendingGeminiHistoryItems, + ]); + + if (isAgentRunning && !isSlash) { + handleHintSubmit(submittedValue); + addInput(submittedValue); + return; + } if (isSlash || (isIdle && isMcpReady)) { if (!isSlash) { @@ -1148,7 +1195,10 @@ Logging in with Google... Restarting Gemini CLI to continue. isMcpReady, streamingState, messageQueue.length, + pendingSlashCommandHistoryItems, + pendingGeminiHistoryItems, config, + handleHintSubmit, ], ); @@ -1814,6 +1864,45 @@ Logging in with Google... Restarting Gemini CLI to continue. [pendingSlashCommandHistoryItems, pendingGeminiHistoryItems], ); + useEffect(() => { + if ( + !isConfigInitialized || + streamingState !== StreamingState.Idle || + !isMcpReady || + isToolAwaitingConfirmation(pendingHistoryItems) + ) { + return; + } + + const pendingHint = consumePendingHints(); + if (!pendingHint) { + return; + } + + const geminiClient = config.getGeminiClient(); + void generateSteeringAckMessage(geminiClient, pendingHint).then( + (ackText) => { + historyManager.addItem({ + type: 'info', + icon: '· ', + color: 'gray', + marginBottom: 1, + text: ackText, + } as Omit); + }, + ); + void submitQuery([{ text: buildUserSteeringHintPrompt(pendingHint) }]); + }, [ + config, + historyManager, + isConfigInitialized, + isMcpReady, + streamingState, + submitQuery, + consumePendingHints, + pendingHistoryItems, + ]); + const allToolCalls = useMemo( () => pendingHistoryItems @@ -1975,6 +2064,8 @@ Logging in with Google... Restarting Gemini CLI to continue. 
isBackgroundShellListOpen, adminSettingsChanged, newAgents, + hintMode: false, + hintBuffer: '', }), [ isThemeDialogOpen, @@ -2137,6 +2228,10 @@ Logging in with Google... Restarting Gemini CLI to continue. setActiveBackgroundShellPid, setIsBackgroundShellListOpen, setAuthContext, + onHintInput: () => {}, + onHintBackspace: () => {}, + onHintClear: () => {}, + onHintSubmit: () => {}, handleRestart: async () => { if (process.send) { const remoteSettings = config.getRemoteAdminSettings(); diff --git a/packages/cli/src/ui/components/DialogManager.tsx b/packages/cli/src/ui/components/DialogManager.tsx index e4e2f4a6e6..80099a9f42 100644 --- a/packages/cli/src/ui/components/DialogManager.tsx +++ b/packages/cli/src/ui/components/DialogManager.tsx @@ -50,6 +50,7 @@ export const DialogManager = ({ const uiState = useUIState(); const uiActions = useUIActions(); + const { constrainHeight, terminalHeight, diff --git a/packages/cli/src/ui/components/Footer.tsx b/packages/cli/src/ui/components/Footer.tsx index 3fc830c1b7..a36848b8c6 100644 --- a/packages/cli/src/ui/components/Footer.tsx +++ b/packages/cli/src/ui/components/Footer.tsx @@ -71,7 +71,8 @@ export const Footer: React.FC = () => { const justifyContent = hideCWD && hideModelInfo ? 'center' : 'space-between'; const displayVimMode = vimEnabled ? 
vimMode : undefined; - const showDebugProfiler = debugMode || isDevelopment; + const showDebugProfiler = + debugMode || (isDevelopment && settings.merged.general.devtools); return ( ', () => { }, background: { primary: '', + hintMode: '', diff: { added: '', removed: '' }, }, border: { diff --git a/packages/cli/src/ui/components/HistoryItemDisplay.test.tsx b/packages/cli/src/ui/components/HistoryItemDisplay.test.tsx index b232ff948a..df772e9410 100644 --- a/packages/cli/src/ui/components/HistoryItemDisplay.test.tsx +++ b/packages/cli/src/ui/components/HistoryItemDisplay.test.tsx @@ -44,6 +44,18 @@ describe('', () => { expect(lastFrame()).toContain('Hello'); }); + it('renders HintMessage for "hint" type', () => { + const item: HistoryItem = { + ...baseItem, + type: 'hint', + text: 'Try using ripgrep first', + }; + const { lastFrame } = renderWithProviders( + , + ); + expect(lastFrame()).toContain('Try using ripgrep first'); + }); + it('renders UserMessage for "user" type with slash command', () => { const item: HistoryItem = { ...baseItem, diff --git a/packages/cli/src/ui/components/HistoryItemDisplay.tsx b/packages/cli/src/ui/components/HistoryItemDisplay.tsx index 41340c1b08..c12dd2edfb 100644 --- a/packages/cli/src/ui/components/HistoryItemDisplay.tsx +++ b/packages/cli/src/ui/components/HistoryItemDisplay.tsx @@ -35,6 +35,7 @@ import { ChatList } from './views/ChatList.js'; import { HooksList } from './views/HooksList.js'; import { ModelMessage } from './messages/ModelMessage.js'; import { ThinkingMessage } from './messages/ThinkingMessage.js'; +import { HintMessage } from './messages/HintMessage.js'; import { getInlineThinkingMode } from '../utils/inlineThinkingMode.js'; import { useSettings } from '../contexts/SettingsContext.js'; @@ -71,6 +72,9 @@ export const HistoryItemDisplay: React.FC = ({ {itemForDisplay.type === 'thinking' && inlineThinkingMode !== 'off' && ( )} + {itemForDisplay.type === 'hint' && ( + + )} {itemForDisplay.type === 'user' && ( )} @@ 
-102,6 +106,7 @@ export const HistoryItemDisplay: React.FC = ({ text={itemForDisplay.text} icon={itemForDisplay.icon} color={itemForDisplay.color} + marginBottom={itemForDisplay.marginBottom} /> )} {itemForDisplay.type === 'warning' && ( diff --git a/packages/cli/src/ui/components/InputPrompt.tsx b/packages/cli/src/ui/components/InputPrompt.tsx index 0621255f90..604e8c9900 100644 --- a/packages/cli/src/ui/components/InputPrompt.tsx +++ b/packages/cli/src/ui/components/InputPrompt.tsx @@ -238,7 +238,7 @@ export const InputPrompt: React.FC = ({ ]); const [expandedSuggestionIndex, setExpandedSuggestionIndex] = useState(-1); - const shellHistory = useShellHistory(config.getProjectRoot()); + const shellHistory = useShellHistory(config.getProjectRoot(), config.storage); const shellHistoryData = shellHistory.history; const completion = useCommandCompletion({ diff --git a/packages/cli/src/ui/components/messages/HintMessage.tsx b/packages/cli/src/ui/components/messages/HintMessage.tsx new file mode 100644 index 0000000000..a19847dd34 --- /dev/null +++ b/packages/cli/src/ui/components/messages/HintMessage.tsx @@ -0,0 +1,53 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type React from 'react'; +import { Text, Box } from 'ink'; +import { theme } from '../../semantic-colors.js'; +import { SCREEN_READER_USER_PREFIX } from '../../textConstants.js'; +import { HalfLinePaddedBox } from '../shared/HalfLinePaddedBox.js'; +import { useConfig } from '../../contexts/ConfigContext.js'; + +interface HintMessageProps { + text: string; +} + +export const HintMessage: React.FC = ({ text }) => { + const prefix = '💡 '; + const prefixWidth = prefix.length; + const config = useConfig(); + const useBackgroundColor = config.getUseBackgroundColor(); + + return ( + + + + + {prefix} + + + + + {`Steering Hint: ${text}`} + + + + + ); +}; diff --git a/packages/cli/src/ui/components/messages/InfoMessage.tsx 
b/packages/cli/src/ui/components/messages/InfoMessage.tsx index d092e292b1..e725a23993 100644 --- a/packages/cli/src/ui/components/messages/InfoMessage.tsx +++ b/packages/cli/src/ui/components/messages/InfoMessage.tsx @@ -13,19 +13,21 @@ interface InfoMessageProps { text: string; icon?: string; color?: string; + marginBottom?: number; } export const InfoMessage: React.FC = ({ text, icon, color, + marginBottom, }) => { color ??= theme.status.warning; const prefix = icon ?? 'ℹ '; const prefixWidth = prefix.length; return ( - + {prefix} diff --git a/packages/cli/src/ui/contexts/UIActionsContext.tsx b/packages/cli/src/ui/contexts/UIActionsContext.tsx index 8ad79f6b25..f5b3653d7a 100644 --- a/packages/cli/src/ui/contexts/UIActionsContext.tsx +++ b/packages/cli/src/ui/contexts/UIActionsContext.tsx @@ -73,6 +73,10 @@ export interface UIActions { setActiveBackgroundShellPid: (pid: number) => void; setIsBackgroundShellListOpen: (isOpen: boolean) => void; setAuthContext: (context: { requiresRestart?: boolean }) => void; + onHintInput: (char: string) => void; + onHintBackspace: () => void; + onHintClear: () => void; + onHintSubmit: (hint: string) => void; handleRestart: () => void; handleNewAgentsSelect: (choice: NewAgentsChoice) => Promise; } diff --git a/packages/cli/src/ui/contexts/UIStateContext.tsx b/packages/cli/src/ui/contexts/UIStateContext.tsx index cf41896232..10d4f82d50 100644 --- a/packages/cli/src/ui/contexts/UIStateContext.tsx +++ b/packages/cli/src/ui/contexts/UIStateContext.tsx @@ -173,6 +173,8 @@ export interface UIState { isBackgroundShellListOpen: boolean; adminSettingsChanged: boolean; newAgents: AgentDefinition[] | null; + hintMode: boolean; + hintBuffer: string; transientMessage: { text: string; type: TransientMessageType; diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx index 294c537af4..a82b33c8d8 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx +++ 
b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx @@ -56,6 +56,11 @@ const MockedGeminiClientClass = vi.hoisted(() => this.startChat = mockStartChat; this.sendMessageStream = mockSendMessageStream; this.addHistory = vi.fn(); + this.generateContent = vi.fn().mockResolvedValue({ + candidates: [ + { content: { parts: [{ text: 'Got it. Focusing on tests only.' }] } }, + ], + }); this.getCurrentSequenceModel = vi.fn().mockReturnValue('test-model'); this.getChat = vi.fn().mockReturnValue({ recordCompletedToolCalls: vi.fn(), @@ -152,13 +157,17 @@ vi.mock('./useLogger.js', () => ({ const mockStartNewPrompt = vi.fn(); const mockAddUsage = vi.fn(); -vi.mock('../contexts/SessionContext.js', () => ({ - useSessionStats: vi.fn(() => ({ - startNewPrompt: mockStartNewPrompt, - addUsage: mockAddUsage, - getPromptCount: vi.fn(() => 5), - })), -})); +vi.mock('../contexts/SessionContext.js', async (importOriginal) => { + const actual = (await importOriginal()) as any; + return { + ...actual, + useSessionStats: vi.fn(() => ({ + startNewPrompt: mockStartNewPrompt, + addUsage: mockAddUsage, + getPromptCount: vi.fn(() => 5), + })), + }; +}); vi.mock('./slashCommandProcessor.js', () => ({ handleSlashCommand: vi.fn().mockReturnValue(false), @@ -661,6 +670,113 @@ describe('useGeminiStream', () => { ); }); + it('should inject steering hint prompt for continuation', async () => { + const toolCallResponseParts: Part[] = [{ text: 'tool final response' }]; + const completedToolCalls: TrackedToolCall[] = [ + { + request: { + callId: 'call1', + name: 'tool1', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-ack', + }, + status: 'success', + responseSubmittedToGemini: false, + response: { + callId: 'call1', + responseParts: toolCallResponseParts, + errorType: undefined, + }, + tool: { + displayName: 'MockTool', + }, + invocation: { + getDescription: () => `Mock description`, + } as unknown as AnyToolInvocation, + } as TrackedCompletedToolCall, + ]; + + 
mockSendMessageStream.mockReturnValue( + (async function* () { + yield { + type: ServerGeminiEventType.Content, + value: 'Applied the requested adjustment.', + }; + })(), + ); + + let capturedOnComplete: + | ((completedTools: TrackedToolCall[]) => Promise) + | null = null; + mockUseToolScheduler.mockImplementation((onComplete) => { + capturedOnComplete = onComplete; + return [ + [], + mockScheduleToolCalls, + mockMarkToolsAsSubmitted, + vi.fn(), + mockCancelAllToolCalls, + 0, + ]; + }); + + renderHookWithProviders(() => + useGeminiStream( + new MockedGeminiClientClass(mockConfig), + [], + mockAddItem, + mockConfig, + mockLoadedSettings, + mockOnDebugMessage, + mockHandleSlashCommand, + false, + () => 'vscode' as EditorType, + () => {}, + () => Promise.resolve(), + false, + () => {}, + () => {}, + () => {}, + 80, + 24, + undefined, + () => 'focus on tests only', + ), + ); + + await act(async () => { + if (capturedOnComplete) { + await new Promise((resolve) => setTimeout(resolve, 0)); + await capturedOnComplete(completedToolCalls); + } + }); + + await waitFor(() => { + expect(mockSendMessageStream).toHaveBeenCalledTimes(1); + }); + + const sentParts = mockSendMessageStream.mock.calls[0][0] as Part[]; + const injectedHintPart = sentParts[0] as { text?: string }; + expect(injectedHintPart.text).toContain( + 'User steering update: "focus on tests only"', + ); + expect(injectedHintPart.text).toContain( + 'Classify it as ADD_TASK, MODIFY_TASK, CANCEL_TASK, or EXTRA_CONTEXT.', + ); + expect(injectedHintPart.text).toContain( + 'Do not cancel/skip tasks unless the user explicitly cancels them.', + ); + expect( + mockAddItem.mock.calls.some( + ([item]) => + item?.type === 'info' && + typeof item.text === 'string' && + item.text.includes('Got it. 
Focusing on tests only.'), + ), + ).toBe(true); + }); + it('should handle all tool calls being cancelled', async () => { const cancelledToolCalls: TrackedToolCall[] = [ { diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index bba6977ffa..5b03cde056 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -32,6 +32,8 @@ import { ValidationRequiredError, coreEvents, CoreEvent, + buildUserSteeringHintPrompt, + generateSteeringAckMessage, } from '@google/gemini-cli-core'; import type { Config, @@ -81,6 +83,7 @@ import path from 'node:path'; import { useSessionStats } from '../contexts/SessionContext.js'; import { useKeypress } from './useKeypress.js'; import type { LoadedSettings } from '../../config/settings.js'; +import { theme } from '../semantic-colors.js'; type ToolResponseWithParts = ToolCallResponseInfo & { llmContent?: PartListUnion; @@ -185,6 +188,7 @@ export const useGeminiStream = ( terminalWidth: number, terminalHeight: number, isShellFocused?: boolean, + consumeUserHint?: () => string | null, ) => { const [initError, setInitError] = useState(null); const [retryStatus, setRetryStatus] = useState( @@ -1561,6 +1565,28 @@ export const useGeminiStream = ( const responsesToSend: Part[] = geminiTools.flatMap( (toolCall) => toolCall.response.responseParts, ); + + if (consumeUserHint) { + const userHint = consumeUserHint(); + if (userHint && userHint.trim().length > 0) { + const hintText = userHint.trim(); + responsesToSend.unshift({ + text: buildUserSteeringHintPrompt(hintText), + }); + void generateSteeringAckMessage(geminiClient, hintText).then( + (ackText) => { + addItem({ + type: 'info', + icon: '· ', + color: theme.text.secondary, + marginBottom: 1, + text: ackText, + } as Omit); + }, + ); + } + } + const callIdsToMarkAsSubmitted = geminiTools.map( (toolCall) => toolCall.request.callId, ); @@ -1593,6 +1619,7 @@ export const useGeminiStream = ( 
modelSwitchedFromQuotaError, addItem, registerBackgroundShell, + consumeUserHint, ], ); diff --git a/packages/cli/src/ui/hooks/useShellHistory.ts b/packages/cli/src/ui/hooks/useShellHistory.ts index 1cc013ca83..a5157a3043 100644 --- a/packages/cli/src/ui/hooks/useShellHistory.ts +++ b/packages/cli/src/ui/hooks/useShellHistory.ts @@ -79,14 +79,26 @@ export function useShellHistory( const [historyFilePath, setHistoryFilePath] = useState(null); useEffect(() => { + let isMounted = true; async function loadHistory() { - const filePath = await getHistoryFilePath(projectRoot, storage); - setHistoryFilePath(filePath); - const loadedHistory = await readHistoryFile(filePath); - setHistory(loadedHistory.reverse()); // Newest first + try { + const filePath = await getHistoryFilePath(projectRoot, storage); + if (!isMounted) return; + setHistoryFilePath(filePath); + const loadedHistory = await readHistoryFile(filePath); + if (!isMounted) return; + setHistory(loadedHistory.reverse()); // Newest first + } catch (error) { + if (isMounted) { + debugLogger.error('Error loading shell history:', error); + } + } } // eslint-disable-next-line @typescript-eslint/no-floating-promises loadHistory(); + return () => { + isMounted = false; + }; }, [projectRoot, storage]); const addCommandToHistory = useCallback( diff --git a/packages/cli/src/ui/themes/no-color.ts b/packages/cli/src/ui/themes/no-color.ts index 7c22e68b9a..5d2a4fbb58 100644 --- a/packages/cli/src/ui/themes/no-color.ts +++ b/packages/cli/src/ui/themes/no-color.ts @@ -36,6 +36,7 @@ const noColorSemanticColors: SemanticColors = { }, background: { primary: '', + hintMode: '', diff: { added: '', removed: '', diff --git a/packages/cli/src/ui/themes/semantic-tokens.ts b/packages/cli/src/ui/themes/semantic-tokens.ts index 3e95aee188..557ccd0ffa 100644 --- a/packages/cli/src/ui/themes/semantic-tokens.ts +++ b/packages/cli/src/ui/themes/semantic-tokens.ts @@ -16,6 +16,7 @@ export interface SemanticColors { }; background: { primary: 
string; + hintMode: string; diff: { added: string; removed: string; @@ -48,6 +49,7 @@ export const lightSemanticColors: SemanticColors = { }, background: { primary: lightTheme.Background, + hintMode: '#E8E0F0', diff: { added: lightTheme.DiffAdded, removed: lightTheme.DiffRemoved, @@ -80,6 +82,7 @@ export const darkSemanticColors: SemanticColors = { }, background: { primary: darkTheme.Background, + hintMode: '#352A45', diff: { added: darkTheme.DiffAdded, removed: darkTheme.DiffRemoved, diff --git a/packages/cli/src/ui/themes/theme.ts b/packages/cli/src/ui/themes/theme.ts index 2e39b1b6c7..61061b866d 100644 --- a/packages/cli/src/ui/themes/theme.ts +++ b/packages/cli/src/ui/themes/theme.ts @@ -131,6 +131,7 @@ export class Theme { }, background: { primary: this.colors.Background, + hintMode: this.type === 'light' ? '#E8E0F0' : '#352A45', diff: { added: this.colors.DiffAdded, removed: this.colors.DiffRemoved, @@ -400,6 +401,7 @@ export function createCustomTheme(customTheme: CustomTheme): Theme { }, background: { primary: customTheme.background?.primary ?? colors.Background, + hintMode: 'magenta', diff: { added: customTheme.background?.diff?.added ?? colors.DiffAdded, removed: customTheme.background?.diff?.removed ?? 
colors.DiffRemoved, diff --git a/packages/cli/src/ui/types.ts b/packages/cli/src/ui/types.ts index c48b81bf9c..5a08658149 100644 --- a/packages/cli/src/ui/types.ts +++ b/packages/cli/src/ui/types.ts @@ -123,6 +123,7 @@ export type HistoryItemInfo = HistoryItemBase & { text: string; icon?: string; color?: string; + marginBottom?: number; }; export type HistoryItemError = HistoryItemBase & { @@ -225,6 +226,11 @@ export type HistoryItemThinking = HistoryItemBase & { thought: ThoughtSummary; }; +export type HistoryItemHint = HistoryItemBase & { + type: 'hint'; + text: string; +}; + export type HistoryItemChatList = HistoryItemBase & { type: 'chat_list'; chats: ChatDetail[]; @@ -349,6 +355,7 @@ export type HistoryItemWithoutId = | HistoryItemMcpStatus | HistoryItemChatList | HistoryItemThinking + | HistoryItemHint | HistoryItemHooksList; export type HistoryItem = HistoryItemWithoutId & { id: number }; @@ -374,6 +381,7 @@ export enum MessageType { MCP_STATUS = 'mcp_status', CHAT_LIST = 'chat_list', HOOKS_LIST = 'hooks_list', + HINT = 'hint', } // Simplified message structure for internal feedback diff --git a/packages/core/src/agents/local-executor.ts b/packages/core/src/agents/local-executor.ts index e9fee219e3..039531e4df 100644 --- a/packages/core/src/agents/local-executor.ts +++ b/packages/core/src/agents/local-executor.ts @@ -59,6 +59,7 @@ import { getVersion } from '../utils/version.js'; import { getToolCallContext } from '../utils/toolCallContext.js'; import { scheduleAgentTools } from './agent-scheduler.js'; import { DeadlineTimer } from '../utils/deadlineTimer.js'; +import { formatUserHintsForModel } from '../utils/flashLiteHelper.js'; /** A callback function to report on agent activity. */ export type ActivityCallback = (activity: SubagentActivityEvent) => void; @@ -462,7 +463,17 @@ export class LocalAgentExecutor { const query = this.definition.promptConfig.query ? 
templateString(this.definition.promptConfig.query, augmentedInputs) : DEFAULT_QUERY_STRING; - let currentMessage: Content = { role: 'user', parts: [{ text: query }] }; + + let lastProcessedHintIndex = this.runtimeContext.getLatestHintIndex(); + const initialHints = this.runtimeContext.getUserHints(); + const formattedInitialHints = formatUserHintsForModel(initialHints); + + let currentMessage: Content = formattedInitialHints + ? { + role: 'user', + parts: [{ text: formattedInitialHints }, { text: query }], + } + : { role: 'user', parts: [{ text: query }] }; while (true) { // Check for termination conditions like max turns. @@ -501,6 +512,20 @@ export class LocalAgentExecutor { // If status is 'continue', update message for the next loop currentMessage = turnResult.nextMessage; + + // Check for new user steering hints + const newHints = this.runtimeContext.getUserHintsAfter( + lastProcessedHintIndex, + ); + if (newHints.length > 0) { + const formattedHints = formatUserHintsForModel(newHints); + if (formattedHints) { + // Append hints to the current message (next turn) + currentMessage.parts ??= []; + currentMessage.parts.unshift({ text: formattedHints }); + } + lastProcessedHintIndex = this.runtimeContext.getLatestHintIndex(); + } } // === UNIFIED RECOVERY BLOCK === diff --git a/packages/core/src/agents/subagent-tool.ts b/packages/core/src/agents/subagent-tool.ts index f311b60562..0a1bd0f3c2 100644 --- a/packages/core/src/agents/subagent-tool.ts +++ b/packages/core/src/agents/subagent-tool.ts @@ -64,6 +64,8 @@ export class SubagentTool extends BaseDeclarativeTool { } } +import { formatUserHintsForModel } from '../utils/flashLiteHelper.js'; + class SubAgentInvocation extends BaseToolInvocation { constructor( params: AgentInputs, @@ -88,7 +90,10 @@ class SubAgentInvocation extends BaseToolInvocation { override async shouldConfirmExecute( abortSignal: AbortSignal, ): Promise { - const invocation = this.buildSubInvocation(this.definition, this.params); + const 
invocation = this.buildSubInvocation( + this.definition, + this.withUserHints(this.params), + ); return invocation.shouldConfirmExecute(abortSignal); } @@ -107,11 +112,36 @@ class SubAgentInvocation extends BaseToolInvocation { ); } - const invocation = this.buildSubInvocation(this.definition, this.params); + const invocation = this.buildSubInvocation( + this.definition, + this.withUserHints(this.params), + ); return invocation.execute(signal, updateOutput); } + private withUserHints(agentArgs: AgentInputs): AgentInputs { + if (this.definition.kind !== 'remote') { + return agentArgs; + } + + const userHints = this.config.getUserHints(); + const formattedHints = formatUserHintsForModel(userHints); + if (!formattedHints) { + return agentArgs; + } + + const query = agentArgs['query']; + if (typeof query !== 'string' || query.trim().length === 0) { + return agentArgs; + } + + return { + ...agentArgs, + query: `${formattedHints}\n\n${query}`, + }; + } + private buildSubInvocation( definition: AgentDefinition, agentArgs: AgentInputs, diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index 886e722ba0..27a2f1fcf4 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -41,6 +41,7 @@ import type { SkillDefinition } from '../skills/skillLoader.js'; import type { McpClientManager } from '../tools/mcp-client-manager.js'; import { DEFAULT_MODEL_CONFIGS } from './defaultModelConfigs.js'; import { DEFAULT_GEMINI_MODEL } from './models.js'; +import { Storage } from './storage.js'; vi.mock('fs', async (importOriginal) => { const actual = await importOriginal(); @@ -279,16 +280,21 @@ describe('Server Config (config.ts)', () => { await expect(config.initialize()).resolves.toBeUndefined(); }); - it('should throw an error if initialized more than once', async () => { + it('should deduplicate multiple calls to initialize', async () => { const config = new Config({ ...baseParams, checkpointing: 
false, }); - await expect(config.initialize()).resolves.toBeUndefined(); - await expect(config.initialize()).rejects.toThrow( - 'Config was already initialized', - ); + const storageSpy = vi.spyOn(Storage.prototype, 'initialize'); + + await Promise.all([ + config.initialize(), + config.initialize(), + config.initialize(), + ]); + + expect(storageSpy).toHaveBeenCalledTimes(1); }); it('should await MCP initialization in non-interactive mode', async () => { @@ -2583,4 +2589,34 @@ describe('syncPlanModeTools', () => { expect(setToolsSpy).toHaveBeenCalled(); }); + + describe('user hints', () => { + it('stores trimmed hints and exposes them via indexing', () => { + const config = new Config(baseParams); + + config.addUserHint(' first hint '); + config.addUserHint('second hint'); + config.addUserHint(' '); + + expect(config.getUserHints()).toEqual(['first hint', 'second hint']); + expect(config.getLatestHintIndex()).toBe(1); + expect(config.getUserHintsAfter(-1)).toEqual([ + 'first hint', + 'second hint', + ]); + expect(config.getUserHintsAfter(0)).toEqual(['second hint']); + expect(config.getUserHintsAfter(1)).toEqual([]); + }); + + it('tracks the last hint timestamp', () => { + const config = new Config(baseParams); + + expect(config.getLastUserHintAt()).toBeNull(); + config.addUserHint('hint'); + + const timestamp = config.getLastUserHintAt(); + expect(timestamp).not.toBeNull(); + expect(typeof timestamp).toBe('number'); + }); + }); }); diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 45a3a953b5..031bf068d9 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -615,7 +615,7 @@ export class Config { private readonly enablePromptCompletion: boolean = false; private readonly truncateToolOutputThreshold: number; private compressionTruncationCounter = 0; - private initialized: boolean = false; + private initPromise: Promise | undefined; readonly storage: Storage; private readonly fileExclusions: 
FileExclusions; private readonly eventEmitter?: EventEmitter; @@ -668,7 +668,7 @@ export class Config { private remoteAdminSettings: AdminControlsSettings | undefined; private latestApiRequest: GenerateContentParameters | undefined; private lastModeSwitchTime: number = Date.now(); - + private userHints: Array<{ text: string; timestamp: number }> = []; private approvedPlanPath: string | undefined; constructor(params: ConfigParameters) { @@ -909,97 +909,100 @@ export class Config { * Must only be called once, throws if called again. */ async initialize(): Promise { - if (this.initialized) { - throw Error('Config was already initialized'); - } - this.initialized = true; - - await this.storage.initialize(); - - // Add pending directories to workspace context - for (const dir of this.pendingIncludeDirectories) { - this.workspaceContext.addDirectory(dir); + if (this.initPromise) { + return this.initPromise; } - // Add plans directory to workspace context for plan file storage - if (this.planEnabled) { - const plansDir = this.storage.getProjectTempPlansDir(); - await fs.promises.mkdir(plansDir, { recursive: true }); - this.workspaceContext.addDirectory(plansDir); - } + this.initPromise = (async () => { + await this.storage.initialize(); - // Initialize centralized FileDiscoveryService - const discoverToolsHandle = startupProfiler.start('discover_tools'); - this.getFileService(); - if (this.getCheckpointingEnabled()) { - await this.getGitService(); - } - this.promptRegistry = new PromptRegistry(); - this.resourceRegistry = new ResourceRegistry(); - - this.agentRegistry = new AgentRegistry(this); - await this.agentRegistry.initialize(); - - coreEvents.on(CoreEvent.AgentsRefreshed, this.onAgentsRefreshed); - - this.toolRegistry = await this.createToolRegistry(); - discoverToolsHandle?.end(); - this.mcpClientManager = new McpClientManager( - this.clientVersion, - this.toolRegistry, - this, - this.eventEmitter, - ); - // We do not await this promise so that the CLI can start 
up even if - // MCP servers are slow to connect. - const mcpInitialization = Promise.allSettled([ - this.mcpClientManager.startConfiguredMcpServers(), - this.getExtensionLoader().start(this), - ]).then((results) => { - for (const result of results) { - if (result.status === 'rejected') { - debugLogger.error('Error initializing MCP clients:', result.reason); - } + // Add pending directories to workspace context + for (const dir of this.pendingIncludeDirectories) { + this.workspaceContext.addDirectory(dir); } - }); - if (!this.interactive) { - await mcpInitialization; - } + // Add plans directory to workspace context for plan file storage + if (this.planEnabled) { + const plansDir = this.storage.getProjectTempPlansDir(); + await fs.promises.mkdir(plansDir, { recursive: true }); + this.workspaceContext.addDirectory(plansDir); + } - if (this.skillsSupport) { - this.getSkillManager().setAdminSettings(this.adminSkillsEnabled); - if (this.adminSkillsEnabled) { - await this.getSkillManager().discoverSkills( - this.storage, - this.getExtensions(), - this.isTrustedFolder(), - ); - this.getSkillManager().setDisabledSkills(this.disabledSkills); + // Initialize centralized FileDiscoveryService + const discoverToolsHandle = startupProfiler.start('discover_tools'); + this.getFileService(); + if (this.getCheckpointingEnabled()) { + await this.getGitService(); + } + this.promptRegistry = new PromptRegistry(); + this.resourceRegistry = new ResourceRegistry(); - // Re-register ActivateSkillTool to update its schema with the discovered enabled skill enums - if (this.getSkillManager().getSkills().length > 0) { - this.getToolRegistry().unregisterTool(ActivateSkillTool.Name); - this.getToolRegistry().registerTool( - new ActivateSkillTool(this, this.messageBus), + this.agentRegistry = new AgentRegistry(this); + await this.agentRegistry.initialize(); + + coreEvents.on(CoreEvent.AgentsRefreshed, this.onAgentsRefreshed); + + this.toolRegistry = await this.createToolRegistry(); + 
discoverToolsHandle?.end(); + this.mcpClientManager = new McpClientManager( + this.clientVersion, + this.toolRegistry, + this, + this.eventEmitter, + ); + // We do not await this promise so that the CLI can start up even if + // MCP servers are slow to connect. + const mcpInitialization = Promise.allSettled([ + this.mcpClientManager.startConfiguredMcpServers(), + this.getExtensionLoader().start(this), + ]).then((results) => { + for (const result of results) { + if (result.status === 'rejected') { + debugLogger.error('Error initializing MCP clients:', result.reason); + } + } + }); + + if (!this.interactive) { + await mcpInitialization; + } + + if (this.skillsSupport) { + this.getSkillManager().setAdminSettings(this.adminSkillsEnabled); + if (this.adminSkillsEnabled) { + await this.getSkillManager().discoverSkills( + this.storage, + this.getExtensions(), + this.isTrustedFolder(), ); + this.getSkillManager().setDisabledSkills(this.disabledSkills); + + // Re-register ActivateSkillTool to update its schema with the discovered enabled skill enums + if (this.getSkillManager().getSkills().length > 0) { + this.getToolRegistry().unregisterTool(ActivateSkillTool.Name); + this.getToolRegistry().registerTool( + new ActivateSkillTool(this, this.messageBus), + ); + } } } - } - // Initialize hook system if enabled - if (this.getEnableHooks()) { - this.hookSystem = new HookSystem(this); - await this.hookSystem.initialize(); - } + // Initialize hook system if enabled + if (this.getEnableHooks()) { + this.hookSystem = new HookSystem(this); + await this.hookSystem.initialize(); + } - if (this.experimentalJitContext) { - this.contextManager = new ContextManager(this); - await this.contextManager.refresh(); - } + if (this.experimentalJitContext) { + this.contextManager = new ContextManager(this); + await this.contextManager.refresh(); + } - await this.geminiClient.initialize(); - this.syncPlanModeTools(); + await this.geminiClient.initialize(); + this.syncPlanModeTools(); + })(); + + 
return this.initPromise; } getContentGenerator(): ContentGenerator { @@ -2486,6 +2489,36 @@ export class Config { return this.hookSystem; } + /** Records a user hint: the text is trimmed, ignored if empty after trimming, otherwise appended to userHints together with the current Date.now() timestamp. */ addUserHint(hint: string): void { + const trimmed = hint.trim(); + if (trimmed.length === 0) { + return; + } + this.userHints.push({ text: trimmed, timestamp: Date.now() }); + } + + /** Returns the text of every recorded hint, in the order the hints were added. */ getUserHints(): string[] { + return this.userHints.map((h) => h.text); + } + + /** Returns the text of hints stored at positions greater than index; a negative index returns all hints. */ getUserHintsAfter(index: number): string[] { + if (index < 0) { + return this.getUserHints(); + } + return this.userHints.slice(index + 1).map((h) => h.text); + } + + /** Returns the position of the most recently added hint, which is -1 when no hints have been recorded. */ getLatestHintIndex(): number { + return this.userHints.length - 1; + } + + /** Returns the timestamp stored with the most recently added hint, or null when no hints have been recorded. */ getLastUserHintAt(): number | null { + if (this.userHints.length === 0) { + return null; + } + return this.userHints[this.userHints.length - 1].timestamp; + } + /** * Get hooks configuration */ diff --git a/packages/core/src/config/defaultModelConfigs.ts b/packages/core/src/config/defaultModelConfigs.ts index 773223dc0c..5edc8a1212 100644 --- a/packages/core/src/config/defaultModelConfigs.ts +++ b/packages/core/src/config/defaultModelConfigs.ts @@ -121,6 +121,19 @@ export const DEFAULT_MODEL_CONFIGS: ModelConfigServiceConfig = { }, }, }, + 'flash-lite-helper': { + extends: 'base', + modelConfig: { + model: 'gemini-2.5-flash-lite', + generateContentConfig: { + temperature: 0.2, + maxOutputTokens: 120, + thinkingConfig: { + thinkingBudget: 0, + }, + }, + }, + }, 'edit-corrector': { extends: 'base', modelConfig: { diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index 610d681c6e..db720ee296 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -11,6 +11,7 @@ exports[`Core System Prompt (prompts.ts) > ApprovalMode in System Prompt > Appro - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate
naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. 
@@ -142,6 +143,7 @@ exports[`Core System Prompt (prompts.ts) > ApprovalMode in System Prompt > Appro - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. 
Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -278,6 +280,7 @@ exports[`Core System Prompt (prompts.ts) > ApprovalMode in System Prompt > shoul - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. 
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -390,6 +393,7 @@ exports[`Core System Prompt (prompts.ts) > ApprovalMode in System Prompt > shoul - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. 
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -526,6 +530,7 @@ exports[`Core System Prompt (prompts.ts) > should append userMemory with separat - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. 
Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. 
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -652,6 +657,7 @@ exports[`Core System Prompt (prompts.ts) > should handle CodebaseInvestigator wi - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, you must work autonomously as no further user input is available. 
You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Handle Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, do not perform it automatically. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. 
@@ -743,6 +749,7 @@ exports[`Core System Prompt (prompts.ts) > should handle CodebaseInvestigator wi - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, you must work autonomously as no further user input is available. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. 
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Handle Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, do not perform it automatically. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -829,6 +836,7 @@ exports[`Core System Prompt (prompts.ts) > should handle git instructions when i - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. 
- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. 
@@ -941,6 +949,7 @@ exports[`Core System Prompt (prompts.ts) > should handle git instructions when i - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. 
Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -1071,6 +1080,7 @@ exports[`Core System Prompt (prompts.ts) > should include approved plan instruct - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. 
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -1173,6 +1183,7 @@ exports[`Core System Prompt (prompts.ts) > should include available_skills when - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. 
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -1303,6 +1314,7 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. 
Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. 
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -1416,6 +1428,7 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. 
You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. 
@@ -1529,6 +1542,7 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. 
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. 
@@ -1638,6 +1652,7 @@ exports[`Core System Prompt (prompts.ts) > should include planning phase suggest - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. 
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. 
@@ -1747,6 +1762,7 @@ exports[`Core System Prompt (prompts.ts) > should include sub-agents in XML for - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. 
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. 
@@ -1851,6 +1867,7 @@ exports[`Core System Prompt (prompts.ts) > should match snapshot on Windows 1`] - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. 
Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -1964,6 +1981,7 @@ exports[`Core System Prompt (prompts.ts) > should render hierarchical memory wit - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. - **Conflict Resolution:** Instructions are provided in hierarchical context tags: \`\`, \`\`, and \`\`. In case of contradictory instructions, follow this priority: \`\` (highest) > \`\` > \`\` (lowest). +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. 
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -2095,6 +2113,7 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. 
You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. 
@@ -2204,6 +2223,7 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. 
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. 
@@ -2308,6 +2328,7 @@ exports[`Core System Prompt (prompts.ts) > should return the interactive avoidan - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Handle Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, do not perform it automatically. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. 
@@ -2424,6 +2445,7 @@ exports[`Core System Prompt (prompts.ts) > should use chatty system prompt for p - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. 
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. 
@@ -2533,6 +2555,7 @@ exports[`Core System Prompt (prompts.ts) > should use chatty system prompt for p - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. 
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. 
@@ -2637,6 +2660,7 @@ exports[`Core System Prompt (prompts.ts) > should use legacy system prompt for n - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. 
Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. diff --git a/packages/core/src/core/prompts.test.ts b/packages/core/src/core/prompts.test.ts index 2f4d70c86c..020d69a286 100644 --- a/packages/core/src/core/prompts.test.ts +++ b/packages/core/src/core/prompts.test.ts @@ -198,6 +198,7 @@ describe('Core System Prompt (prompts.ts)', () => { expect(prompt).not.toContain('No sub-agents are currently available.'); expect(prompt).toContain('# Core Mandates'); expect(prompt).toContain('- **Conventions:**'); + expect(prompt).toContain('- **User Hints:**'); expect(prompt).toContain('# Outside of Sandbox'); expect(prompt).toContain('# Final Reminder'); expect(prompt).toMatchSnapshot(); @@ -207,6 +208,7 @@ describe('Core System Prompt (prompts.ts)', () => { vi.mocked(mockConfig.getActiveModel).mockReturnValue(PREVIEW_GEMINI_MODEL); const prompt = getCoreSystemPrompt(mockConfig); expect(prompt).toContain('You are Gemini CLI, an interactive CLI agent'); // Check for core content + expect(prompt).toContain('- **User Hints:**'); expect(prompt).toContain('No Chitchat:'); expect(prompt).toMatchSnapshot(); }); diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 8232f73570..84af7270b6 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -28,6 +28,7 @@ export * from './commands/memory.js'; export * from './commands/types.js'; // Export Core Logic +export * from './core/baseLlmClient.js'; export * from './core/client.js'; export * from './core/contentGenerator.js'; export * from './core/loggingContentGenerator.js'; @@ -88,6 +89,7 @@ export * from './utils/formatters.js'; export * from './utils/generateContentResponseUtilities.js'; export * from './utils/filesearch/fileSearch.js'; export * from './utils/errorParsing.js'; +export * from './utils/flashLiteHelper.js'; export * from './utils/workspaceContext.js'; export * from './utils/environmentContext.js'; export * 
from './utils/ignorePatterns.js'; diff --git a/packages/core/src/prompts/snippets.legacy.ts b/packages/core/src/prompts/snippets.legacy.ts index 8d46fd6a1a..3671490089 100644 --- a/packages/core/src/prompts/snippets.legacy.ts +++ b/packages/core/src/prompts/snippets.legacy.ts @@ -159,6 +159,7 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.${mandateConflictResolution(options.hasHierarchicalMemory)} +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - ${mandateConfirm(options.interactive)} - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. 
Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.${mandateSkillGuidance(options.hasSkills)}${mandateExplainBeforeActing(options.isGemini3)}${mandateContinueWork(options.interactive)} diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 2508181816..aed3c668d1 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -171,6 +171,7 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { - **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. 
Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. ${options.interactive ? 'For Directives, only clarify if critically underspecified; otherwise, work autonomously.' : 'For Directives, you must work autonomously as no further user input is available.'} You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path.${mandateConflictResolution(options.hasHierarchicalMemory)} +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - ${mandateConfirm(options.interactive)} - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. 
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.${mandateSkillGuidance(options.hasSkills)} diff --git a/packages/core/src/services/test-data/resolved-aliases-retry.golden.json b/packages/core/src/services/test-data/resolved-aliases-retry.golden.json index 3b38b909d8..cca52aec89 100644 --- a/packages/core/src/services/test-data/resolved-aliases-retry.golden.json +++ b/packages/core/src/services/test-data/resolved-aliases-retry.golden.json @@ -126,6 +126,17 @@ } } }, + "flash-lite-helper": { + "model": "gemini-2.5-flash-lite", + "generateContentConfig": { + "temperature": 0.2, + "topP": 1, + "maxOutputTokens": 120, + "thinkingConfig": { + "thinkingBudget": 0 + } + } + }, "edit-corrector": { "model": "gemini-2.5-flash-lite", "generateContentConfig": { diff --git a/packages/core/src/services/test-data/resolved-aliases.golden.json b/packages/core/src/services/test-data/resolved-aliases.golden.json index 3b38b909d8..cca52aec89 100644 --- a/packages/core/src/services/test-data/resolved-aliases.golden.json +++ b/packages/core/src/services/test-data/resolved-aliases.golden.json @@ -126,6 +126,17 @@ } } }, + "flash-lite-helper": { + "model": "gemini-2.5-flash-lite", + "generateContentConfig": { + "temperature": 0.2, + "topP": 1, + "maxOutputTokens": 120, + "thinkingConfig": { + "thinkingBudget": 0 + } + } + }, "edit-corrector": { "model": "gemini-2.5-flash-lite", "generateContentConfig": { diff --git a/packages/core/src/telemetry/startupProfiler.ts b/packages/core/src/telemetry/startupProfiler.ts index 89421380b7..0b025ec8f6 100644 --- a/packages/core/src/telemetry/startupProfiler.ts +++ b/packages/core/src/telemetry/startupProfiler.ts @@ -243,6 +243,15 @@ export class StartupProfiler { // Clear all phases. this.phases.clear(); } + + /** + * Resets the profiler state for tests. 
+ */ + clear(): void { + this.phases.clear(); + performance.clearMarks(); + performance.clearMeasures(); + } } export const startupProfiler = StartupProfiler.getInstance(); diff --git a/packages/core/src/utils/flashLiteHelper.test.ts b/packages/core/src/utils/flashLiteHelper.test.ts new file mode 100644 index 0000000000..0be7cdb756 --- /dev/null +++ b/packages/core/src/utils/flashLiteHelper.test.ts @@ -0,0 +1,151 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi } from 'vitest'; +import type { GeminiClient } from '../core/client.js'; +import { + DEFAULT_FLASH_LITE_MODEL_CONFIG_KEY, + generateFlashLiteText, + truncateFlashLiteInput, + generateSteeringAckMessage, +} from './flashLiteHelper.js'; + +describe('truncateFlashLiteInput', () => { + it('returns input as-is when below limit', () => { + expect(truncateFlashLiteInput('hello', 10)).toBe('hello'); + }); + + it('truncates and appends suffix when above limit', () => { + const input = 'abcdefghijklmnopqrstuvwxyz'; + const result = truncateFlashLiteInput(input, 20); + expect(result.length).toBe(20); + expect(result).toContain('...[truncated]'); + }); +}); + +describe('generateFlashLiteText', () => { + const abortSignal = new AbortController().signal; + + it('uses the default flash-lite helper model config and returns response text', async () => { + const geminiClient = { + generateContent: vi.fn().mockResolvedValue({ + candidates: [ + { content: { parts: [{ text: ' Got it. Skipping #2. ' }] } }, + ], + }), + } as unknown as GeminiClient; + + const result = await generateFlashLiteText(geminiClient, { + instruction: 'Write a short acknowledgement sentence.', + input: 'skip #2', + fallbackText: 'Got it.', + abortSignal, + }); + + expect(result).toBe('Got it. 
Skipping #2.'); + expect(geminiClient.generateContent).toHaveBeenCalledWith( + DEFAULT_FLASH_LITE_MODEL_CONFIG_KEY, + expect.any(Array), + abortSignal, + ); + }); + + it('returns fallback text when response text is empty', async () => { + const geminiClient = { + generateContent: vi.fn().mockResolvedValue({}), + } as unknown as GeminiClient; + + const result = await generateFlashLiteText(geminiClient, { + instruction: 'Return one sentence.', + input: 'cancel task 2', + fallbackText: 'Understood. Cancelling task 2.', + abortSignal, + }); + + expect(result).toBe('Understood. Cancelling task 2.'); + }); + + it('returns fallback text when generation throws', async () => { + const geminiClient = { + generateContent: vi.fn().mockRejectedValue(new Error('boom')), + } as unknown as GeminiClient; + + const result = await generateFlashLiteText(geminiClient, { + instruction: 'Return one sentence.', + input: 'cancel task 2', + fallbackText: 'Understood.', + abortSignal, + }); + + expect(result).toBe('Understood.'); + }); + + it('truncates the input before sending to the model', async () => { + const geminiClient = { + generateContent: vi.fn().mockResolvedValue({ + candidates: [{ content: { parts: [{ text: 'Ack.' }] } }], + }), + } as unknown as GeminiClient; + + const longInput = 'x'.repeat(200); + await generateFlashLiteText(geminiClient, { + instruction: 'Return one sentence.', + input: longInput, + fallbackText: 'Understood.', + abortSignal, + maxInputChars: 64, + }); + + const [, contents] = ( + geminiClient.generateContent as ReturnType + ).mock.calls[0]; + const promptText = contents[0].parts[0].text as string; + expect(promptText).toContain('...[truncated]'); + }); +}); + +describe('generateSteeringAckMessage', () => { + it('returns a shortened acknowledgement using flash-lite-helper', async () => { + const geminiClient = { + generateContent: vi.fn().mockResolvedValue({ + candidates: [ + { + content: { + parts: [{ text: 'Got it. I will focus on the tests now.' 
/** Model-config alias used by default for short helper generations. */
export const DEFAULT_FLASH_LITE_MODEL_CONFIG_KEY: ModelConfigKey = {
  model: 'flash-lite-helper',
};

/** Default cap (UTF-16 code units) on the user input embedded in a helper prompt. */
export const DEFAULT_FLASH_LITE_MAX_INPUT_CHARS = 1200;
/** Default cap (UTF-16 code units) on the text returned by a helper generation. */
export const DEFAULT_FLASH_LITE_MAX_OUTPUT_CHARS = 180;
const INPUT_TRUNCATION_SUFFIX = '\n...[truncated]';

/** Instruction appended to every steering hint that is forwarded to the main model. */
export const USER_STEERING_INSTRUCTION =
  'Internal instruction: Re-evaluate the active plan using this user steering update. ' +
  'Classify it as ADD_TASK, MODIFY_TASK, CANCEL_TASK, or EXTRA_CONTEXT. ' +
  'Apply minimal-diff changes only to affected tasks and keep unaffected tasks active. ' +
  'Do not cancel/skip tasks unless the user explicitly cancels them. ' +
  'Acknowledge the steering briefly and state the course correction.';

/** Collapses every whitespace run to a single space and trims both ends. */
function collapseWhitespace(text: string): string {
  return text.replace(/\s+/g, ' ').trim();
}

/**
 * Returns at most `maxLength` leading UTF-16 code units of `text`.
 *
 * When the cut would fall between the two halves of a surrogate pair, the
 * dangling high surrogate is dropped as well, so the result may be one unit
 * shorter than `maxLength` but never ends in half a character.
 */
function sliceAtCodePointBoundary(text: string, maxLength: number): string {
  const head = text.slice(0, Math.max(maxLength, 0));
  if (head.length === text.length) {
    // Nothing was cut off; return the text untouched.
    return head;
  }
  const lastUnit = head.charCodeAt(head.length - 1);
  const endsWithHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return endsWithHighSurrogate ? head.slice(0, -1) : head;
}

/**
 * Wraps a single steering hint in the prompt sent to the model.
 *
 * @param hintText Raw hint typed by the user; surrounding whitespace is removed.
 * @returns The quoted hint followed by {@link USER_STEERING_INSTRUCTION}.
 */
export function buildUserSteeringHintPrompt(hintText: string): string {
  const trimmedText = hintText.trim();
  return `User steering update: "${trimmedText}"\n${USER_STEERING_INSTRUCTION}`;
}

/**
 * Formats a batch of steering hints as a bullet list for the model.
 *
 * Each hint is trimmed (matching {@link buildUserSteeringHintPrompt}) and
 * blank hints are skipped so the list never contains empty bullets.
 *
 * @param hints Hints in the order they should be listed.
 * @returns The formatted prompt, or `null` when there is no non-blank hint.
 */
export function formatUserHintsForModel(hints: string[]): string | null {
  const bullets = hints
    .map((hint) => hint.trim())
    .filter((hint) => hint.length > 0)
    .map((hint) => `- ${hint}`);
  if (bullets.length === 0) {
    return null;
  }
  return `User hints:\n${bullets.join('\n')}\n\n${USER_STEERING_INSTRUCTION}`;
}

const STEERING_ACK_INSTRUCTION =
  'Write one short, friendly sentence acknowledging a user steering update for an in-progress task. ' +
  'Be concrete when possible (e.g., mention skipped/cancelled item numbers). ' +
  'Do not apologize, do not mention internal policy, and do not add extra steps.';
const STEERING_ACK_TIMEOUT_MS = 1200;
const STEERING_ACK_MAX_INPUT_CHARS = 320;
const STEERING_ACK_MAX_OUTPUT_CHARS = 90;

/**
 * Builds the acknowledgement used when no model-written one is available.
 *
 * The hint is echoed after "Understood. ", shortened to 64 characters at most
 * (61 kept plus "..."); an empty hint yields a generic sentence.
 */
function buildSteeringFallbackMessage(hintText: string): string {
  const normalized = collapseWhitespace(hintText);
  if (!normalized) {
    return 'Understood. Adjusting the plan.';
  }
  if (normalized.length <= 64) {
    return `Understood. ${normalized}`;
  }
  return `Understood. ${sliceAtCodePointBoundary(normalized, 61)}...`;
}

/**
 * Produces a one-sentence acknowledgement of a user steering hint.
 *
 * The request is aborted after {@link STEERING_ACK_TIMEOUT_MS}; the timer is
 * always cleared once the generation settles.
 *
 * @param geminiClient Client used to run the helper generation.
 * @param hintText Raw hint typed by the user.
 * @returns The generated sentence, or a locally built fallback when
 *   {@link generateFlashLiteText} falls back.
 */
export async function generateSteeringAckMessage(
  geminiClient: GeminiClient,
  hintText: string,
): Promise<string> {
  const normalizedHint = collapseWhitespace(hintText);
  const fallbackText = buildSteeringFallbackMessage(normalizedHint);

  const abortController = new AbortController();
  const timeout = setTimeout(
    () => abortController.abort(),
    STEERING_ACK_TIMEOUT_MS,
  );

  try {
    return await generateFlashLiteText(geminiClient, {
      instruction: STEERING_ACK_INSTRUCTION,
      input: normalizedHint,
      fallbackText,
      abortSignal: abortController.signal,
      maxInputChars: STEERING_ACK_MAX_INPUT_CHARS,
      maxOutputChars: STEERING_ACK_MAX_OUTPUT_CHARS,
    });
  } finally {
    clearTimeout(timeout);
  }
}

/** Options accepted by {@link generateFlashLiteText}. */
export interface GenerateFlashLiteTextOptions {
  /** Task description placed at the top of the prompt; blank means "skip the call". */
  instruction: string;
  /** User-provided text, truncated to `maxInputChars` before being sent. */
  input: string;
  /** Text returned whenever no usable model output is produced. */
  fallbackText: string;
  /** Signal forwarded to the client to cancel the request. */
  abortSignal: AbortSignal;
  /** Model config to use; defaults to {@link DEFAULT_FLASH_LITE_MODEL_CONFIG_KEY}. */
  modelConfigKey?: ModelConfigKey;
  /** Input cap; defaults to {@link DEFAULT_FLASH_LITE_MAX_INPUT_CHARS}. */
  maxInputChars?: number;
  /** Output cap; defaults to {@link DEFAULT_FLASH_LITE_MAX_OUTPUT_CHARS}. Values <= 0 disable the cap. */
  maxOutputChars?: number;
}

/**
 * Shortens `input` so it fits in `maxInputChars` UTF-16 code units.
 *
 * Over-long input is cut and suffixed with a truncation marker; when the limit
 * is too small to hold the marker, the input is simply cut at the limit. Cuts
 * never split a surrogate pair, so the result can be one unit under the limit.
 *
 * @param input Text to shorten.
 * @param maxInputChars Maximum length of the result; negative values act as 0.
 * @returns `input` unchanged when it already fits, otherwise the shortened text.
 */
export function truncateFlashLiteInput(
  input: string,
  maxInputChars: number = DEFAULT_FLASH_LITE_MAX_INPUT_CHARS,
): string {
  if (maxInputChars <= INPUT_TRUNCATION_SUFFIX.length) {
    // No room for the marker: hard cut.
    return sliceAtCodePointBoundary(input, maxInputChars);
  }
  if (input.length <= maxInputChars) {
    return input;
  }
  const keepChars = maxInputChars - INPUT_TRUNCATION_SUFFIX.length;
  return sliceAtCodePointBoundary(input, keepChars) + INPUT_TRUNCATION_SUFFIX;
}

/**
 * Runs a single-turn helper generation and returns its text.
 *
 * This function never rejects: a blank instruction, an already-aborted
 * signal, an empty response, or an error thrown by the client all resolve to
 * `options.fallbackText` (errors are written to the debug log).
 *
 * @param geminiClient Client whose `generateContent` is called.
 * @param options See {@link GenerateFlashLiteTextOptions}.
 * @returns Whitespace-normalised response text, capped at `maxOutputChars`,
 *   or the fallback text.
 */
export async function generateFlashLiteText(
  geminiClient: GeminiClient,
  options: GenerateFlashLiteTextOptions,
): Promise<string> {
  const {
    instruction,
    input,
    fallbackText,
    abortSignal,
    modelConfigKey = DEFAULT_FLASH_LITE_MODEL_CONFIG_KEY,
    maxInputChars = DEFAULT_FLASH_LITE_MAX_INPUT_CHARS,
    maxOutputChars = DEFAULT_FLASH_LITE_MAX_OUTPUT_CHARS,
  } = options;

  const safeInstruction = instruction.trim();
  if (!safeInstruction) {
    return fallbackText;
  }
  if (abortSignal.aborted) {
    // The caller has already given up; skip the request entirely.
    return fallbackText;
  }

  const safeInput = truncateFlashLiteInput(input.trim(), maxInputChars);
  const prompt = `${safeInstruction}\n\nUser input:\n"""${safeInput}"""`;
  const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }];

  try {
    const response = await geminiClient.generateContent(
      modelConfigKey,
      contents,
      abortSignal,
    );
    const responseText = getResponseText(response)?.replace(/\s+/g, ' ').trim();
    if (!responseText) {
      return fallbackText;
    }

    if (maxOutputChars > 0 && responseText.length > maxOutputChars) {
      return sliceAtCodePointBoundary(responseText, maxOutputChars).trimEnd();
    }
    return responseText;
  } catch (error) {
    debugLogger.debug(
      `[FlashLiteHelper] Generation failed: ${error instanceof Error ? error.message : String(error)}`,
    );
    return fallbackText;
  }
}
\"model\": \"gemini-3-flash-preview\"\n }\n },\n \"gemini-2.5-pro\": {\n \"extends\": \"chat-base-2.5\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n },\n \"gemini-2.5-flash\": {\n \"extends\": \"chat-base-2.5\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash\"\n }\n },\n \"gemini-2.5-flash-lite\": {\n \"extends\": \"chat-base-2.5\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\"\n }\n },\n \"gemini-2.5-flash-base\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash\"\n }\n },\n \"classifier\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"maxOutputTokens\": 1024,\n \"thinkingConfig\": {\n \"thinkingBudget\": 512\n }\n }\n }\n },\n \"prompt-completion\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"temperature\": 0.3,\n \"maxOutputTokens\": 16000,\n \"thinkingConfig\": {\n \"thinkingBudget\": 0\n }\n }\n }\n },\n \"edit-corrector\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"thinkingBudget\": 0\n }\n }\n }\n },\n \"summarizer-default\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"maxOutputTokens\": 2000\n }\n }\n },\n \"summarizer-shell\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"maxOutputTokens\": 2000\n }\n }\n },\n \"web-search\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"tools\": [\n {\n \"googleSearch\": {}\n }\n ]\n }\n }\n },\n \"web-fetch\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"tools\": [\n {\n \"urlContext\": {}\n }\n ]\n }\n }\n },\n \"web-fetch-fallback\": {\n 
\"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {}\n },\n \"loop-detection\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {}\n },\n \"loop-detection-double-check\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n },\n \"llm-edit-fixer\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {}\n },\n \"next-speaker-checker\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {}\n },\n \"chat-compression-3-pro\": {\n \"modelConfig\": {\n \"model\": \"gemini-3-pro-preview\"\n }\n },\n \"chat-compression-3-flash\": {\n \"modelConfig\": {\n \"model\": \"gemini-3-flash-preview\"\n }\n },\n \"chat-compression-2.5-pro\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n },\n \"chat-compression-2.5-flash\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash\"\n }\n },\n \"chat-compression-2.5-flash-lite\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\"\n }\n },\n \"chat-compression-default\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n }\n },\n \"overrides\": [\n {\n \"match\": {\n \"model\": \"chat-base\",\n \"isRetry\": true\n },\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"temperature\": 1\n }\n }\n }\n ]\n}`", + "markdownDescription": "Model configurations.\n\n- Category: `Model`\n- Requires restart: `no`\n- Default: `{\n \"aliases\": {\n \"base\": {\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"temperature\": 0,\n \"topP\": 1\n }\n }\n },\n \"chat-base\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"includeThoughts\": true\n },\n \"temperature\": 1,\n \"topP\": 0.95,\n \"topK\": 64\n }\n }\n },\n \"chat-base-2.5\": {\n \"extends\": \"chat-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"thinkingBudget\": 8192\n }\n }\n }\n },\n \"chat-base-3\": {\n \"extends\": \"chat-base\",\n \"modelConfig\": {\n 
\"generateContentConfig\": {\n \"thinkingConfig\": {\n \"thinkingLevel\": \"HIGH\"\n }\n }\n }\n },\n \"gemini-3-pro-preview\": {\n \"extends\": \"chat-base-3\",\n \"modelConfig\": {\n \"model\": \"gemini-3-pro-preview\"\n }\n },\n \"gemini-3-flash-preview\": {\n \"extends\": \"chat-base-3\",\n \"modelConfig\": {\n \"model\": \"gemini-3-flash-preview\"\n }\n },\n \"gemini-2.5-pro\": {\n \"extends\": \"chat-base-2.5\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n },\n \"gemini-2.5-flash\": {\n \"extends\": \"chat-base-2.5\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash\"\n }\n },\n \"gemini-2.5-flash-lite\": {\n \"extends\": \"chat-base-2.5\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\"\n }\n },\n \"gemini-2.5-flash-base\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash\"\n }\n },\n \"classifier\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"maxOutputTokens\": 1024,\n \"thinkingConfig\": {\n \"thinkingBudget\": 512\n }\n }\n }\n },\n \"prompt-completion\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"temperature\": 0.3,\n \"maxOutputTokens\": 16000,\n \"thinkingConfig\": {\n \"thinkingBudget\": 0\n }\n }\n }\n },\n \"flash-lite-helper\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"temperature\": 0.2,\n \"maxOutputTokens\": 120,\n \"thinkingConfig\": {\n \"thinkingBudget\": 0\n }\n }\n }\n },\n \"edit-corrector\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"thinkingBudget\": 0\n }\n }\n }\n },\n \"summarizer-default\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"maxOutputTokens\": 2000\n 
}\n }\n },\n \"summarizer-shell\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"maxOutputTokens\": 2000\n }\n }\n },\n \"web-search\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"tools\": [\n {\n \"googleSearch\": {}\n }\n ]\n }\n }\n },\n \"web-fetch\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"tools\": [\n {\n \"urlContext\": {}\n }\n ]\n }\n }\n },\n \"web-fetch-fallback\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {}\n },\n \"loop-detection\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {}\n },\n \"loop-detection-double-check\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n },\n \"llm-edit-fixer\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {}\n },\n \"next-speaker-checker\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {}\n },\n \"chat-compression-3-pro\": {\n \"modelConfig\": {\n \"model\": \"gemini-3-pro-preview\"\n }\n },\n \"chat-compression-3-flash\": {\n \"modelConfig\": {\n \"model\": \"gemini-3-flash-preview\"\n }\n },\n \"chat-compression-2.5-pro\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n },\n \"chat-compression-2.5-flash\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash\"\n }\n },\n \"chat-compression-2.5-flash-lite\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\"\n }\n },\n \"chat-compression-default\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n }\n },\n \"overrides\": [\n {\n \"match\": {\n \"model\": \"chat-base\",\n \"isRetry\": true\n },\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"temperature\": 1\n }\n }\n }\n ]\n}`", "default": { "aliases": { "base": { @@ -612,6 +612,19 @@ } } }, + "flash-lite-helper": { + "extends": "base", + "modelConfig": { + "model": 
"gemini-2.5-flash-lite", + "generateContentConfig": { + "temperature": 0.2, + "maxOutputTokens": 120, + "thinkingConfig": { + "thinkingBudget": 0 + } + } + } + }, "edit-corrector": { "extends": "base", "modelConfig": { @@ -737,7 +750,7 @@ "aliases": { "title": "Model Config Aliases", "description": "Named presets for model configs. Can be used in place of a model name and can inherit from other aliases using an `extends` property.", - "markdownDescription": "Named presets for model configs. Can be used in place of a model name and can inherit from other aliases using an `extends` property.\n\n- Category: `Model`\n- Requires restart: `no`\n- Default: `{\n \"base\": {\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"temperature\": 0,\n \"topP\": 1\n }\n }\n },\n \"chat-base\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"includeThoughts\": true\n },\n \"temperature\": 1,\n \"topP\": 0.95,\n \"topK\": 64\n }\n }\n },\n \"chat-base-2.5\": {\n \"extends\": \"chat-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"thinkingBudget\": 8192\n }\n }\n }\n },\n \"chat-base-3\": {\n \"extends\": \"chat-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"thinkingLevel\": \"HIGH\"\n }\n }\n }\n },\n \"gemini-3-pro-preview\": {\n \"extends\": \"chat-base-3\",\n \"modelConfig\": {\n \"model\": \"gemini-3-pro-preview\"\n }\n },\n \"gemini-3-flash-preview\": {\n \"extends\": \"chat-base-3\",\n \"modelConfig\": {\n \"model\": \"gemini-3-flash-preview\"\n }\n },\n \"gemini-2.5-pro\": {\n \"extends\": \"chat-base-2.5\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n },\n \"gemini-2.5-flash\": {\n \"extends\": \"chat-base-2.5\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash\"\n }\n },\n \"gemini-2.5-flash-lite\": {\n \"extends\": \"chat-base-2.5\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\"\n }\n },\n 
\"gemini-2.5-flash-base\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash\"\n }\n },\n \"classifier\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"maxOutputTokens\": 1024,\n \"thinkingConfig\": {\n \"thinkingBudget\": 512\n }\n }\n }\n },\n \"prompt-completion\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"temperature\": 0.3,\n \"maxOutputTokens\": 16000,\n \"thinkingConfig\": {\n \"thinkingBudget\": 0\n }\n }\n }\n },\n \"edit-corrector\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"thinkingBudget\": 0\n }\n }\n }\n },\n \"summarizer-default\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"maxOutputTokens\": 2000\n }\n }\n },\n \"summarizer-shell\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"maxOutputTokens\": 2000\n }\n }\n },\n \"web-search\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"tools\": [\n {\n \"googleSearch\": {}\n }\n ]\n }\n }\n },\n \"web-fetch\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"tools\": [\n {\n \"urlContext\": {}\n }\n ]\n }\n }\n },\n \"web-fetch-fallback\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {}\n },\n \"loop-detection\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {}\n },\n \"loop-detection-double-check\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n },\n \"llm-edit-fixer\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {}\n },\n \"next-speaker-checker\": {\n \"extends\": 
\"gemini-2.5-flash-base\",\n \"modelConfig\": {}\n },\n \"chat-compression-3-pro\": {\n \"modelConfig\": {\n \"model\": \"gemini-3-pro-preview\"\n }\n },\n \"chat-compression-3-flash\": {\n \"modelConfig\": {\n \"model\": \"gemini-3-flash-preview\"\n }\n },\n \"chat-compression-2.5-pro\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n },\n \"chat-compression-2.5-flash\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash\"\n }\n },\n \"chat-compression-2.5-flash-lite\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\"\n }\n },\n \"chat-compression-default\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n }\n}`", + "markdownDescription": "Named presets for model configs. Can be used in place of a model name and can inherit from other aliases using an `extends` property.\n\n- Category: `Model`\n- Requires restart: `no`\n- Default: `{\n \"base\": {\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"temperature\": 0,\n \"topP\": 1\n }\n }\n },\n \"chat-base\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"includeThoughts\": true\n },\n \"temperature\": 1,\n \"topP\": 0.95,\n \"topK\": 64\n }\n }\n },\n \"chat-base-2.5\": {\n \"extends\": \"chat-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"thinkingBudget\": 8192\n }\n }\n }\n },\n \"chat-base-3\": {\n \"extends\": \"chat-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"thinkingLevel\": \"HIGH\"\n }\n }\n }\n },\n \"gemini-3-pro-preview\": {\n \"extends\": \"chat-base-3\",\n \"modelConfig\": {\n \"model\": \"gemini-3-pro-preview\"\n }\n },\n \"gemini-3-flash-preview\": {\n \"extends\": \"chat-base-3\",\n \"modelConfig\": {\n \"model\": \"gemini-3-flash-preview\"\n }\n },\n \"gemini-2.5-pro\": {\n \"extends\": \"chat-base-2.5\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n },\n \"gemini-2.5-flash\": {\n \"extends\": 
\"chat-base-2.5\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash\"\n }\n },\n \"gemini-2.5-flash-lite\": {\n \"extends\": \"chat-base-2.5\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\"\n }\n },\n \"gemini-2.5-flash-base\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash\"\n }\n },\n \"classifier\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"maxOutputTokens\": 1024,\n \"thinkingConfig\": {\n \"thinkingBudget\": 512\n }\n }\n }\n },\n \"prompt-completion\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"temperature\": 0.3,\n \"maxOutputTokens\": 16000,\n \"thinkingConfig\": {\n \"thinkingBudget\": 0\n }\n }\n }\n },\n \"flash-lite-helper\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"temperature\": 0.2,\n \"maxOutputTokens\": 120,\n \"thinkingConfig\": {\n \"thinkingBudget\": 0\n }\n }\n }\n },\n \"edit-corrector\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"thinkingConfig\": {\n \"thinkingBudget\": 0\n }\n }\n }\n },\n \"summarizer-default\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"maxOutputTokens\": 2000\n }\n }\n },\n \"summarizer-shell\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\",\n \"generateContentConfig\": {\n \"maxOutputTokens\": 2000\n }\n }\n },\n \"web-search\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"tools\": [\n {\n \"googleSearch\": {}\n }\n ]\n }\n }\n },\n \"web-fetch\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {\n \"generateContentConfig\": {\n \"tools\": [\n {\n \"urlContext\": 
{}\n }\n ]\n }\n }\n },\n \"web-fetch-fallback\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {}\n },\n \"loop-detection\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {}\n },\n \"loop-detection-double-check\": {\n \"extends\": \"base\",\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n },\n \"llm-edit-fixer\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {}\n },\n \"next-speaker-checker\": {\n \"extends\": \"gemini-2.5-flash-base\",\n \"modelConfig\": {}\n },\n \"chat-compression-3-pro\": {\n \"modelConfig\": {\n \"model\": \"gemini-3-pro-preview\"\n }\n },\n \"chat-compression-3-flash\": {\n \"modelConfig\": {\n \"model\": \"gemini-3-flash-preview\"\n }\n },\n \"chat-compression-2.5-pro\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n },\n \"chat-compression-2.5-flash\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash\"\n }\n },\n \"chat-compression-2.5-flash-lite\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-flash-lite\"\n }\n },\n \"chat-compression-default\": {\n \"modelConfig\": {\n \"model\": \"gemini-2.5-pro\"\n }\n }\n}`", "default": { "base": { "modelConfig": { @@ -841,6 +854,19 @@ } } }, + "flash-lite-helper": { + "extends": "base", + "modelConfig": { + "model": "gemini-2.5-flash-lite", + "generateContentConfig": { + "temperature": 0.2, + "maxOutputTokens": 120, + "thinkingConfig": { + "thinkingBudget": 0 + } + } + } + }, "edit-corrector": { "extends": "base", "modelConfig": {