diff --git a/docs/cli/settings.md b/docs/cli/settings.md index baec68a27a..c17e9898ce 100644 --- a/docs/cli/settings.md +++ b/docs/cli/settings.md @@ -132,6 +132,7 @@ they appear in the UI. | Enable Tool Output Masking | `experimental.toolOutputMasking.enabled` | Enables tool output masking to save tokens. | `true` | | Use OSC 52 Paste | `experimental.useOSC52Paste` | Use OSC 52 sequence for pasting instead of clipboardy (useful for remote sessions). | `false` | | Plan | `experimental.plan` | Enable planning features (Plan Mode and tools). | `false` | +| Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` | ### Skills diff --git a/docs/get-started/configuration.md b/docs/get-started/configuration.md index 21dbebff22..26831693db 100644 --- a/docs/get-started/configuration.md +++ b/docs/get-started/configuration.md @@ -956,6 +956,11 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `false` - **Requires restart:** Yes +- **`experimental.modelSteering`** (boolean): + - **Description:** Enable model steering (user hints) to guide the model + during tool execution. + - **Default:** `false` + #### `skills` - **`skills.enabled`** (boolean): diff --git a/evals/model_steering.eval.ts b/evals/model_steering.eval.ts new file mode 100644 index 0000000000..87109c1225 --- /dev/null +++ b/evals/model_steering.eval.ts @@ -0,0 +1,89 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import { act } from 'react'; +import path from 'node:path'; +import fs from 'node:fs'; +import { appEvalTest } from './app-test-helper.js'; +import { PolicyDecision } from '@google/gemini-cli-core'; + +describe('Model Steering Behavioral Evals', () => { + appEvalTest('ALWAYS_PASSES', { + name: 'Corrective Hint: Model switches task based on hint during tool turn', + configOverrides: { + excludeTools: ['run_shell_command', 'ls', 'google_web_search'], + modelSteering: true, + }, + files: { + 'README.md': + '# Gemini CLI\nThis is a tool for developers.\nLicense: Apache-2.0\nLine 4\nLine 5\nLine 6', + }, + prompt: 'Find the first 5 lines of README.md', + setup: async (rig) => { + // Pause on any relevant tool to inject a corrective hint + rig.setBreakpoint(['read_file', 'list_directory', 'glob']); + }, + assert: async (rig) => { + // Wait for the model to pause on any tool call + await rig.waitForPendingConfirmation( + /read_file|list_directory|glob/i, + 30000, + ); + + // Interrupt with a corrective hint + await rig.addUserHint( + 'Actually, stop what you are doing. Just tell me a short knock-knock joke about a robot instead.', + ); + + // Resolve the tool to let the turn finish and the model see the hint + await rig.resolveAwaitedTool(); + + // Verify the model pivots to the new task + await rig.waitForOutput(/Knock,? knock/i, 40000); + await rig.waitForIdle(30000); + + const output = rig.getStaticOutput(); + expect(output).toMatch(/Knock,? knock/i); + expect(output).not.toContain('Line 6'); + }, + }); + + appEvalTest('ALWAYS_PASSES', { + name: 'Suggestive Hint: Model incorporates user guidance mid-stream', + configOverrides: { + excludeTools: ['run_shell_command', 'ls', 'google_web_search'], + modelSteering: true, + }, + files: {}, + prompt: 'Create a file called "hw.js" with a JS hello world.', + setup: async (rig) => { + // Pause on write_file to inject a suggestive hint + rig.setBreakpoint(['write_file']); + }, + assert: async (rig) => { + // Wait for the model to start creating the first file + await rig.waitForPendingConfirmation('write_file', 30000); + + await rig.addUserHint( + 'Next, create a file called "hw.py" with a python hello world.', + ); + + // Resolve and wait for the model to complete both tasks + await rig.resolveAwaitedTool(); + await rig.waitForPendingConfirmation('write_file', 30000); + await rig.resolveAwaitedTool(); + await rig.waitForIdle(60000); + + const testDir = rig.getTestDir(); + const hwJs = path.join(testDir, 'hw.js'); + const hwPy = path.join(testDir, 'hw.py'); + + expect(fs.existsSync(hwJs), 'hw.js should exist').toBe(true); + expect(fs.existsSync(hwPy), 'hw.py should exist').toBe(true); + }, + }); +}); diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 4109751950..4a17ae8ecc 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -818,6 +818,7 @@ export async function loadCliConfig( skillsSupport: settings.skills?.enabled ?? true, disabledSkills: settings.skills?.disabled, experimentalJitContext: settings.experimental?.jitContext, + modelSteering: settings.experimental?.modelSteering, toolOutputMasking: settings.experimental?.toolOutputMasking, noBrowser: !!process.env['NO_BROWSER'], summarizeToolOutput: settings.model?.summarizeToolOutput, diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 5049bb3c55..a684b5553a 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -1635,6 +1635,16 @@ const SETTINGS_SCHEMA = { description: 'Enable planning features (Plan Mode and tools).', showInDialog: true, }, + modelSteering: { + type: 'boolean', + label: 'Model Steering', + category: 'Experimental', + requiresRestart: false, + default: false, + description: + 'Enable model steering (user hints) to guide the model during tool execution.', + showInDialog: true, + }, }, }, diff --git a/packages/cli/src/integration-tests/modelSteering.test.tsx b/packages/cli/src/integration-tests/modelSteering.test.tsx new file mode 100644 index 0000000000..ca1970cebc --- /dev/null +++ b/packages/cli/src/integration-tests/modelSteering.test.tsx @@ -0,0 +1,84 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, afterEach } from 'vitest'; +import { AppRig } from '../test-utils/AppRig.js'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { PolicyDecision } from '@google/gemini-cli-core'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + +describe('Model Steering Integration', () => { + let rig: AppRig | undefined; + + afterEach(async () => { + await rig?.unmount(); + }); + + it('should steer the model using a hint during a tool turn', async () => { + const fakeResponsesPath = path.join( + __dirname, + '../test-utils/fixtures/steering.responses', + ); + rig = new AppRig({ + fakeResponsesPath, + configOverrides: { modelSteering: true }, + }); + await rig.initialize(); + rig.render(); + await rig.waitForIdle(); + + rig.setToolPolicy('list_directory', PolicyDecision.ASK_USER); + rig.setToolPolicy('read_file', PolicyDecision.ASK_USER); + + rig.setMockCommands([ + { + command: /list_directory/, + result: { + output: 'file1.txt\nfile2.js\nfile3.md', + exitCode: 0, + }, + }, + { + command: /read_file file1.txt/, + result: { + output: 'This is file1.txt content.', + exitCode: 0, + }, + }, + ]); + + // Start a long task + await rig.type('Start long task'); + await rig.pressEnter(); + + // Wait for the model to call 'list_directory' (Confirming state) + await rig.waitForOutput('ReadFolder'); + + // Injected a hint while the model is in a tool turn + await rig.addUserHint('focus on .txt'); + + // Resolve list_directory (Proceed) + await rig.resolveTool('ReadFolder'); + + // Wait for the model to process the hint and output the next action + // Based on steering.responses, it should first acknowledge the hint + await rig.waitForOutput('ACK: I will focus on .txt files now.'); + + // Then it should proceed with the next action + await rig.waitForOutput( + /Since you want me to focus on .txt files,[\s\S]*I will read file1.txt/, + ); + await rig.waitForOutput('ReadFile'); + + // Resolve read_file (Proceed) + await rig.resolveTool('ReadFile'); + + // Wait for final completion + await rig.waitForOutput('Task complete.'); + }); +}); diff --git a/packages/cli/src/test-utils/AppRig.test.tsx b/packages/cli/src/test-utils/AppRig.test.tsx index 1c24b09539..bada7965f7 100644 --- a/packages/cli/src/test-utils/AppRig.test.tsx +++ b/packages/cli/src/test-utils/AppRig.test.tsx @@ -4,10 +4,11 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, afterEach } from 'vitest'; +import { describe, it, afterEach, expect } from 'vitest'; import { AppRig } from './AppRig.js'; import path from 'node:path'; import { fileURLToPath } from 'node:url'; +import { debugLogger } from '@google/gemini-cli-core'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -18,6 +19,47 @@ describe('AppRig', () => { await rig?.unmount(); }); + it('should handle deterministic tool turns with breakpoints', async () => { + const fakeResponsesPath = path.join( + __dirname, + 'fixtures', + 'steering.responses', + ); + rig = new AppRig({ + fakeResponsesPath, + configOverrides: { modelSteering: true }, + }); + await rig.initialize(); + rig.render(); + await rig.waitForIdle(); + + // Set breakpoints on the canonical tool names + rig.setBreakpoint('list_directory'); + rig.setBreakpoint('read_file'); + + // Start a task + debugLogger.log('[Test] Sending message: Start long task'); + await rig.sendMessage('Start long task'); + + // Wait for the first breakpoint (list_directory) + const pending1 = await rig.waitForPendingConfirmation('list_directory'); + expect(pending1.toolName).toBe('list_directory'); + + // Injected a hint + await rig.addUserHint('focus on .txt'); + + // Resolve and wait for the NEXT breakpoint (read_file) + // resolveTool will automatically remove the breakpoint policy for list_directory + await rig.resolveTool('list_directory'); + + const pending2 = await rig.waitForPendingConfirmation('read_file'); + expect(pending2.toolName).toBe('read_file'); + + // Resolve and finish. Also removes read_file breakpoint. + await rig.resolveTool('read_file'); + await rig.waitForOutput('Task complete.', 100000); + }); + it('should render the app and handle a simple message', async () => { const fakeResponsesPath = path.join( __dirname, diff --git a/packages/cli/src/test-utils/AppRig.tsx b/packages/cli/src/test-utils/AppRig.tsx index b0db8b8ac6..c4a9ad515c 100644 --- a/packages/cli/src/test-utils/AppRig.tsx +++ b/packages/cli/src/test-utils/AppRig.tsx @@ -74,6 +74,20 @@ class MockExtensionManager extends ExtensionLoader { setRequestSetting = vi.fn(); } +// Mock GeminiRespondingSpinner to disable animations (avoiding 'act()' warnings) without triggering screen reader mode. +vi.mock('../ui/components/GeminiRespondingSpinner.js', async () => { + const React = await import('react'); + const { Text } = await import('ink'); + return { + GeminiSpinner: () => React.createElement(Text, null, '...'), + GeminiRespondingSpinner: ({ + nonRespondingDisplay, + }: { + nonRespondingDisplay: string; + }) => React.createElement(Text, null, nonRespondingDisplay || '...'), + }; +}); + export interface AppRigOptions { fakeResponsesPath?: string; terminalWidth?: number; @@ -449,12 +463,11 @@ export class AppRig { this.lastAwaitedConfirmation = undefined; } - async addUserHint(_hint: string) { + async addUserHint(hint: string) { if (!this.config) throw new Error('AppRig not initialized'); - // TODO(joshualitt): Land hints. - // await act(async () => { - // this.config!.addUserHint(hint); - // }); + await act(async () => { + this.config!.userHintService.addUserHint(hint); + }); } getConfig(): Config { diff --git a/packages/cli/src/test-utils/fixtures/steering.responses b/packages/cli/src/test-utils/fixtures/steering.responses new file mode 100644 index 0000000000..66407f819e --- /dev/null +++ b/packages/cli/src/test-utils/fixtures/steering.responses @@ -0,0 +1,4 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"Starting a long task. First, I'll list the files."},{"functionCall":{"name":"list_directory","args":{"dir_path":"."}}}]},"finishReason":"STOP"}]}]} +{"method":"generateContent","response":{"candidates":[{"content":{"role":"model","parts":[{"text":"ACK: I will focus on .txt files now."}]},"finishReason":"STOP"}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I see the files. Since you want me to focus on .txt files, I will read file1.txt."},{"functionCall":{"name":"read_file","args":{"file_path":"file1.txt"}}}]},"finishReason":"STOP"}]}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I have read file1.txt. Task complete."}]},"finishReason":"STOP"}]}]} diff --git a/packages/cli/src/test-utils/render.tsx b/packages/cli/src/test-utils/render.tsx index cb944b7c91..f043fade8d 100644 --- a/packages/cli/src/test-utils/render.tsx +++ b/packages/cli/src/test-utils/render.tsx @@ -166,6 +166,8 @@ const baseMockUiState = { proQuotaRequest: null, validationRequest: null, }, + hintMode: false, + hintBuffer: '', }; export const mockAppState: AppState = { @@ -219,6 +221,10 @@ const mockUIActions: UIActions = { setActiveBackgroundShellPid: vi.fn(), setIsBackgroundShellListOpen: vi.fn(), setAuthContext: vi.fn(), + onHintInput: vi.fn(), + onHintBackspace: vi.fn(), + onHintClear: vi.fn(), + onHintSubmit: vi.fn(), handleRestart: vi.fn(), handleNewAgentsSelect: vi.fn(), }; diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index 627027680a..af05cb41a8 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -79,6 +79,8 @@ import { type AgentsDiscoveredPayload, ChangeAuthRequestedError, CoreToolCallStatus, + generateSteeringAckMessage, + buildUserSteeringHintPrompt, } from '@google/gemini-cli-core'; import { validateAuthMethod } from '../config/auth.js'; import process from 'node:process'; @@ -996,6 +998,30 @@ Logging in with Google... Restarting Gemini CLI to continue. } }, [pendingRestorePrompt, inputHistory, historyManager.history]); + const pendingHintsRef = useRef([]); + const [pendingHintCount, setPendingHintCount] = useState(0); + + const consumePendingHints = useCallback(() => { + if (pendingHintsRef.current.length === 0) { + return null; + } + const hint = pendingHintsRef.current.join('\n'); + pendingHintsRef.current = []; + setPendingHintCount(0); + return hint; + }, []); + + useEffect(() => { + const hintListener = (hint: string) => { + pendingHintsRef.current.push(hint); + setPendingHintCount((prev) => prev + 1); + }; + config.userHintService.onUserHint(hintListener); + return () => { + config.userHintService.offUserHint(hintListener); + }; + }, [config]); + const { streamingState, submitQuery, @@ -1034,6 +1060,7 @@ Logging in with Google... Restarting Gemini CLI to continue. terminalWidth, terminalHeight, embeddedShellFocused, + consumePendingHints, ); toggleBackgroundShellRef.current = toggleBackgroundShell; @@ -1142,10 +1169,38 @@ Logging in with Google... Restarting Gemini CLI to continue. ], ); + const handleHintSubmit = useCallback( + (hint: string) => { + const trimmed = hint.trim(); + if (!trimmed) { + return; + } + config.userHintService.addUserHint(trimmed); + // Render hints with a distinct style. + historyManager.addItem({ + type: 'hint', + text: trimmed, + }); + }, + [config, historyManager], + ); + const handleFinalSubmit = useCallback( async (submittedValue: string) => { const isSlash = isSlashCommand(submittedValue.trim()); const isIdle = streamingState === StreamingState.Idle; + const isAgentRunning = + streamingState === StreamingState.Responding || + isToolExecuting([ + ...pendingSlashCommandHistoryItems, + ...pendingGeminiHistoryItems, + ]); + + if (config.isModelSteeringEnabled() && isAgentRunning && !isSlash) { + handleHintSubmit(submittedValue); + addInput(submittedValue); + return; + } if (isSlash || (isIdle && isMcpReady)) { if (!isSlash) { @@ -1187,7 +1242,10 @@ Logging in with Google... Restarting Gemini CLI to continue. isMcpReady, streamingState, messageQueue.length, + pendingSlashCommandHistoryItems, + pendingGeminiHistoryItems, config, + handleHintSubmit, ], ); @@ -1939,6 +1997,44 @@ Logging in with Google... Restarting Gemini CLI to continue. setShortcutsHelpVisible, ]); + useEffect(() => { + if ( + !isConfigInitialized || + !config.isModelSteeringEnabled() || + streamingState !== StreamingState.Idle || + !isMcpReady || + isToolAwaitingConfirmation(pendingHistoryItems) + ) { + return; + } + + const pendingHint = consumePendingHints(); + if (!pendingHint) { + return; + } + + void generateSteeringAckMessage( + config.getBaseLlmClient(), + pendingHint, + ).then((ackText) => { + historyManager.addItem({ + type: 'info', + text: ackText, + }); + }); + void submitQuery([{ text: buildUserSteeringHintPrompt(pendingHint) }]); + }, [ + config, + historyManager, + isConfigInitialized, + isMcpReady, + streamingState, + submitQuery, + consumePendingHints, + pendingHistoryItems, + pendingHintCount, + ]); + const allToolCalls = useMemo( () => pendingHistoryItems @@ -2105,6 +2201,13 @@ Logging in with Google... Restarting Gemini CLI to continue. isBackgroundShellListOpen, adminSettingsChanged, newAgents, + hintMode: + config.isModelSteeringEnabled() && + isToolExecuting([ + ...pendingSlashCommandHistoryItems, + ...pendingGeminiHistoryItems, + ]), + hintBuffer: '', }), [ isThemeDialogOpen, @@ -2276,6 +2379,10 @@ Logging in with Google... Restarting Gemini CLI to continue. setActiveBackgroundShellPid, setIsBackgroundShellListOpen, setAuthContext, + onHintInput: () => {}, + onHintBackspace: () => {}, + onHintClear: () => {}, + onHintSubmit: () => {}, handleRestart: async () => { if (process.send) { const remoteSettings = config.getRemoteAdminSettings(); diff --git a/packages/cli/src/ui/commands/clearCommand.test.ts b/packages/cli/src/ui/commands/clearCommand.test.ts index bc204044f7..d33dc5884d 100644 --- a/packages/cli/src/ui/commands/clearCommand.test.ts +++ b/packages/cli/src/ui/commands/clearCommand.test.ts @@ -27,9 +27,11 @@ import { uiTelemetryService } from '@google/gemini-cli-core'; describe('clearCommand', () => { let mockContext: CommandContext; let mockResetChat: ReturnType; + let mockHintClear: ReturnType; beforeEach(() => { mockResetChat = vi.fn().mockResolvedValue(undefined); + mockHintClear = vi.fn(); const mockGetChatRecordingService = vi.fn(); vi.clearAllMocks(); @@ -50,12 +52,15 @@ describe('clearCommand', () => { fireSessionEndEvent: vi.fn().mockResolvedValue(undefined), fireSessionStartEvent: vi.fn().mockResolvedValue(undefined), }), + userHintService: { + clear: mockHintClear, + }, }, }, }); }); - it('should set debug message, reset chat, reset telemetry, and clear UI when config is available', async () => { + it('should set debug message, reset chat, reset telemetry, clear hints, and clear UI when config is available', async () => { if (!clearCommand.action) { throw new Error('clearCommand must have an action.'); } @@ -68,6 +73,7 @@ describe('clearCommand', () => { expect(mockContext.ui.setDebugMessage).toHaveBeenCalledTimes(1); expect(mockResetChat).toHaveBeenCalledTimes(1); + expect(mockHintClear).toHaveBeenCalledTimes(1); expect(uiTelemetryService.setLastPromptTokenCount).toHaveBeenCalledWith(0); expect(uiTelemetryService.setLastPromptTokenCount).toHaveBeenCalledTimes(1); expect(mockContext.ui.clear).toHaveBeenCalledTimes(1); diff --git a/packages/cli/src/ui/commands/clearCommand.ts b/packages/cli/src/ui/commands/clearCommand.ts index 7707876000..385d3f9540 100644 --- a/packages/cli/src/ui/commands/clearCommand.ts +++ b/packages/cli/src/ui/commands/clearCommand.ts @@ -43,6 +43,9 @@ export const clearCommand: SlashCommand = { context.ui.setDebugMessage('Clearing terminal.'); } + // Reset user steering hints + config?.userHintService.clear(); + // Start a new conversation recording with a new session ID if (config && chatRecordingService) { const newSessionId = randomUUID(); diff --git a/packages/cli/src/ui/components/HistoryItemDisplay.test.tsx b/packages/cli/src/ui/components/HistoryItemDisplay.test.tsx index 8d995cf467..a1e0779385 100644 --- a/packages/cli/src/ui/components/HistoryItemDisplay.test.tsx +++ b/packages/cli/src/ui/components/HistoryItemDisplay.test.tsx @@ -45,6 +45,18 @@ describe('', () => { expect(lastFrame()).toContain('Hello'); }); + it('renders HintMessage for "hint" type', () => { + const item: HistoryItem = { + ...baseItem, + type: 'hint', + text: 'Try using ripgrep first', + }; + const { lastFrame } = renderWithProviders( + , + ); + expect(lastFrame()).toContain('Try using ripgrep first'); + }); + it('renders UserMessage for "user" type with slash command', () => { const item: HistoryItem = { ...baseItem, diff --git a/packages/cli/src/ui/components/HistoryItemDisplay.tsx b/packages/cli/src/ui/components/HistoryItemDisplay.tsx index 8735566641..f863e2272a 100644 --- a/packages/cli/src/ui/components/HistoryItemDisplay.tsx +++ b/packages/cli/src/ui/components/HistoryItemDisplay.tsx @@ -35,6 +35,7 @@ import { ChatList } from './views/ChatList.js'; import { HooksList } from './views/HooksList.js'; import { ModelMessage } from './messages/ModelMessage.js'; import { ThinkingMessage } from './messages/ThinkingMessage.js'; +import { HintMessage } from './messages/HintMessage.js'; import { getInlineThinkingMode } from '../utils/inlineThinkingMode.js'; import { useSettings } from '../contexts/SettingsContext.js'; @@ -65,6 +66,9 @@ export const HistoryItemDisplay: React.FC = ({ {itemForDisplay.type === 'thinking' && inlineThinkingMode !== 'off' && ( )} + {itemForDisplay.type === 'hint' && ( + + )} {itemForDisplay.type === 'user' && ( )} @@ -96,6 +100,7 @@ export const HistoryItemDisplay: React.FC = ({ text={itemForDisplay.text} icon={itemForDisplay.icon} color={itemForDisplay.color} + marginBottom={itemForDisplay.marginBottom} /> )} {itemForDisplay.type === 'warning' && ( diff --git a/packages/cli/src/ui/components/InputPrompt.tsx b/packages/cli/src/ui/components/InputPrompt.tsx index ca9400e4d1..7e9ec74ac4 100644 --- a/packages/cli/src/ui/components/InputPrompt.tsx +++ b/packages/cli/src/ui/components/InputPrompt.tsx @@ -226,6 +226,7 @@ export const InputPrompt: React.FC = ({ backgroundShells, backgroundShellHeight, shortcutsHelpVisible, + hintMode, } = useUIState(); const [suppressCompletion, setSuppressCompletion] = useState(false); const { handlePress: registerPlainTabPress, resetCount: resetPlainTabPress } = @@ -267,7 +268,7 @@ export const InputPrompt: React.FC = ({ ]); const [expandedSuggestionIndex, setExpandedSuggestionIndex] = useState(-1); - const shellHistory = useShellHistory(config.getProjectRoot()); + const shellHistory = useShellHistory(config.getProjectRoot(), config.storage); const shellHistoryData = shellHistory.history; const completion = useCommandCompletion({ @@ -1420,7 +1421,9 @@ export const InputPrompt: React.FC = ({ /> ) : null} = ({ text }) => { + const prefix = '💡 '; + const prefixWidth = prefix.length; + const config = useConfig(); + const useBackgroundColor = config.getUseBackgroundColor(); + + return ( + + + + + {prefix} + + + + + {`Steering Hint: ${text}`} + + + + + ); +}; diff --git a/packages/cli/src/ui/components/messages/InfoMessage.tsx b/packages/cli/src/ui/components/messages/InfoMessage.tsx index d092e292b1..e725a23993 100644 --- a/packages/cli/src/ui/components/messages/InfoMessage.tsx +++ b/packages/cli/src/ui/components/messages/InfoMessage.tsx @@ -13,19 +13,21 @@ interface InfoMessageProps { text: string; icon?: string; color?: string; + marginBottom?: number; } export const InfoMessage: React.FC = ({ text, icon, color, + marginBottom, }) => { color ??= theme.status.warning; const prefix = icon ?? 'ℹ '; const prefixWidth = prefix.length; return ( - + {prefix} diff --git a/packages/cli/src/ui/contexts/UIActionsContext.tsx b/packages/cli/src/ui/contexts/UIActionsContext.tsx index c80507f9d7..af8706cfb1 100644 --- a/packages/cli/src/ui/contexts/UIActionsContext.tsx +++ b/packages/cli/src/ui/contexts/UIActionsContext.tsx @@ -80,6 +80,10 @@ export interface UIActions { setActiveBackgroundShellPid: (pid: number) => void; setIsBackgroundShellListOpen: (isOpen: boolean) => void; setAuthContext: (context: { requiresRestart?: boolean }) => void; + onHintInput: (char: string) => void; + onHintBackspace: () => void; + onHintClear: () => void; + onHintSubmit: (hint: string) => void; handleRestart: () => void; handleNewAgentsSelect: (choice: NewAgentsChoice) => Promise; } diff --git a/packages/cli/src/ui/contexts/UIStateContext.tsx b/packages/cli/src/ui/contexts/UIStateContext.tsx index e64b5a1f99..2df7473b0c 100644 --- a/packages/cli/src/ui/contexts/UIStateContext.tsx +++ b/packages/cli/src/ui/contexts/UIStateContext.tsx @@ -177,6 +177,8 @@ export interface UIState { isBackgroundShellListOpen: boolean; adminSettingsChanged: boolean; newAgents: AgentDefinition[] | null; + hintMode: boolean; + hintBuffer: string; transientMessage: { text: string; type: TransientMessageType; diff --git a/packages/cli/src/ui/hooks/atCommandProcessor.ts b/packages/cli/src/ui/hooks/atCommandProcessor.ts index ebf7707573..36868d597b 100644 --- a/packages/cli/src/ui/hooks/atCommandProcessor.ts +++ b/packages/cli/src/ui/hooks/atCommandProcessor.ts @@ -21,7 +21,10 @@ import { CoreToolCallStatus, } from '@google/gemini-cli-core'; import { Buffer } from 'node:buffer'; -import type { HistoryItem, IndividualToolCallDisplay } from '../types.js'; +import type { + HistoryItemToolGroup, + IndividualToolCallDisplay, +} from '../types.js'; import type { UseHistoryManagerReturn } from './useHistoryManager.js'; const REF_CONTENT_HEADER = `\n${REFERENCE_CONTENT_START}`; @@ -697,7 +700,7 @@ export async function handleAtCommand({ { type: 'tool_group', tools: allDisplays, - } as Omit, + } as HistoryItemToolGroup, userMessageTimestamp, ); } diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx index 24c01ce06d..2aac982b6d 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx +++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx @@ -65,6 +65,11 @@ const MockedGeminiClientClass = vi.hoisted(() => this.startChat = mockStartChat; this.sendMessageStream = mockSendMessageStream; this.addHistory = vi.fn(); + this.generateContent = vi.fn().mockResolvedValue({ + candidates: [ + { content: { parts: [{ text: 'Got it. Focusing on tests only.' }] } }, + ], + }); this.getCurrentSequenceModel = vi.fn().mockReturnValue('test-model'); this.getChat = vi.fn().mockReturnValue({ recordCompletedToolCalls: vi.fn(), @@ -264,6 +269,13 @@ describe('useGeminiStream', () => { getGlobalMemory: vi.fn(() => ''), getUserMemory: vi.fn(() => ''), getMessageBus: vi.fn(() => mockMessageBus), + getBaseLlmClient: vi.fn(() => ({ + generateContent: vi.fn().mockResolvedValue({ + candidates: [ + { content: { parts: [{ text: 'Got it. Focusing on tests only.' }] } }, + ], + }), + })), getIdeMode: vi.fn(() => false), getEnableHooks: vi.fn(() => false), } as unknown as Config; @@ -675,6 +687,114 @@ describe('useGeminiStream', () => { ); }); + it('should inject steering hint prompt for continuation', async () => { + const toolCallResponseParts: Part[] = [{ text: 'tool final response' }]; + const completedToolCalls: TrackedToolCall[] = [ + { + request: { + callId: 'call1', + name: 'tool1', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-ack', + }, + status: 'success', + responseSubmittedToGemini: false, + response: { + callId: 'call1', + responseParts: toolCallResponseParts, + errorType: undefined, + }, + tool: { + displayName: 'MockTool', + }, + invocation: { + getDescription: () => `Mock description`, + } as unknown as AnyToolInvocation, + } as TrackedCompletedToolCall, + ]; + + mockSendMessageStream.mockReturnValue( + (async function* () { + yield { + type: ServerGeminiEventType.Content, + value: 'Applied the requested adjustment.', + }; + })(), + ); + + let capturedOnComplete: + | ((completedTools: TrackedToolCall[]) => Promise) + | null = null; + mockUseToolScheduler.mockImplementation((onComplete) => { + capturedOnComplete = onComplete; + return [ + [], + mockScheduleToolCalls, + mockMarkToolsAsSubmitted, + vi.fn(), + mockCancelAllToolCalls, + 0, + ]; + }); + + renderHookWithProviders(() => + useGeminiStream( + new MockedGeminiClientClass(mockConfig), + [], + mockAddItem, + mockConfig, + mockLoadedSettings, + mockOnDebugMessage, + mockHandleSlashCommand, + false, + () => 'vscode' as EditorType, + () => {}, + () => Promise.resolve(), + false, + () => {}, + () => {}, + () => {}, + 80, + 24, + undefined, + () => 'focus on tests only', + ), + ); + + await act(async () => { + if (capturedOnComplete) { + await new Promise((resolve) => setTimeout(resolve, 0)); + await capturedOnComplete(completedToolCalls); + } + }); + + await waitFor(() => { + expect(mockSendMessageStream).toHaveBeenCalledTimes(1); + }); + + const sentParts = mockSendMessageStream.mock.calls[0][0] as Part[]; + const injectedHintPart = sentParts[0] as { text?: string }; + expect(injectedHintPart.text).toContain('User steering update:'); + expect(injectedHintPart.text).toContain( + '\nfocus on tests only\n', + ); + expect(injectedHintPart.text).toContain( + 'Classify it as ADD_TASK, MODIFY_TASK, CANCEL_TASK, or EXTRA_CONTEXT.', + ); + expect(injectedHintPart.text).toContain( + 'Do not cancel/skip tasks unless the user explicitly cancels them.', + ); + expect( + mockAddItem.mock.calls.some( + ([item]) => + item?.type === 'info' && + typeof item.text === 'string' && + item.text.includes('Got it. Focusing on tests only.'), + ), + ).toBe(true); + }); + it('should handle all tool calls being cancelled', async () => { const cancelledToolCalls: TrackedToolCall[] = [ { diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index e86f23a51e..a4733699ce 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -34,6 +34,8 @@ import { coreEvents, CoreEvent, CoreToolCallStatus, + buildUserSteeringHintPrompt, + generateSteeringAckMessage, } from '@google/gemini-cli-core'; import type { Config, @@ -55,6 +57,7 @@ import type { HistoryItemThinking, HistoryItemWithoutId, HistoryItemToolGroup, + HistoryItemInfo, IndividualToolCallDisplay, SlashCommandProcessorResult, HistoryItemModel, @@ -191,6 +194,7 @@ export const useGeminiStream = ( terminalWidth: number, terminalHeight: number, isShellFocused?: boolean, + consumeUserHint?: () => string | null, ) => { const [initError, setInitError] = useState(null); const [retryStatus, setRetryStatus] = useState( @@ -1604,6 +1608,29 @@ export const useGeminiStream = ( const responsesToSend: Part[] = geminiTools.flatMap( (toolCall) => toolCall.response.responseParts, ); + + if (consumeUserHint) { + const userHint = consumeUserHint(); + if (userHint && userHint.trim().length > 0) { + const hintText = userHint.trim(); + responsesToSend.unshift({ + text: buildUserSteeringHintPrompt(hintText), + }); + void generateSteeringAckMessage( + config.getBaseLlmClient(), + hintText, + ).then((ackText) => { + addItem({ + type: 'info', + icon: '· ', + color: theme.text.secondary, + marginBottom: 1, + text: ackText, + } as HistoryItemInfo); + }); + } + } + const callIdsToMarkAsSubmitted = geminiTools.map( (toolCall) => toolCall.request.callId, ); @@ -1636,6 +1663,8 @@ export const useGeminiStream = ( modelSwitchedFromQuotaError, addItem, registerBackgroundShell, + consumeUserHint, + config, ], ); diff --git a/packages/cli/src/ui/types.ts b/packages/cli/src/ui/types.ts index 7cb06fbd15..2d40f0a48c 100644 --- a/packages/cli/src/ui/types.ts +++ b/packages/cli/src/ui/types.ts @@ -148,6 +148,7 @@ export type HistoryItemInfo = HistoryItemBase & { text: string; icon?: string; color?: string; + marginBottom?: number; }; export type HistoryItemError = HistoryItemBase & { @@ -252,6 +253,11 @@ export type HistoryItemThinking = HistoryItemBase & { thought: ThoughtSummary; }; +export type HistoryItemHint = HistoryItemBase & { + type: 'hint'; + text: string; +}; + export type HistoryItemChatList = HistoryItemBase & { type: 'chat_list'; chats: ChatDetail[]; @@ -376,6 +382,7 @@ export type HistoryItemWithoutId = | HistoryItemMcpStatus | HistoryItemChatList | HistoryItemThinking + | HistoryItemHint | HistoryItemHooksList; export type HistoryItem = HistoryItemWithoutId & { id: number }; @@ -401,6 +408,7 @@ export enum MessageType { MCP_STATUS = 'mcp_status', CHAT_LIST = 'chat_list', HOOKS_LIST = 'hooks_list', + HINT = 'hint', } // Simplified message structure for internal feedback diff --git a/packages/core/src/agents/local-executor.test.ts b/packages/core/src/agents/local-executor.test.ts index d2634ecc52..a9a0697bce 100644 --- a/packages/core/src/agents/local-executor.test.ts +++ b/packages/core/src/agents/local-executor.test.ts @@ -2037,6 +2037,215 @@ describe('LocalAgentExecutor', () => { expect(recoveryEvent.success).toBe(true); expect(recoveryEvent.reason).toBe(AgentTerminateMode.MAX_TURNS); }); + + describe('Model Steering', () => { + let configWithHints: Config; + + beforeEach(() => { + configWithHints = makeFakeConfig({ modelSteering: true }); + vi.spyOn(configWithHints, 'getAgentRegistry').mockReturnValue({ + getAllAgentNames: () => [], + } as unknown as AgentRegistry); + vi.spyOn(configWithHints, 'getToolRegistry').mockReturnValue( + parentToolRegistry, + ); + }); + + it('should inject user hints into the next turn after they are added', async () => { + const definition = createTestDefinition(); + + const executor = await LocalAgentExecutor.create( + definition, + configWithHints, + ); + + // Turn 1: Model calls LS + mockModelResponse( + [{ name: LS_TOOL_NAME, args: { path: '.' }, id: 'call1' }], + 'T1: Listing', + ); + + // We use a manual promise to ensure the hint is added WHILE Turn 1 is "running" + let resolveToolCall: (value: unknown) => void; + const toolCallPromise = new Promise((resolve) => { + resolveToolCall = resolve; + }); + mockScheduleAgentTools.mockReturnValueOnce(toolCallPromise); + + // Turn 2: Model calls complete_task + mockModelResponse( + [ + { + name: TASK_COMPLETE_TOOL_NAME, + args: { finalResult: 'Done' }, + id: 'call2', + }, + ], + 'T2: Done', + ); + + const runPromise = executor.run({ goal: 'Hint test' }, signal); + + // Give the loop a chance to start and register the listener + await vi.advanceTimersByTimeAsync(1); + + configWithHints.userHintService.addUserHint('Initial Hint'); + + // Resolve the tool call to complete Turn 1 + resolveToolCall!([ + { + status: 'success', + request: { + callId: 'call1', + name: LS_TOOL_NAME, + args: { path: '.' }, + isClientInitiated: false, + prompt_id: 'p1', + }, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + callId: 'call1', + resultDisplay: 'file1.txt', + responseParts: [ + { + functionResponse: { + name: LS_TOOL_NAME, + response: { result: 'file1.txt' }, + id: 'call1', + }, + }, + ], + }, + }, + ]); + + await runPromise; + + // The first call to sendMessageStream should NOT contain the hint (it was added after start) + // The SECOND call to sendMessageStream SHOULD contain the hint + expect(mockSendMessageStream).toHaveBeenCalledTimes(2); + const secondTurnMessageParts = mockSendMessageStream.mock.calls[1][1]; + expect(secondTurnMessageParts).toContainEqual( + expect.objectContaining({ + text: expect.stringContaining('Initial Hint'), + }), + ); + }); + + it('should NOT inject legacy hints added before executor was created', async () => { + const definition = createTestDefinition(); + configWithHints.userHintService.addUserHint('Legacy Hint'); + + const executor = await LocalAgentExecutor.create( + definition, + configWithHints, + ); + + mockModelResponse([ + { + name: TASK_COMPLETE_TOOL_NAME, + args: { finalResult: 'Done' }, + id: 'call1', + }, + ]); + + await executor.run({ goal: 'Isolation test' }, signal); + + // The first call to sendMessageStream should NOT contain the legacy hint + expect(mockSendMessageStream).toHaveBeenCalled(); + const firstTurnMessageParts = mockSendMessageStream.mock.calls[0][1]; + // We expect only the goal, no hints injected at turn start + for (const part of firstTurnMessageParts) { + if (part.text) { + expect(part.text).not.toContain('Legacy Hint'); + } + } + }); + + it('should inject mid-execution hints into subsequent turns', async () => { + const definition = createTestDefinition(); + const executor = await LocalAgentExecutor.create( + definition, + configWithHints, + ); + + // Turn 1: Model calls LS + mockModelResponse( + [{ name: LS_TOOL_NAME, args: { path: '.' }, id: 'call1' }], + 'T1: Listing', + ); + + // We use a manual promise to ensure the hint is added WHILE Turn 1 is "running" + let resolveToolCall: (value: unknown) => void; + const toolCallPromise = new Promise((resolve) => { + resolveToolCall = resolve; + }); + mockScheduleAgentTools.mockReturnValueOnce(toolCallPromise); + + // Turn 2: Model calls complete_task + mockModelResponse( + [ + { + name: TASK_COMPLETE_TOOL_NAME, + args: { finalResult: 'Done' }, + id: 'call2', + }, + ], + 'T2: Done', + ); + + // Start execution + const runPromise = executor.run({ goal: 'Mid-turn hint test' }, signal); + + // Small delay to ensure the run loop has reached the await and registered listener + await vi.advanceTimersByTimeAsync(1); + + // Add the hint while the tool call is pending + configWithHints.userHintService.addUserHint('Corrective Hint'); + + // Now resolve the tool call to complete Turn 1 + resolveToolCall!([ + { + status: 'success', + request: { + callId: 'call1', + name: LS_TOOL_NAME, + args: { path: '.' }, + isClientInitiated: false, + prompt_id: 'p1', + }, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + callId: 'call1', + resultDisplay: 'file1.txt', + responseParts: [ + { + functionResponse: { + name: LS_TOOL_NAME, + response: { result: 'file1.txt' }, + id: 'call1', + }, + }, + ], + }, + }, + ]); + + await runPromise; + + expect(mockSendMessageStream).toHaveBeenCalledTimes(2); + + // The second turn (turn 1) should contain the corrective hint. + const secondTurnMessageParts = mockSendMessageStream.mock.calls[1][1]; + expect(secondTurnMessageParts).toContainEqual( + expect.objectContaining({ + text: expect.stringContaining('Corrective Hint'), + }), + ); + }); + }); }); describe('Chat Compression', () => { const mockWorkResponse = (id: string) => { diff --git a/packages/core/src/agents/local-executor.ts b/packages/core/src/agents/local-executor.ts index b30f1ae53e..0d2f009a9e 100644 --- a/packages/core/src/agents/local-executor.ts +++ b/packages/core/src/agents/local-executor.ts @@ -60,6 +60,7 @@ import { getToolCallContext } from '../utils/toolCallContext.js'; import { scheduleAgentTools } from './agent-scheduler.js'; import { DeadlineTimer } from '../utils/deadlineTimer.js'; import { LlmRole } from '../telemetry/types.js'; +import { formatUserHintsForModel } from '../utils/fastAckHelper.js'; /** A callback function to report on agent activity. */ export type ActivityCallback = (activity: SubagentActivityEvent) => void; @@ -463,45 +464,82 @@ export class LocalAgentExecutor { const query = this.definition.promptConfig.query ? templateString(this.definition.promptConfig.query, augmentedInputs) : DEFAULT_QUERY_STRING; - let currentMessage: Content = { role: 'user', parts: [{ text: query }] }; - while (true) { - // Check for termination conditions like max turns. - const reason = this.checkTermination(turnCounter, maxTurns); - if (reason) { - terminateReason = reason; - break; - } + const pendingHintsQueue: string[] = []; + const hintListener = (hint: string) => { + pendingHintsQueue.push(hint); + }; + // Capture the index of the last hint before starting to avoid re-injecting old hints. + // NOTE: Hints added AFTER this point will be broadcast to all currently running + // local agents via the listener below. + const startIndex = + this.runtimeContext.userHintService.getLatestHintIndex(); + this.runtimeContext.userHintService.onUserHint(hintListener); - // Check for timeout or external abort. - if (combinedSignal.aborted) { - // Determine which signal caused the abort. - terminateReason = deadlineTimer.signal.aborted - ? AgentTerminateMode.TIMEOUT - : AgentTerminateMode.ABORTED; - break; - } + try { + const initialHints = + this.runtimeContext.userHintService.getUserHintsAfter(startIndex); + const formattedInitialHints = formatUserHintsForModel(initialHints); - const turnResult = await this.executeTurn( - chat, - currentMessage, - turnCounter++, - combinedSignal, - deadlineTimer.signal, - onWaitingForConfirmation, - ); + let currentMessage: Content = formattedInitialHints + ? { + role: 'user', + parts: [{ text: formattedInitialHints }, { text: query }], + } + : { role: 'user', parts: [{ text: query }] }; - if (turnResult.status === 'stop') { - terminateReason = turnResult.terminateReason; - // Only set finalResult if the turn provided one (e.g., error or goal). - if (turnResult.finalResult) { - finalResult = turnResult.finalResult; + while (true) { + // Check for termination conditions like max turns. + const reason = this.checkTermination(turnCounter, maxTurns); + if (reason) { + terminateReason = reason; + break; } - break; // Exit the loop for *any* stop reason. - } - // If status is 'continue', update message for the next loop - currentMessage = turnResult.nextMessage; + // Check for timeout or external abort. + if (combinedSignal.aborted) { + // Determine which signal caused the abort. + terminateReason = deadlineTimer.signal.aborted + ? AgentTerminateMode.TIMEOUT + : AgentTerminateMode.ABORTED; + break; + } + + const turnResult = await this.executeTurn( + chat, + currentMessage, + turnCounter++, + combinedSignal, + deadlineTimer.signal, + onWaitingForConfirmation, + ); + + if (turnResult.status === 'stop') { + terminateReason = turnResult.terminateReason; + // Only set finalResult if the turn provided one (e.g., error or goal). + if (turnResult.finalResult) { + finalResult = turnResult.finalResult; + } + break; // Exit the loop for *any* stop reason. + } + + // If status is 'continue', update message for the next loop + currentMessage = turnResult.nextMessage; + + // Check for new user steering hints collected via subscription + if (pendingHintsQueue.length > 0) { + const hintsToProcess = [...pendingHintsQueue]; + pendingHintsQueue.length = 0; + const formattedHints = formatUserHintsForModel(hintsToProcess); + if (formattedHints) { + // Append hints to the current message (next turn) + currentMessage.parts ??= []; + currentMessage.parts.unshift({ text: formattedHints }); + } + } + } + } finally { + this.runtimeContext.userHintService.offUserHint(hintListener); } // === UNIFIED RECOVERY BLOCK === diff --git a/packages/core/src/agents/subagent-tool.test.ts b/packages/core/src/agents/subagent-tool.test.ts index 1a89e84c4b..135365712d 100644 --- a/packages/core/src/agents/subagent-tool.test.ts +++ b/packages/core/src/agents/subagent-tool.test.ts @@ -148,4 +148,129 @@ describe('SubAgentInvocation', () => { updateOutput, ); }); + + describe('withUserHints', () => { + it('should NOT modify query for local agents', async () => { + mockConfig = makeFakeConfig({ modelSteering: true }); + mockConfig.userHintService.addUserHint('Test Hint'); + + const tool = new SubagentTool(testDefinition, mockConfig, mockMessageBus); + const params = { query: 'original query' }; + // @ts-expect-error - accessing private method for testing + const invocation = tool.createInvocation(params, mockMessageBus); + + // @ts-expect-error - accessing private method for testing + const hintedParams = invocation.withUserHints(params); + + expect(hintedParams.query).toBe('original query'); + }); + + it('should NOT modify query for remote agents if model steering is disabled', async () => { + mockConfig = makeFakeConfig({ modelSteering: false }); + mockConfig.userHintService.addUserHint('Test Hint'); + + const tool = new SubagentTool( + testRemoteDefinition, + mockConfig, + mockMessageBus, + ); + const params = { query: 'original query' }; + // @ts-expect-error - accessing private method for testing + const invocation = tool.createInvocation(params, mockMessageBus); + + // @ts-expect-error - accessing private method for testing + const hintedParams = invocation.withUserHints(params); + + expect(hintedParams.query).toBe('original query'); + }); + + it('should NOT modify query for remote agents if there are no hints', async () => { + mockConfig = makeFakeConfig({ modelSteering: true }); + + const tool = new SubagentTool( + testRemoteDefinition, + mockConfig, + mockMessageBus, + ); + const params = { query: 'original query' }; + // @ts-expect-error - accessing private method for testing + const invocation = tool.createInvocation(params, mockMessageBus); + + // @ts-expect-error - accessing private method for testing + const hintedParams = invocation.withUserHints(params); + + expect(hintedParams.query).toBe('original query'); + }); + + it('should prepend hints to query for remote agents when hints exist and steering is enabled', async () => { + mockConfig = makeFakeConfig({ modelSteering: true }); + + const tool = new SubagentTool( + testRemoteDefinition, + mockConfig, + mockMessageBus, + ); + const params = { query: 'original query' }; + // @ts-expect-error - accessing private method for testing + const invocation = tool.createInvocation(params, mockMessageBus); + + mockConfig.userHintService.addUserHint('Hint 1'); + mockConfig.userHintService.addUserHint('Hint 2'); + + // @ts-expect-error - accessing private method for testing + const hintedParams = invocation.withUserHints(params); + + expect(hintedParams.query).toContain('Hint 1'); + expect(hintedParams.query).toContain('Hint 2'); + expect(hintedParams.query).toMatch(/original query$/); + }); + + it('should NOT include legacy hints added before the invocation was created', async () => { + mockConfig = makeFakeConfig({ modelSteering: true }); + mockConfig.userHintService.addUserHint('Legacy Hint'); + + const tool = new SubagentTool( + testRemoteDefinition, + mockConfig, + mockMessageBus, + ); + const params = { query: 'original query' }; + + // Creation of invocation captures the current hint state + // @ts-expect-error - accessing private method for testing + const invocation = tool.createInvocation(params, mockMessageBus); + + // Verify no hints are present yet + // @ts-expect-error - accessing private method for testing + let hintedParams = invocation.withUserHints(params); + expect(hintedParams.query).toBe('original query'); + + // Add a new hint after creation + mockConfig.userHintService.addUserHint('New Hint'); + // @ts-expect-error - accessing private method for testing + hintedParams = invocation.withUserHints(params); + + expect(hintedParams.query).toContain('New Hint'); + expect(hintedParams.query).not.toContain('Legacy Hint'); + }); + + it('should NOT modify query if query is missing or not a string', async () => { + mockConfig = makeFakeConfig({ modelSteering: true }); + mockConfig.userHintService.addUserHint('Hint'); + + const tool = new SubagentTool( + testRemoteDefinition, + mockConfig, + mockMessageBus, + ); + const params = { other: 'param' }; + // @ts-expect-error - accessing private method for testing + const invocation = tool.createInvocation(params, mockMessageBus); + + // @ts-expect-error - accessing private method for testing + const hintedParams = invocation.withUserHints(params); + + expect(hintedParams).toEqual(params); + }); + }); }); diff --git a/packages/core/src/agents/subagent-tool.ts b/packages/core/src/agents/subagent-tool.ts index f311b60562..3a92452c3d 100644 --- a/packages/core/src/agents/subagent-tool.ts +++ b/packages/core/src/agents/subagent-tool.ts @@ -18,6 +18,7 @@ import type { MessageBus } from '../confirmation-bus/message-bus.js'; import type { AgentDefinition, AgentInputs } from './types.js'; import { SubagentToolWrapper } from './subagent-tool-wrapper.js'; import { SchemaValidator } from '../utils/schemaValidator.js'; +import { formatUserHintsForModel } from '../utils/fastAckHelper.js'; export class SubagentTool extends BaseDeclarativeTool { constructor( @@ -65,6 +66,8 @@ export class SubagentTool extends BaseDeclarativeTool { } class SubAgentInvocation extends BaseToolInvocation { + private readonly startIndex: number; + constructor( params: AgentInputs, private readonly definition: AgentDefinition, @@ -79,6 +82,7 @@ class SubAgentInvocation extends BaseToolInvocation { _toolName ?? definition.name, _toolDisplayName ?? definition.displayName ?? definition.name, ); + this.startIndex = config.userHintService.getLatestHintIndex(); } getDescription(): string { @@ -88,7 +92,10 @@ class SubAgentInvocation extends BaseToolInvocation { override async shouldConfirmExecute( abortSignal: AbortSignal, ): Promise { - const invocation = this.buildSubInvocation(this.definition, this.params); + const invocation = this.buildSubInvocation( + this.definition, + this.withUserHints(this.params), + ); return invocation.shouldConfirmExecute(abortSignal); } @@ -107,11 +114,38 @@ class SubAgentInvocation extends BaseToolInvocation { ); } - const invocation = this.buildSubInvocation(this.definition, this.params); + const invocation = this.buildSubInvocation( + this.definition, + this.withUserHints(this.params), + ); return invocation.execute(signal, updateOutput); } + private withUserHints(agentArgs: AgentInputs): AgentInputs { + if (this.definition.kind !== 'remote') { + return agentArgs; + } + + const userHints = this.config.userHintService.getUserHintsAfter( + this.startIndex, + ); + const formattedHints = formatUserHintsForModel(userHints); + if (!formattedHints) { + return agentArgs; + } + + const query = agentArgs['query']; + if (typeof query !== 'string' || query.trim().length === 0) { + return agentArgs; + } + + return { + ...agentArgs, + query: `${formattedHints}\n\n${query}`, + }; + } + private buildSubInvocation( definition: AgentDefinition, agentArgs: AgentInputs, diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 646e853b0f..525e22743d 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -125,6 +125,7 @@ import { } from '../telemetry/loggers.js'; import { fetchAdminControls } from '../code_assist/admin/admin_controls.js'; import { isSubpath } from '../utils/paths.js'; +import { UserHintService } from './userHintService.js'; export interface AccessibilitySettings { enableLoadingPhrases?: boolean; @@ -481,6 +482,7 @@ export interface ConfigParameters { toolOutputMasking?: Partial; disableLLMCorrection?: boolean; plan?: boolean; + modelSteering?: boolean; onModelChange?: (model: string) => void; mcpEnabled?: boolean; extensionsEnabled?: boolean; @@ -670,11 +672,13 @@ export class Config { private readonly experimentalJitContext: boolean; private readonly disableLLMCorrection: boolean; private readonly planEnabled: boolean; + private readonly modelSteering: boolean; private contextManager?: ContextManager; private terminalBackground: string | undefined = undefined; private remoteAdminSettings: AdminControlsSettings | undefined; private latestApiRequest: GenerateContentParameters | undefined; private lastModeSwitchTime: number = Date.now(); + readonly userHintService: UserHintService; private approvedPlanPath: string | undefined; constructor(params: ConfigParameters) { @@ -763,6 +767,10 @@ export class Config { this.adminSkillsEnabled = params.adminSkillsEnabled ?? true; this.modelAvailabilityService = new ModelAvailabilityService(); this.experimentalJitContext = params.experimentalJitContext ?? false; + this.modelSteering = params.modelSteering ?? false; + this.userHintService = new UserHintService(() => + this.isModelSteeringEnabled(), + ); this.toolOutputMasking = { enabled: params.toolOutputMasking?.enabled ?? true, toolProtectionThreshold: @@ -1637,6 +1645,10 @@ export class Config { return this.experimentalJitContext; } + isModelSteeringEnabled(): boolean { + return this.modelSteering; + } + getToolOutputMaskingEnabled(): boolean { return this.toolOutputMasking.enabled; } diff --git a/packages/core/src/config/userHintService.test.ts b/packages/core/src/config/userHintService.test.ts new file mode 100644 index 0000000000..faf301c6d1 --- /dev/null +++ b/packages/core/src/config/userHintService.test.ts @@ -0,0 +1,77 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi } from 'vitest'; +import { UserHintService } from './userHintService.js'; + +describe('UserHintService', () => { + it('is disabled by default and ignores hints', () => { + const service = new UserHintService(() => false); + service.addUserHint('this hint should be ignored'); + expect(service.getUserHints()).toEqual([]); + expect(service.getLatestHintIndex()).toBe(-1); + }); + + it('stores trimmed hints and exposes them via indexing when enabled', () => { + const service = new UserHintService(() => true); + + service.addUserHint(' first hint '); + service.addUserHint('second hint'); + service.addUserHint(' '); + + expect(service.getUserHints()).toEqual(['first hint', 'second hint']); + expect(service.getLatestHintIndex()).toBe(1); + expect(service.getUserHintsAfter(-1)).toEqual([ + 'first hint', + 'second hint', + ]); + expect(service.getUserHintsAfter(0)).toEqual(['second hint']); + expect(service.getUserHintsAfter(1)).toEqual([]); + }); + + it('tracks the last hint timestamp', () => { + const service = new UserHintService(() => true); + + expect(service.getLastUserHintAt()).toBeNull(); + service.addUserHint('hint'); + + const timestamp = service.getLastUserHintAt(); + expect(timestamp).not.toBeNull(); + expect(typeof timestamp).toBe('number'); + }); + + it('notifies listeners when a hint is added', () => { + const service = new UserHintService(() => true); + const listener = vi.fn(); + service.onUserHint(listener); + + service.addUserHint('new hint'); + + expect(listener).toHaveBeenCalledWith('new hint'); + }); + + it('does NOT notify listeners after they are unregistered', () => { + const service = new UserHintService(() => true); + const listener = vi.fn(); + service.onUserHint(listener); + service.offUserHint(listener); + + service.addUserHint('ignored hint'); + + expect(listener).not.toHaveBeenCalled(); + }); + + it('should clear all hints', () => { + const service = new UserHintService(() => true); + service.addUserHint('hint 1'); + service.addUserHint('hint 2'); + expect(service.getUserHints()).toHaveLength(2); + + service.clear(); + expect(service.getUserHints()).toHaveLength(0); + expect(service.getLatestHintIndex()).toBe(-1); + }); +}); diff --git a/packages/core/src/config/userHintService.ts b/packages/core/src/config/userHintService.ts new file mode 100644 index 0000000000..227e54b18c --- /dev/null +++ b/packages/core/src/config/userHintService.ts @@ -0,0 +1,87 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * Service for managing user steering hints during a session. + */ +export class UserHintService { + private readonly userHints: Array<{ text: string; timestamp: number }> = []; + private readonly userHintListeners: Set<(hint: string) => void> = new Set(); + + constructor(private readonly isEnabled: () => boolean) {} + + /** + * Adds a new steering hint from the user. + */ + addUserHint(hint: string): void { + if (!this.isEnabled()) { + return; + } + const trimmed = hint.trim(); + if (trimmed.length === 0) { + return; + } + this.userHints.push({ text: trimmed, timestamp: Date.now() }); + for (const listener of this.userHintListeners) { + listener(trimmed); + } + } + + /** + * Registers a listener for new user hints. + */ + onUserHint(listener: (hint: string) => void): void { + this.userHintListeners.add(listener); + } + + /** + * Unregisters a listener for new user hints. + */ + offUserHint(listener: (hint: string) => void): void { + this.userHintListeners.delete(listener); + } + + /** + * Returns all collected hints. + */ + getUserHints(): string[] { + return this.userHints.map((h) => h.text); + } + + /** + * Returns hints added after a specific index. + */ + getUserHintsAfter(index: number): string[] { + if (index < 0) { + return this.getUserHints(); + } + return this.userHints.slice(index + 1).map((h) => h.text); + } + + /** + * Returns the index of the latest hint. + */ + getLatestHintIndex(): number { + return this.userHints.length - 1; + } + + /** + * Returns the timestamp of the last user hint. + */ + getLastUserHintAt(): number | null { + if (this.userHints.length === 0) { + return null; + } + return this.userHints[this.userHints.length - 1].timestamp; + } + + /** + * Clears all collected hints. + */ + clear(): void { + this.userHints.length = 0; + } +} diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index 22d0e6f71a..bf73399537 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -11,6 +11,7 @@ exports[`Core System Prompt (prompts.ts) > ApprovalMode in System Prompt > Appro - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -142,6 +143,7 @@ exports[`Core System Prompt (prompts.ts) > ApprovalMode in System Prompt > Appro - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -278,6 +280,7 @@ exports[`Core System Prompt (prompts.ts) > ApprovalMode in System Prompt > shoul - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -390,6 +393,7 @@ exports[`Core System Prompt (prompts.ts) > ApprovalMode in System Prompt > shoul - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -557,6 +561,7 @@ Use the following guidelines to optimize your search and read patterns. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -712,6 +717,7 @@ Use the following guidelines to optimize your search and read patterns. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, you must work autonomously as no further user input is available. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Handle Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, do not perform it automatically. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -833,6 +839,7 @@ Use the following guidelines to optimize your search and read patterns. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, you must work autonomously as no further user input is available. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Handle Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, do not perform it automatically. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -918,6 +925,7 @@ exports[`Core System Prompt (prompts.ts) > should handle git instructions when i - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -1030,6 +1038,7 @@ exports[`Core System Prompt (prompts.ts) > should handle git instructions when i - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -1160,6 +1169,7 @@ exports[`Core System Prompt (prompts.ts) > should include approved plan instruct - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -1262,6 +1272,7 @@ exports[`Core System Prompt (prompts.ts) > should include available_skills when - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -1423,6 +1434,7 @@ Use the following guidelines to optimize your search and read patterns. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -1574,6 +1586,7 @@ Use the following guidelines to optimize your search and read patterns. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -1716,6 +1729,7 @@ Use the following guidelines to optimize your search and read patterns. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -1858,6 +1872,7 @@ Use the following guidelines to optimize your search and read patterns. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -1996,6 +2011,7 @@ Use the following guidelines to optimize your search and read patterns. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -2134,6 +2150,7 @@ Use the following guidelines to optimize your search and read patterns. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -2271,6 +2288,7 @@ Use the following guidelines to optimize your search and read patterns. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -2373,6 +2391,7 @@ exports[`Core System Prompt (prompts.ts) > should match snapshot on Windows 1`] - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -2486,6 +2505,7 @@ exports[`Core System Prompt (prompts.ts) > should render hierarchical memory wit - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. - **Conflict Resolution:** Instructions are provided in hierarchical context tags: \`\`, \`\`, and \`\`. In case of contradictory instructions, follow this priority: \`\` (highest) > \`\` > \`\` (lowest). +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -2648,6 +2668,7 @@ Use the following guidelines to optimize your search and read patterns. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -2786,6 +2807,7 @@ Use the following guidelines to optimize your search and read patterns. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -2888,6 +2910,7 @@ exports[`Core System Prompt (prompts.ts) > should return the interactive avoidan - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Handle Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, do not perform it automatically. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -3035,6 +3058,7 @@ Use the following guidelines to optimize your search and read patterns. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -3173,6 +3197,7 @@ Use the following guidelines to optimize your search and read patterns. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. @@ -3275,6 +3300,7 @@ exports[`Core System Prompt (prompts.ts) > should use legacy system prompt for n - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise. +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. diff --git a/packages/core/src/core/prompts.test.ts b/packages/core/src/core/prompts.test.ts index 12ab97cd58..ce6f383009 100644 --- a/packages/core/src/core/prompts.test.ts +++ b/packages/core/src/core/prompts.test.ts @@ -218,6 +218,7 @@ describe('Core System Prompt (prompts.ts)', () => { expect(prompt).not.toContain('No sub-agents are currently available.'); expect(prompt).toContain('# Core Mandates'); expect(prompt).toContain('- **Conventions:**'); + expect(prompt).toContain('- **User Hints:**'); expect(prompt).toContain('# Outside of Sandbox'); expect(prompt).toContain('# Final Reminder'); expect(prompt).toMatchSnapshot(); @@ -227,6 +228,7 @@ describe('Core System Prompt (prompts.ts)', () => { vi.mocked(mockConfig.getActiveModel).mockReturnValue(PREVIEW_GEMINI_MODEL); const prompt = getCoreSystemPrompt(mockConfig); expect(prompt).toContain('You are Gemini CLI, an interactive CLI agent'); // Check for core content + expect(prompt).toContain('- **User Hints:**'); expect(prompt).toContain('No Chitchat:'); expect(prompt).toMatchSnapshot(); }); diff --git a/packages/core/src/prompts/snippets.legacy.ts b/packages/core/src/prompts/snippets.legacy.ts index 8d46fd6a1a..3671490089 100644 --- a/packages/core/src/prompts/snippets.legacy.ts +++ b/packages/core/src/prompts/snippets.legacy.ts @@ -159,6 +159,7 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { - **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically. - **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments. - **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.${mandateConflictResolution(options.hasHierarchicalMemory)} +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - ${mandateConfirm(options.interactive)} - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.${mandateSkillGuidance(options.hasSkills)}${mandateExplainBeforeActing(options.isGemini3)}${mandateContinueWork(options.interactive)} diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 18b2aac760..3791a856bf 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -204,6 +204,7 @@ Use the following guidelines to optimize your search and read patterns. - **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. ${options.interactive ? 'For Directives, only clarify if critically underspecified; otherwise, work autonomously.' : 'For Directives, you must work autonomously as no further user input is available.'} You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction. - **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path. - **Testing:** ALWAYS search for and update related tests after making a code change. You must add a new test case to the existing test file (if one exists) or create a new test file to verify your changes.${mandateConflictResolution(options.hasHierarchicalMemory)} +- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - ${mandateConfirm(options.interactive)} - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.${mandateSkillGuidance(options.hasSkills)} diff --git a/packages/core/src/utils/fastAckHelper.ts b/packages/core/src/utils/fastAckHelper.ts index 82dd935776..1ce33f4e26 100644 --- a/packages/core/src/utils/fastAckHelper.ts +++ b/packages/core/src/utils/fastAckHelper.ts @@ -9,6 +9,7 @@ import type { BaseLlmClient } from '../core/baseLlmClient.js'; import type { ModelConfigKey } from '../services/modelConfigService.js'; import { debugLogger } from './debugLogger.js'; import { getResponseText } from './partUtils.js'; +import { getErrorMessage } from './errors.js'; export const DEFAULT_FAST_ACK_MODEL_CONFIG_KEY: ModelConfigKey = { model: 'fast-ack-helper', @@ -192,7 +193,7 @@ export async function generateFastAckText( return responseText; } catch (error) { debugLogger.debug( - `[FastAckHelper] Generation failed: ${error instanceof Error ? error.message : String(error)}`, + `[FastAckHelper] Generation failed: ${getErrorMessage(error)}`, ); return fallbackText; } diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index eaf3431723..f885990a58 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -1591,6 +1591,13 @@ "markdownDescription": "Enable planning features (Plan Mode and tools).\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`", "default": false, "type": "boolean" + }, + "modelSteering": { + "title": "Model Steering", + "description": "Enable model steering (user hints) to guide the model during tool execution.", + "markdownDescription": "Enable model steering (user hints) to guide the model during tool execution.\n\n- Category: `Experimental`\n- Requires restart: `no`\n- Default: `false`", + "default": false, + "type": "boolean" } }, "additionalProperties": false