feat(core): experimental in-progress steering hints

This is a rebase / refactor of:
https://github.com/google-gemini/gemini-cli/pull/18783
This commit is contained in:
Your Name
2026-02-11 21:14:29 +00:00
parent ef02cec2cd
commit 5ed64c7130
45 changed files with 2090 additions and 136 deletions
@@ -0,0 +1,81 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, afterEach } from 'vitest';
import { AppRig } from '../test-utils/AppRig.js';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { PolicyDecision } from '@google/gemini-cli-core';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
/**
 * Integration test: a steering hint added while a tool call is waiting for
 * confirmation must be acknowledged and then reflected in the model's next
 * action. Model output is replayed from `steering.responses`.
 */
describe('Model Steering Integration', () => {
  let rig: AppRig | undefined;

  afterEach(async () => {
    // Clear the reference before tearing down so that a later test which
    // fails before creating its own rig never unmounts this one twice.
    const current = rig;
    rig = undefined;
    await current?.unmount();
  });

  it('should steer the model using a hint during a tool turn', async () => {
    const fakeResponsesPath = path.join(
      __dirname,
      '../test-utils/fixtures/steering.responses',
    );
    rig = new AppRig({ fakeResponsesPath });
    await rig.initialize();
    rig.render();
    await rig.waitForIdle();

    // Force both tools to ask for confirmation so the test can interleave a
    // hint between the tool request and its approval.
    rig.setToolPolicy('list_directory', PolicyDecision.ASK_USER);
    rig.setToolPolicy('read_file', PolicyDecision.ASK_USER);

    rig.setMockCommands([
      {
        command: /list_directory/,
        result: {
          output: 'file1.txt\nfile2.js\nfile3.md',
          exitCode: 0,
        },
      },
      {
        // The dot is escaped so only the literal file name matches.
        command: /read_file file1\.txt/,
        result: {
          output: 'This is file1.txt content.',
          exitCode: 0,
        },
      },
    ]);

    // Start a long task
    await rig.type('Start long task');
    await rig.pressEnter();

    // Wait for the model to call 'list_directory' (Confirming state)
    await rig.waitForOutput('ReadFolder');

    // Inject a hint while the model is in a tool turn
    await rig.addUserHint('focus on .txt');

    // Resolve list_directory (Proceed)
    await rig.resolveTool('ReadFolder');

    // Wait for the model to process the hint and output the next action.
    // Based on steering.responses, it should first acknowledge the hint.
    await rig.waitForOutput('ACK: I will focus on .txt files now.');

    // Then it should proceed with the next action. Dots are escaped so the
    // pattern only accepts the literal ".txt" / "file1.txt" text.
    await rig.waitForOutput(
      /Since you want me to focus on \.txt files,[\s\S]*I will read file1\.txt/,
    );
    await rig.waitForOutput('ReadFile');

    // Resolve read_file (Proceed)
    await rig.resolveTool('ReadFile');

    // Wait for final completion
    await rig.waitForOutput('Task complete.');
  });
});
@@ -0,0 +1,80 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, afterEach, expect } from 'vitest';
import { AppRig } from './AppRig.js';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { debugLogger } from '@google/gemini-cli-core';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
/**
 * Smoke tests for the AppRig harness itself: breakpoint-driven tool turns and
 * a plain request/response round trip, both replayed from fixture files.
 */
describe('AppRig', () => {
  let rig: AppRig | undefined;

  afterEach(async () => {
    // Clear the reference before tearing down so that a test which fails
    // before creating its own rig never unmounts the previous one again.
    const current = rig;
    rig = undefined;
    await current?.unmount();
  });

  it('should handle deterministic tool turns with breakpoints', async () => {
    const fakeResponsesPath = path.join(
      __dirname,
      'fixtures',
      'steering.responses',
    );
    rig = new AppRig({ fakeResponsesPath });
    await rig.initialize();
    rig.render();
    await rig.waitForIdle();

    // Set breakpoints on the canonical tool names
    rig.setBreakpoint('list_directory');
    rig.setBreakpoint('read_file');

    // Start a task
    debugLogger.log('[Test] Sending message: Start long task');
    await rig.sendMessage('Start long task');

    // Wait for the first breakpoint (list_directory)
    const pending1 = await rig.waitForPendingConfirmation('list_directory');
    expect(pending1.toolName).toBe('list_directory');

    // Inject a hint
    await rig.addUserHint('focus on .txt');

    // Resolve and wait for the NEXT breakpoint (read_file).
    // resolveTool automatically removes the breakpoint policy for list_directory.
    await rig.resolveTool('list_directory');
    const pending2 = await rig.waitForPendingConfirmation('read_file');
    expect(pending2.toolName).toBe('read_file');

    // Resolve and finish. Also removes the read_file breakpoint.
    await rig.resolveTool('read_file');
    await rig.waitForOutput('Task complete.', 100000);
  });

  it('should render the app and handle a simple message', async () => {
    const fakeResponsesPath = path.join(
      __dirname,
      'fixtures',
      'simple.responses',
    );
    rig = new AppRig({ fakeResponsesPath });
    await rig.initialize();
    rig.render();

    // Wait for initial render
    await rig.waitForIdle();

    // Type a message
    await rig.type('Hello');
    await rig.pressEnter();

    // Wait for model response
    await rig.waitForOutput('Hello! How can I help you today?');
  });
});
+569
View File
@@ -0,0 +1,569 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { vi } from 'vitest';
import { act } from 'react';
import stripAnsi from 'strip-ansi';
import os from 'node:os';
import path from 'node:path';
import fs from 'node:fs';
import { AppContainer } from '../ui/AppContainer.js';
import { renderWithProviders } from './render.js';
import {
makeFakeConfig,
type Config,
type ConfigParameters,
ExtensionLoader,
AuthType,
ApprovalMode,
createPolicyEngineConfig,
PolicyDecision,
ToolConfirmationOutcome,
MessageBusType,
type ToolCallsUpdateMessage,
coreEvents,
ideContextStore,
createContentGenerator,
startupProfiler,
IdeClient,
debugLogger,
} from '@google/gemini-cli-core';
import {
type MockShellCommand,
MockShellExecutionService,
} from './MockShellExecutionService.js';
import { createMockSettings } from './settings.js';
import { type LoadedSettings } from '../config/settings.js';
import { AuthState } from '../ui/types.js';
// Mock core functions globally for tests using AppRig: every export of the
// real core module is kept, except ShellExecutionService, which is replaced
// by MockShellExecutionService.
vi.mock('@google/gemini-cli-core', async (importOriginal) => {
  const original =
    await importOriginal<typeof import('@google/gemini-cli-core')>();
  const { MockShellExecutionService: MockService } = await import(
    './MockShellExecutionService.js'
  );
  // Register the real execution logic so MockShellExecutionService can fall
  // back to it. The method is bound to its owning class so that any `this`
  // reference inside the real implementation still resolves to the original
  // service rather than to the mock that ends up invoking it.
  MockService.setOriginalImplementation(
    original.ShellExecutionService.execute.bind(original.ShellExecutionService),
  );
  return {
    ...original,
    ShellExecutionService: MockService,
  };
});
// Replace the auth module so tests skip the interactive authentication flow:
// the hook always reports an authenticated session with a fixed test API key,
// and auth-method validation never reports an error.
vi.mock('../ui/auth/useAuth.js', () => {
  const useAuthCommand = () => ({
    authState: AuthState.Authenticated,
    setAuthState: vi.fn(),
    authError: null,
    onAuthError: vi.fn(),
    apiKeyDefaultValue: 'test-api-key',
    reloadApiKey: vi.fn().mockResolvedValue('test-api-key'),
  });
  const validateAuthMethodWithSettings = () => null;

  return { useAuthCommand, validateAuthMethodWithSettings };
});
/**
 * A minimal mock ExtensionManager to satisfy AppContainer's forceful cast.
 * It extends the real ExtensionLoader and adds only spy-backed members.
 */
class MockExtensionManager extends ExtensionLoader {
// Spy that always reports an empty extension list.
getExtensions = vi.fn().mockReturnValue([]);
// Spies with no implementation; they only record that they were called.
setRequestConsent = vi.fn();
setRequestSetting = vi.fn();
}
/**
 * Construction options for AppRig.
 */
export interface AppRigOptions {
/**
 * Path to a recorded-responses file, forwarded to the config as
 * `fakeResponses`. When set, the rig stubs GEMINI_API_KEY and disables shell
 * passthrough; when omitted, a real GEMINI_API_KEY must be present in the
 * environment.
 */
fakeResponsesPath?: string;
/** Width handed to the test renderer; `render()` uses 120 when omitted. */
terminalWidth?: number;
/** Terminal height placed in the UI state; `render()` uses 40 when omitted. */
terminalHeight?: number;
/** Config parameters spread last over the rig's defaults in `initialize()`. */
configOverrides?: Partial<ConfigParameters>;
}
/**
 * A tool call that was observed on the message bus in the
 * `awaiting_approval` state and has not been resolved yet.
 */
export interface PendingConfirmation {
/** Name taken from the tool call request. */
toolName: string;
/**
 * The tool's display name, or the confirmation title with a leading
 * "Confirm:" prefix removed when the tool has no display name.
 */
toolDisplayName?: string;
/** Correlation id of the tool call; used to address the confirmation response. */
correlationId: string;
}
/**
 * Test harness that mounts the real `AppContainer` on top of a fake `Config`
 * and exposes helpers to drive it: writing to stdin, waiting for rendered
 * output, and intercepting tool confirmations published on the message bus.
 *
 * Lifecycle: construct, `await initialize()`, `render()`, interact, and
 * finally `await unmount()`.
 */
export class AppRig {
  private renderResult: ReturnType<typeof renderWithProviders> | undefined;
  private config: Config | undefined;
  private settings: LoadedSettings | undefined;
  private testDir: string;
  private sessionId: string;
  // Tool calls currently awaiting approval, keyed by correlation id.
  private pendingConfirmations = new Map<string, PendingConfirmation>();
  // Tool names registered through setBreakpoint().
  private breakpointTools = new Set<string | undefined>();
  // Result of the latest waitForPendingConfirmation(); consumed by
  // resolveAwaitedTool().
  private lastAwaitedConfirmation: PendingConfirmation | undefined;

  /**
   * Creates a fresh temporary directory (used as home, cwd and target dir)
   * and a random session id. Nothing is rendered until `initialize()` and
   * `render()` are called.
   */
  constructor(private options: AppRigOptions = {}) {
    this.testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gemini-app-rig-'));
    this.sessionId = `test-session-${Math.random().toString(36).slice(2, 9)}`;
  }

  /**
   * Stubs the environment, builds settings and the fake config, subscribes to
   * tool-call updates and initializes the config (including a first
   * `refreshAuth`). Must be awaited before `render()`.
   */
  async initialize() {
    this.setupEnvironment();
    this.settings = this.createRigSettings();

    const approvalMode =
      this.options.configOverrides?.approvalMode ?? ApprovalMode.DEFAULT;
    const policyEngineConfig = await createPolicyEngineConfig(
      this.settings.merged,
      approvalMode,
    );

    const configParams: ConfigParameters = {
      sessionId: this.sessionId,
      targetDir: this.testDir,
      cwd: this.testDir,
      debugMode: false,
      model: 'test-model',
      fakeResponses: this.options.fakeResponsesPath,
      interactive: true,
      approvalMode,
      policyEngineConfig,
      enableEventDrivenScheduler: true,
      extensionLoader: new MockExtensionManager(),
      excludeTools: this.options.configOverrides?.excludeTools,
      // Caller overrides win over every default above.
      ...this.options.configOverrides,
    };
    this.config = makeFakeConfig(configParams);

    if (this.options.fakeResponsesPath) {
      this.stubRefreshAuth();
    }
    this.setupMessageBusListeners();

    await act(async () => {
      await this.config!.initialize();
      // Since we mocked useAuthCommand, we must manually trigger the first
      // refreshAuth to ensure contentGenerator is initialized.
      await this.config!.refreshAuth(AuthType.USE_GEMINI);
    });
  }

  /**
   * Isolates the test from the developer's machine by stubbing environment
   * variables, and selects whether unmatched shell commands may reach the real
   * shell service.
   *
   * @throws Error when no fake responses are configured and GEMINI_API_KEY is
   *   not set in the environment.
   */
  private setupEnvironment() {
    // Stub environment variables to avoid interference from developer's machine
    vi.stubEnv('GEMINI_CLI_HOME', this.testDir);

    if (this.options.fakeResponsesPath) {
      vi.stubEnv('GEMINI_API_KEY', 'test-api-key');
      MockShellExecutionService.setPassthrough(false);
    } else {
      if (!process.env['GEMINI_API_KEY']) {
        throw new Error(
          'GEMINI_API_KEY must be set in the environment for live model tests.',
        );
      }
      // For live tests, we allow falling through to the real shell service if no mock matches
      MockShellExecutionService.setPassthrough(true);
    }

    vi.stubEnv('GEMINI_DEFAULT_AUTH_TYPE', AuthType.USE_GEMINI);
  }

  /**
   * Builds mock settings (user scope and merged view carry the same values)
   * that select Gemini API-key auth, enable folder trust and disable the IDE
   * integration.
   */
  private createRigSettings(): LoadedSettings {
    return createMockSettings({
      user: {
        path: path.join(this.testDir, '.gemini', 'user_settings.json'),
        settings: {
          security: {
            auth: {
              selectedType: AuthType.USE_GEMINI,
              useExternal: true,
            },
            folderTrust: {
              enabled: true,
            },
          },
          ide: {
            enabled: false,
            hasSeenNudge: true,
          },
        },
        originalSettings: {},
      },
      merged: {
        security: {
          auth: {
            selectedType: AuthType.USE_GEMINI,
            useExternal: true,
          },
          folderTrust: {
            enabled: true,
          },
        },
        ide: {
          enabled: false,
          hasSeenNudge: true,
        },
      },
    });
  }

  /**
   * Replaces `config.refreshAuth` with a version that directly creates the
   * content generator and BaseLlmClient from an API key. Only installed when
   * fake responses are in use.
   */
  private stubRefreshAuth() {
    // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion, @typescript-eslint/no-explicit-any
    const gcConfig = this.config as any;
    gcConfig.refreshAuth = async (authMethod: AuthType) => {
      gcConfig.modelAvailabilityService.reset();

      const newContentGeneratorConfig = {
        authType: authMethod,
        proxy: gcConfig.getProxy(),
        apiKey: process.env['GEMINI_API_KEY'] || 'test-api-key',
      };

      gcConfig.contentGenerator = await createContentGenerator(
        newContentGeneratorConfig,
        this.config!,
        gcConfig.getSessionId(),
      );
      gcConfig.contentGeneratorConfig = newContentGeneratorConfig;

      // Initialize BaseLlmClient now that the ContentGenerator is available
      const { BaseLlmClient } = await import('@google/gemini-cli-core');
      gcConfig.baseLlmClient = new BaseLlmClient(
        gcConfig.contentGenerator,
        this.config!,
      );
    };
  }

  /**
   * Mirrors TOOL_CALLS_UPDATE messages into `pendingConfirmations`: calls in
   * `awaiting_approval` with a correlation id are recorded once; a call in any
   * other status removes the first recorded entry with the same tool name.
   */
  private setupMessageBusListeners() {
    if (!this.config) return;
    const messageBus = this.config.getMessageBus();

    messageBus.subscribe(
      MessageBusType.TOOL_CALLS_UPDATE,
      (message: ToolCallsUpdateMessage) => {
        for (const call of message.toolCalls) {
          if (call.status === 'awaiting_approval' && call.correlationId) {
            const details = call.confirmationDetails;
            const title = 'title' in details ? details.title : '';
            const toolDisplayName =
              call.tool?.displayName || title.replace(/^Confirm:\s*/, '');
            if (!this.pendingConfirmations.has(call.correlationId)) {
              this.pendingConfirmations.set(call.correlationId, {
                toolName: call.request.name,
                toolDisplayName,
                correlationId: call.correlationId,
              });
            }
          } else if (call.status !== 'awaiting_approval') {
            for (const [
              correlationId,
              pending,
            ] of this.pendingConfirmations.entries()) {
              if (pending.toolName === call.request.name) {
                this.pendingConfirmations.delete(correlationId);
                break;
              }
            }
          }
        }
      },
    );
  }

  /**
   * Mounts `AppContainer` through `renderWithProviders`.
   *
   * @throws Error when `initialize()` has not completed.
   */
  render() {
    if (!this.config || !this.settings)
      throw new Error('AppRig not initialized');

    act(() => {
      this.renderResult = renderWithProviders(
        <AppContainer
          config={this.config!}
          version="test-version"
          initializationResult={{
            authError: null,
            themeError: null,
            shouldOpenAuthDialog: false,
            geminiMdFileCount: 0,
          }}
        />,
        {
          config: this.config!,
          settings: this.settings!,
          width: this.options.terminalWidth ?? 120,
          useAlternateBuffer: false,
          uiState: {
            terminalHeight: this.options.terminalHeight ?? 40,
          },
        },
      );
    });
  }

  /** Installs the canned shell commands served by MockShellExecutionService. */
  setMockCommands(commands: MockShellCommand[]) {
    MockShellExecutionService.setMockCommands(commands);
  }

  /**
   * Adds a policy rule tagged with the source 'AppRig Override'.
   *
   * @param toolName Tool the rule applies to.
   * @param decision Decision the policy engine should return.
   * @param priority Rule priority (defaults to 10).
   * @throws Error when the rig is not initialized.
   */
  setToolPolicy(
    toolName: string | undefined,
    decision: PolicyDecision,
    priority = 10,
  ) {
    if (!this.config) throw new Error('AppRig not initialized');
    this.config.getPolicyEngine().addRule({
      toolName,
      decision,
      priority,
      source: 'AppRig Override',
    });
  }

  /**
   * Forces the given tool(s) to ask for confirmation (ASK_USER at priority
   * 100) and remembers them so `resolveTool()` can lift the rule afterwards.
   */
  setBreakpoint(toolName: string | string[] | undefined) {
    if (Array.isArray(toolName)) {
      for (const name of toolName) {
        this.setBreakpoint(name);
      }
    } else {
      this.setToolPolicy(toolName, PolicyDecision.ASK_USER, 100);
      this.breakpointTools.add(toolName);
    }
  }

  /**
   * Removes the policy rules for a tool that were added under `source` and
   * forgets any breakpoint registered for it.
   *
   * @throws Error when the rig is not initialized.
   */
  removeToolPolicy(toolName?: string, source = 'AppRig Override') {
    if (!this.config) throw new Error('AppRig not initialized');
    this.config
      .getPolicyEngine()
      // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
      .removeRulesForTool(toolName as string, source);
    this.breakpointTools.delete(toolName);
  }

  /** Returns the temporary directory created for this rig. */
  getTestDir(): string {
    return this.testDir;
  }

  /** Returns a snapshot of the tool calls currently awaiting approval. */
  getPendingConfirmations() {
    return Array.from(this.pendingConfirmations.values());
  }

  /**
   * Polls `predicate` until it returns true, sleeping `interval` ms inside
   * `act()` between attempts.
   *
   * @param predicate Condition to wait for; may be async.
   * @param options.timeout Maximum wait in ms (default 30000).
   * @param options.interval Delay between polls in ms (default 100).
   * @param options.message Error text, or a function producing it. A function
   *   is evaluated only when the timeout fires, so it can describe the state
   *   at the moment of failure.
   * @throws Error with the resolved message once the timeout elapses.
   */
  private async waitUntil(
    predicate: () => boolean | Promise<boolean>,
    options: {
      timeout?: number;
      interval?: number;
      message?: string | (() => string);
    } = {},
  ) {
    const {
      timeout = 30000,
      interval = 100,
      message = 'Condition timed out',
    } = options;
    const start = Date.now();

    while (true) {
      if (await predicate()) return;
      if (Date.now() - start > timeout) {
        throw new Error(typeof message === 'function' ? message() : message);
      }
      await act(async () => {
        await new Promise((resolve) => setTimeout(resolve, interval));
      });
    }
  }

  /**
   * Waits until a tool call awaiting approval matches the given name.
   *
   * @param toolNameOrDisplayName Exact tool or display name, a RegExp tested
   *   against both, or omitted to accept any pending confirmation.
   * @param timeout Maximum wait in ms.
   * @returns The matching confirmation, also remembered for
   *   `resolveAwaitedTool()`.
   * @throws Error listing the confirmations pending at timeout.
   */
  async waitForPendingConfirmation(
    toolNameOrDisplayName?: string | RegExp,
    timeout = 30000,
  ): Promise<PendingConfirmation> {
    const matches = (p: PendingConfirmation) => {
      if (!toolNameOrDisplayName) return true;
      if (typeof toolNameOrDisplayName === 'string') {
        return (
          p.toolName === toolNameOrDisplayName ||
          p.toolDisplayName === toolNameOrDisplayName
        );
      }
      return (
        toolNameOrDisplayName.test(p.toolName) ||
        toolNameOrDisplayName.test(p.toolDisplayName || '')
      );
    };

    let matched: PendingConfirmation | undefined;
    await this.waitUntil(
      () => {
        matched = this.getPendingConfirmations().find(matches);
        return !!matched;
      },
      {
        timeout,
        // Built lazily so the list reflects what is pending when the wait
        // gives up, not what was pending when it started.
        message: () =>
          `Timed out waiting for pending confirmation: ${toolNameOrDisplayName || 'any'}. Current pending: ${this.getPendingConfirmations()
            .map((p) => p.toolName)
            .join(', ')}`,
      },
    );

    this.lastAwaitedConfirmation = matched;
    return matched!;
  }

  /**
   * Answers a pending tool confirmation on the message bus.
   *
   * @param toolNameOrDisplayName A confirmation object, or a name/pattern that
   *   is first awaited via `waitForPendingConfirmation()`.
   * @param outcome Outcome to publish (defaults to ProceedOnce); `Cancel`
   *   publishes `confirmed: false`.
   * @throws Error when the rig is not initialized.
   */
  async resolveTool(
    toolNameOrDisplayName: string | RegExp | PendingConfirmation,
    outcome: ToolConfirmationOutcome = ToolConfirmationOutcome.ProceedOnce,
  ): Promise<void> {
    if (!this.config) throw new Error('AppRig not initialized');
    const messageBus = this.config.getMessageBus();

    let pending: PendingConfirmation;
    if (
      typeof toolNameOrDisplayName === 'object' &&
      'correlationId' in toolNameOrDisplayName
    ) {
      pending = toolNameOrDisplayName;
    } else {
      pending = await this.waitForPendingConfirmation(toolNameOrDisplayName);
    }

    await act(async () => {
      this.pendingConfirmations.delete(pending.correlationId);

      // A breakpoint is one-shot: lift its policy rule once the call is answered.
      if (this.breakpointTools.has(pending.toolName)) {
        this.removeToolPolicy(pending.toolName);
      }

      // eslint-disable-next-line @typescript-eslint/no-floating-promises
      messageBus.publish({
        type: MessageBusType.TOOL_CONFIRMATION_RESPONSE,
        correlationId: pending.correlationId,
        confirmed: outcome !== ToolConfirmationOutcome.Cancel,
        outcome,
      });
    });

    // Give the app a moment to react to the response.
    await act(async () => {
      await new Promise((resolve) => setTimeout(resolve, 100));
    });
  }

  /**
   * Resolves the confirmation returned by the most recent
   * `waitForPendingConfirmation()` call.
   *
   * @throws Error when nothing has been awaited yet.
   */
  async resolveAwaitedTool(
    outcome: ToolConfirmationOutcome = ToolConfirmationOutcome.ProceedOnce,
  ): Promise<void> {
    if (!this.lastAwaitedConfirmation) {
      throw new Error('No tool has been awaited yet');
    }
    await this.resolveTool(this.lastAwaitedConfirmation, outcome);
    this.lastAwaitedConfirmation = undefined;
  }

  /**
   * Adds a steering hint to the config inside `act()`.
   *
   * @throws Error when the rig is not initialized.
   */
  async addUserHint(hint: string) {
    if (!this.config) throw new Error('AppRig not initialized');
    await act(async () => {
      this.config!.addUserHint(hint);
    });
  }

  /**
   * Returns the underlying config.
   *
   * @throws Error when the rig is not initialized.
   */
  getConfig(): Config {
    if (!this.config) throw new Error('AppRig not initialized');
    return this.config;
  }

  /**
   * Writes `text` to the rendered app's stdin, then waits 50 ms for the UI to
   * process it.
   *
   * @throws Error when `render()` has not been called.
   */
  async type(text: string) {
    if (!this.renderResult) throw new Error('AppRig not initialized');
    await act(async () => {
      this.renderResult!.stdin.write(text);
    });
    await act(async () => {
      await new Promise((resolve) => setTimeout(resolve, 50));
    });
  }

  /** Sends a carriage return to stdin. */
  async pressEnter() {
    await this.type('\r');
  }

  /**
   * Sends a raw key sequence to stdin. Identical to `type()`; kept as a
   * separate name for readability in tests.
   *
   * @throws Error when `render()` has not been called.
   */
  async pressKey(key: string) {
    await this.type(key);
  }

  /** Latest rendered frame with ANSI escapes removed ('' before render). */
  get lastFrame() {
    if (!this.renderResult) return '';
    return stripAnsi(this.renderResult.lastFrame() || '');
  }

  /** Latest stdout frame with ANSI escapes removed ('' before render). */
  getStaticOutput() {
    if (!this.renderResult) return '';
    return stripAnsi(this.renderResult.stdout.lastFrame() || '');
  }

  /**
   * Waits until the latest frame contains `pattern` (substring for strings,
   * `test()` for regular expressions).
   *
   * @throws Error including the frame shown at timeout.
   */
  async waitForOutput(pattern: string | RegExp, timeout = 30000) {
    await this.waitUntil(
      () => {
        const frame = this.lastFrame;
        return typeof pattern === 'string'
          ? frame.includes(pattern)
          : pattern.test(frame);
      },
      {
        timeout,
        // Built lazily so the error shows the frame on screen when the wait
        // gave up rather than the frame from before waiting began.
        message: () =>
          `Timed out waiting for output: ${pattern}\nLast frame:\n${this.lastFrame}`,
      },
    );
  }

  /** Waits for the input prompt placeholder ('Type your message') to appear. */
  async waitForIdle(timeout = 20000) {
    await this.waitForOutput('Type your message', timeout);
  }

  /** Types `text` and submits it with Enter. */
  async sendMessage(text: string) {
    await this.type(text);
    await this.pressEnter();
  }

  /**
   * Tears the rig down: unmounts the app, restores environment stubs, resets
   * shared singletons and mocks, and removes the temporary directory.
   * Directory removal failures are logged, not thrown.
   */
  async unmount() {
    // Poison the chat recording service to prevent late writes to the test directory
    if (this.config) {
      const recordingService = this.config
        .getGeminiClient()
        ?.getChatRecordingService();
      if (recordingService) {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-type-assertion
        (recordingService as any).conversationFile = null;
      }
    }

    if (this.renderResult) {
      this.renderResult.unmount();
    }

    // Let pending timers and effects settle before global state is reset.
    await act(async () => {
      await new Promise((resolve) => setTimeout(resolve, 500));
    });

    vi.unstubAllEnvs();

    coreEvents.removeAllListeners();
    coreEvents.drainBacklogs();
    MockShellExecutionService.reset();
    ideContextStore.clear();
    // Forcefully clear IdeClient singleton promise
    // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-type-assertion
    (IdeClient as any).instancePromise = null;
    startupProfiler.clear();
    vi.clearAllMocks();

    this.config = undefined;
    this.renderResult = undefined;
    // Drop confirmation bookkeeping so nothing from this run is observable on
    // the instance after teardown.
    this.pendingConfirmations.clear();
    this.breakpointTools.clear();
    this.lastAwaitedConfirmation = undefined;

    if (this.testDir && fs.existsSync(this.testDir)) {
      try {
        fs.rmSync(this.testDir, { recursive: true, force: true });
      } catch (e) {
        debugLogger.warn(
          `Failed to cleanup test directory ${this.testDir}:`,
          e,
        );
      }
    }
  }
}
@@ -0,0 +1,140 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { vi } from 'vitest';
import type {
ShellExecutionHandle,
ShellExecutionResult,
ShellOutputEvent,
ShellExecutionConfig,
} from '@google/gemini-cli-core';
/**
 * A canned response for one shell command handled by
 * MockShellExecutionService.
 */
export interface MockShellCommand {
/** Matched by exact equality when a string, or via `test()` when a RegExp. */
command: string | RegExp;
/** Fields overlaid on the default result built by `execute()`. */
result: Partial<ShellExecutionResult>;
/** Output events passed to the caller's callback, in order, before the result is returned. */
events?: ShellOutputEvent[];
}
/**
 * Call signature shared by `MockShellExecutionService.execute` and the
 * original implementation registered through `setOriginalImplementation`.
 */
type ShellExecutionServiceExecute = (
commandToExecute: string,
cwd: string,
onOutputEvent: (event: ShellOutputEvent) => void,
abortSignal: AbortSignal,
shouldUseNodePty: boolean,
shellExecutionConfig: ShellExecutionConfig,
) => Promise<ShellExecutionHandle>;
/**
 * Static test double for the shell execution service. Commands are answered
 * from a list of canned responses; unmatched commands either pass through to
 * a registered original implementation or yield a "command not found" result.
 */
export class MockShellExecutionService {
  private static mockCommands: MockShellCommand[] = [];
  private static originalExecute: ShellExecutionServiceExecute | undefined;
  private static passthroughEnabled = false;

  /**
   * Registers the original implementation to allow falling back to real shell execution.
   */
  static setOriginalImplementation(
    implementation: ShellExecutionServiceExecute,
  ) {
    this.originalExecute = implementation;
  }

  /**
   * Enables or disables passthrough to the real implementation when no mock matches.
   */
  static setPassthrough(enabled: boolean) {
    this.passthroughEnabled = enabled;
  }

  /** Replaces the list of canned commands; the first matching entry wins. */
  static setMockCommands(commands: MockShellCommand[]) {
    this.mockCommands = commands;
  }

  /**
   * Drops all canned commands, disables passthrough and clears the recorded
   * calls of every spy on this class. The registered original implementation
   * is kept. `mockClear` only resets call history, so the default
   * implementations of `isPtyActive`, `onExit` and `subscribe` stay in place.
   */
  static reset() {
    this.mockCommands = [];
    this.passthroughEnabled = false;
    this.writeToPty.mockClear();
    this.isPtyActive.mockClear();
    this.onExit.mockClear();
    this.kill.mockClear();
    this.background.mockClear();
    this.subscribe.mockClear();
    this.resizePty.mockClear();
    this.scrollPty.mockClear();
  }

  /**
   * Resolves a command against the canned responses.
   *
   * - On a match, the mock's events are emitted synchronously and its result
   *   is overlaid on defaults (empty output, exit code 0, not aborted).
   * - Otherwise, when passthrough is enabled and an original implementation
   *   is registered, the call is forwarded unchanged.
   * - Otherwise a result with exit code 127 and a "Command not found" output
   *   is returned.
   *
   * @returns A handle whose `result` promise is already resolved for mocked
   *   and not-found commands.
   */
  static async execute(
    commandToExecute: string,
    cwd: string,
    onOutputEvent: (event: ShellOutputEvent) => void,
    abortSignal: AbortSignal,
    shouldUseNodePty: boolean,
    shellExecutionConfig: ShellExecutionConfig,
  ): Promise<ShellExecutionHandle> {
    // State is read through the class name instead of `this` so the method
    // keeps working when it is invoked detached from the class.
    const service = MockShellExecutionService;

    const mock = service.mockCommands.find((m) =>
      typeof m.command === 'string'
        ? m.command === commandToExecute
        : m.command.test(commandToExecute),
    );

    // Random pid in 1..9999; never 0, which callers could treat as "no pid".
    const pid = 1 + Math.floor(Math.random() * 9999);

    if (mock) {
      if (mock.events) {
        for (const event of mock.events) {
          onOutputEvent(event);
        }
      }

      const result: ShellExecutionResult = {
        rawOutput: Buffer.from(mock.result.output || ''),
        output: mock.result.output || '',
        exitCode: mock.result.exitCode ?? 0,
        signal: mock.result.signal ?? null,
        error: mock.result.error ?? null,
        aborted: false,
        pid,
        executionMethod: 'none',
        // Explicit fields on the mock override every default above.
        ...mock.result,
      };

      return {
        pid,
        result: Promise.resolve(result),
      };
    }

    if (service.passthroughEnabled && service.originalExecute) {
      return service.originalExecute(
        commandToExecute,
        cwd,
        onOutputEvent,
        abortSignal,
        shouldUseNodePty,
        shellExecutionConfig,
      );
    }

    return {
      pid,
      result: Promise.resolve({
        rawOutput: Buffer.from(''),
        output: `Command not found: ${commandToExecute}`,
        exitCode: 127,
        signal: null,
        error: null,
        aborted: false,
        pid,
        executionMethod: 'none',
      }),
    };
  }

  // Spy-backed members that stand in for the rest of the service's static API.
  static writeToPty = vi.fn();
  static isPtyActive = vi.fn(() => false);
  static onExit = vi.fn(() => () => {});
  static kill = vi.fn();
  static background = vi.fn();
  static subscribe = vi.fn(() => () => {});
  static resizePty = vi.fn();
  static scrollPty = vi.fn();
}
@@ -0,0 +1 @@
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! How can I help you today?"}],"role":"model"},"finishReason":"STOP"}]}]}
@@ -0,0 +1,4 @@
{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"Starting a long task. First, I'll list the files."},{"functionCall":{"name":"list_directory","args":{"dir_path":"."}}}]},"finishReason":"STOP"}]}]}
{"method":"generateContent","response":{"candidates":[{"content":{"role":"model","parts":[{"text":"ACK: I will focus on .txt files now."}]},"finishReason":"STOP"}]}}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I see the files. Since you want me to focus on .txt files, I will read file1.txt."},{"functionCall":{"name":"read_file","args":{"file_path":"file1.txt"}}}]},"finishReason":"STOP"}]}]}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I have read file1.txt. Task complete."}]},"finishReason":"STOP"}]}]}
+43 -32
View File
@@ -33,6 +33,7 @@ import { makeFakeConfig, type Config } from '@google/gemini-cli-core';
import { FakePersistentState } from './persistentStateFake.js';
import { AppContext, type AppState } from '../ui/contexts/AppContext.js';
import { createMockSettings } from './settings.js';
import { SessionStatsProvider } from '../ui/contexts/SessionContext.js';
export const persistentStateMock = new FakePersistentState();
@@ -160,6 +161,8 @@ const baseMockUiState = {
proQuotaRequest: null,
validationRequest: null,
},
hintMode: false,
hintBuffer: '',
};
export const mockAppState: AppState = {
@@ -209,6 +212,10 @@ const mockUIActions: UIActions = {
setActiveBackgroundShellPid: vi.fn(),
setIsBackgroundShellListOpen: vi.fn(),
setAuthContext: vi.fn(),
onHintInput: vi.fn(),
onHintBackspace: vi.fn(),
onHintClear: vi.fn(),
onHintSubmit: vi.fn(),
handleRestart: vi.fn(),
handleNewAgentsSelect: vi.fn(),
};
@@ -306,39 +313,43 @@ export const renderWithProviders = (
<UIStateContext.Provider value={finalUiState}>
<VimModeProvider settings={finalSettings}>
<ShellFocusContext.Provider value={shellFocus}>
<StreamingContext.Provider value={finalUiState.streamingState}>
<UIActionsContext.Provider value={finalUIActions}>
<ToolActionsProvider
config={config}
toolCalls={allToolCalls}
>
<AskUserActionsProvider
request={null}
onSubmit={vi.fn()}
onCancel={vi.fn()}
<SessionStatsProvider>
<StreamingContext.Provider
value={finalUiState.streamingState}
>
<UIActionsContext.Provider value={finalUIActions}>
<ToolActionsProvider
config={config}
toolCalls={allToolCalls}
>
<KeypressProvider>
<MouseProvider
mouseEventsEnabled={mouseEventsEnabled}
>
<TerminalProvider>
<ScrollProvider>
<Box
width={terminalWidth}
flexShrink={0}
flexGrow={0}
flexDirection="column"
>
{component}
</Box>
</ScrollProvider>
</TerminalProvider>
</MouseProvider>
</KeypressProvider>
</AskUserActionsProvider>
</ToolActionsProvider>
</UIActionsContext.Provider>
</StreamingContext.Provider>
<AskUserActionsProvider
request={null}
onSubmit={vi.fn()}
onCancel={vi.fn()}
>
<KeypressProvider>
<MouseProvider
mouseEventsEnabled={mouseEventsEnabled}
>
<TerminalProvider>
<ScrollProvider>
<Box
width={terminalWidth}
flexShrink={0}
flexGrow={0}
flexDirection="column"
>
{component}
</Box>
</ScrollProvider>
</TerminalProvider>
</MouseProvider>
</KeypressProvider>
</AskUserActionsProvider>
</ToolActionsProvider>
</UIActionsContext.Provider>
</StreamingContext.Provider>
</SessionStatsProvider>
</ShellFocusContext.Provider>
</VimModeProvider>
</UIStateContext.Provider>
+95
View File
@@ -94,6 +94,10 @@ import { basename } from 'node:path';
import { computeTerminalTitle } from '../utils/windowTitle.js';
import { useTextBuffer } from './components/shared/text-buffer.js';
import { useLogger } from './hooks/useLogger.js';
import {
buildUserSteeringHintPrompt,
generateSteeringAckMessage,
} from '@google/gemini-cli-core';
import { useGeminiStream } from './hooks/useGeminiStream.js';
import { type BackgroundShell } from './hooks/shellCommandProcessor.js';
import { useVim } from './hooks/vim.js';
@@ -603,6 +607,7 @@ export const AppContainer = (props: AppContainerProps) => {
apiKeyDefaultValue,
reloadApiKey,
} = useAuthCommand(settings, config, initializationResult.authError);
const [authContext, setAuthContext] = useState<{ requiresRestart?: boolean }>(
{},
);
@@ -963,6 +968,19 @@ Logging in with Google... Restarting Gemini CLI to continue.
}
}, [pendingRestorePrompt, inputHistory, historyManager.history]);
// Index of the newest hint that has already been handed to the model;
// -1 means nothing has been consumed yet.
const lastProcessedHintIndexRef = useRef<number>(-1);

// Returns every hint added since the last call, joined with newlines, and
// advances the cursor; returns null when there is nothing new.
const consumePendingHints = useCallback(() => {
  const unseenHints = config.getUserHintsAfter(
    lastProcessedHintIndexRef.current,
  );
  if (unseenHints.length > 0) {
    lastProcessedHintIndexRef.current = config.getLatestHintIndex();
    return unseenHints.join('\n');
  }
  return null;
}, [config]);
const {
streamingState,
submitQuery,
@@ -1001,6 +1019,7 @@ Logging in with Google... Restarting Gemini CLI to continue.
terminalWidth,
terminalHeight,
embeddedShellFocused,
consumePendingHints,
);
toggleBackgroundShellRef.current = toggleBackgroundShell;
@@ -1103,10 +1122,38 @@ Logging in with Google... Restarting Gemini CLI to continue.
],
);
// Records a steering hint: stores it on the config and echoes it into the
// history as a 'hint' item. Blank input is ignored.
const handleHintSubmit = useCallback(
  (hint: string) => {
    const steeringText = hint.trim();
    if (steeringText) {
      config.addUserHint(steeringText);
      // Render hints with a distinct style.
      const hintItem = {
        type: 'hint',
        text: steeringText,
      } as Omit<HistoryItem, 'id'>;
      historyManager.addItem(hintItem);
    }
  },
  [config, historyManager],
);
const handleFinalSubmit = useCallback(
async (submittedValue: string) => {
const isSlash = isSlashCommand(submittedValue.trim());
const isIdle = streamingState === StreamingState.Idle;
const isAgentRunning =
streamingState === StreamingState.Responding ||
isToolExecuting([
...pendingSlashCommandHistoryItems,
...pendingGeminiHistoryItems,
]);
if (isAgentRunning && !isSlash) {
handleHintSubmit(submittedValue);
addInput(submittedValue);
return;
}
if (isSlash || (isIdle && isMcpReady)) {
if (!isSlash) {
@@ -1148,7 +1195,10 @@ Logging in with Google... Restarting Gemini CLI to continue.
isMcpReady,
streamingState,
messageQueue.length,
pendingSlashCommandHistoryItems,
pendingGeminiHistoryItems,
config,
handleHintSubmit,
],
);
@@ -1814,6 +1864,45 @@ Logging in with Google... Restarting Gemini CLI to continue.
[pendingSlashCommandHistoryItems, pendingGeminiHistoryItems],
);
// When the app is idle, flush any steering hints that have not been consumed
// yet: show an acknowledgement in the history and submit the hint as a query.
useEffect(() => {
// Only act once the config and MCP are ready, nothing is streaming, and no
// tool is waiting for the user's confirmation.
if (
!isConfigInitialized ||
streamingState !== StreamingState.Idle ||
!isMcpReady ||
isToolAwaitingConfirmation(pendingHistoryItems)
) {
return;
}
// Consuming advances the hint cursor, so each hint is submitted at most once
// from this effect.
const pendingHint = consumePendingHints();
if (!pendingHint) {
return;
}
const geminiClient = config.getGeminiClient();
// Fire-and-forget: the acknowledgement is added whenever it resolves and is
// not awaited before the query below is submitted.
// NOTE(review): there is no rejection handler here; if
// generateSteeringAckMessage can reject, this becomes an unhandled
// rejection — confirm whether it handles its own failures.
void generateSteeringAckMessage(geminiClient, pendingHint).then(
(ackText) => {
historyManager.addItem({
type: 'info',
icon: '· ',
color: 'gray',
marginBottom: 1,
text: ackText,
} as Omit<HistoryItem, 'id'>);
},
);
void submitQuery([{ text: buildUserSteeringHintPrompt(pendingHint) }]);
}, [
config,
historyManager,
isConfigInitialized,
isMcpReady,
streamingState,
submitQuery,
consumePendingHints,
pendingHistoryItems,
]);
const allToolCalls = useMemo(
() =>
pendingHistoryItems
@@ -1975,6 +2064,8 @@ Logging in with Google... Restarting Gemini CLI to continue.
isBackgroundShellListOpen,
adminSettingsChanged,
newAgents,
hintMode: false,
hintBuffer: '',
}),
[
isThemeDialogOpen,
@@ -2137,6 +2228,10 @@ Logging in with Google... Restarting Gemini CLI to continue.
setActiveBackgroundShellPid,
setIsBackgroundShellListOpen,
setAuthContext,
onHintInput: () => {},
onHintBackspace: () => {},
onHintClear: () => {},
onHintSubmit: () => {},
handleRestart: async () => {
if (process.send) {
const remoteSettings = config.getRemoteAdminSettings();
@@ -50,6 +50,7 @@ export const DialogManager = ({
const uiState = useUIState();
const uiActions = useUIActions();
const {
constrainHeight,
terminalHeight,
+2 -1
View File
@@ -71,7 +71,8 @@ export const Footer: React.FC = () => {
const justifyContent = hideCWD && hideModelInfo ? 'center' : 'space-between';
const displayVimMode = vimEnabled ? vimMode : undefined;
const showDebugProfiler = debugMode || isDevelopment;
const showDebugProfiler =
debugMode || (isDevelopment && settings.merged.general.devtools);
return (
<Box
@@ -96,6 +96,7 @@ describe('<Header />', () => {
},
background: {
primary: '',
hintMode: '',
diff: { added: '', removed: '' },
},
border: {
@@ -44,6 +44,18 @@ describe('<HistoryItemDisplay />', () => {
expect(lastFrame()).toContain('Hello');
});
// A history item of type 'hint' must surface its text in the rendered frame.
it('renders HintMessage for "hint" type', () => {
  const hintItem: HistoryItem = {
    ...baseItem,
    type: 'hint',
    text: 'Try using ripgrep first',
  };

  const { lastFrame: readFrame } = renderWithProviders(
    <HistoryItemDisplay {...baseItem} item={hintItem} />,
  );

  expect(readFrame()).toContain('Try using ripgrep first');
});
it('renders UserMessage for "user" type with slash command', () => {
const item: HistoryItem = {
...baseItem,
@@ -35,6 +35,7 @@ import { ChatList } from './views/ChatList.js';
import { HooksList } from './views/HooksList.js';
import { ModelMessage } from './messages/ModelMessage.js';
import { ThinkingMessage } from './messages/ThinkingMessage.js';
import { HintMessage } from './messages/HintMessage.js';
import { getInlineThinkingMode } from '../utils/inlineThinkingMode.js';
import { useSettings } from '../contexts/SettingsContext.js';
@@ -71,6 +72,9 @@ export const HistoryItemDisplay: React.FC<HistoryItemDisplayProps> = ({
{itemForDisplay.type === 'thinking' && inlineThinkingMode !== 'off' && (
<ThinkingMessage thought={itemForDisplay.thought} />
)}
{itemForDisplay.type === 'hint' && (
<HintMessage text={itemForDisplay.text} />
)}
{itemForDisplay.type === 'user' && (
<UserMessage text={itemForDisplay.text} width={terminalWidth} />
)}
@@ -102,6 +106,7 @@ export const HistoryItemDisplay: React.FC<HistoryItemDisplayProps> = ({
text={itemForDisplay.text}
icon={itemForDisplay.icon}
color={itemForDisplay.color}
marginBottom={itemForDisplay.marginBottom}
/>
)}
{itemForDisplay.type === 'warning' && (
@@ -238,7 +238,7 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
]);
const [expandedSuggestionIndex, setExpandedSuggestionIndex] =
useState<number>(-1);
const shellHistory = useShellHistory(config.getProjectRoot());
const shellHistory = useShellHistory(config.getProjectRoot(), config.storage);
const shellHistoryData = shellHistory.history;
const completion = useCommandCompletion({
@@ -0,0 +1,53 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type React from 'react';
import { Text, Box } from 'ink';
import { theme } from '../../semantic-colors.js';
import { SCREEN_READER_USER_PREFIX } from '../../textConstants.js';
import { HalfLinePaddedBox } from '../shared/HalfLinePaddedBox.js';
import { useConfig } from '../../contexts/ConfigContext.js';
/** Props accepted by the HintMessage component. */
interface HintMessageProps {
/** Hint text; it is rendered after the "Steering Hint: " label. */
text: string;
}
/**
 * Displays a steering hint as an accent-coloured, italic row made of a
 * fixed-width "💡 " gutter followed by the text "Steering Hint: <text>".
 *
 * When the config enables background colours the row gets horizontal padding
 * and no vertical margin; otherwise it gets a vertical margin of 1 and no
 * padding.
 */
export const HintMessage: React.FC<HintMessageProps> = ({ text }) => {
  const config = useConfig();
  const backgroundEnabled = config.getUseBackgroundColor();

  const accent = theme.text.accent;
  const prefix = '💡 ';
  // The gutter is sized from the prefix's string length (UTF-16 code units).
  const gutterWidth = prefix.length;
  const verticalMargin = backgroundEnabled ? 0 : 1;
  const horizontalPadding = backgroundEnabled ? 1 : 0;

  return (
    <HalfLinePaddedBox
      backgroundBaseColor={accent}
      backgroundOpacity={0.1}
      useBackgroundColor={backgroundEnabled}
    >
      <Box
        flexDirection="row"
        paddingY={0}
        marginY={verticalMargin}
        paddingX={horizontalPadding}
        alignSelf="flex-start"
      >
        <Box width={gutterWidth} flexShrink={0}>
          <Text color={accent} aria-label={SCREEN_READER_USER_PREFIX}>
            {prefix}
          </Text>
        </Box>
        <Box flexGrow={1}>
          <Text wrap="wrap" italic color={accent}>
            {`Steering Hint: ${text}`}
          </Text>
        </Box>
      </Box>
    </HalfLinePaddedBox>
  );
};
@@ -13,19 +13,21 @@ interface InfoMessageProps {
text: string;
icon?: string;
color?: string;
marginBottom?: number;
}
export const InfoMessage: React.FC<InfoMessageProps> = ({
text,
icon,
color,
marginBottom,
}) => {
color ??= theme.status.warning;
const prefix = icon ?? ' ';
const prefixWidth = prefix.length;
return (
<Box flexDirection="row" marginTop={1}>
<Box flexDirection="row" marginTop={1} marginBottom={marginBottom ?? 0}>
<Box width={prefixWidth}>
<Text color={color}>{prefix}</Text>
</Box>
@@ -73,6 +73,10 @@ export interface UIActions {
setActiveBackgroundShellPid: (pid: number) => void;
setIsBackgroundShellListOpen: (isOpen: boolean) => void;
setAuthContext: (context: { requiresRestart?: boolean }) => void;
onHintInput: (char: string) => void;
onHintBackspace: () => void;
onHintClear: () => void;
onHintSubmit: (hint: string) => void;
handleRestart: () => void;
handleNewAgentsSelect: (choice: NewAgentsChoice) => Promise<void>;
}
@@ -173,6 +173,8 @@ export interface UIState {
isBackgroundShellListOpen: boolean;
adminSettingsChanged: boolean;
newAgents: AgentDefinition[] | null;
hintMode: boolean;
hintBuffer: string;
transientMessage: {
text: string;
type: TransientMessageType;
@@ -56,6 +56,11 @@ const MockedGeminiClientClass = vi.hoisted(() =>
this.startChat = mockStartChat;
this.sendMessageStream = mockSendMessageStream;
this.addHistory = vi.fn();
this.generateContent = vi.fn().mockResolvedValue({
candidates: [
{ content: { parts: [{ text: 'Got it. Focusing on tests only.' }] } },
],
});
this.getCurrentSequenceModel = vi.fn().mockReturnValue('test-model');
this.getChat = vi.fn().mockReturnValue({
recordCompletedToolCalls: vi.fn(),
@@ -152,13 +157,17 @@ vi.mock('./useLogger.js', () => ({
const mockStartNewPrompt = vi.fn();
const mockAddUsage = vi.fn();
vi.mock('../contexts/SessionContext.js', () => ({
useSessionStats: vi.fn(() => ({
startNewPrompt: mockStartNewPrompt,
addUsage: mockAddUsage,
getPromptCount: vi.fn(() => 5),
})),
}));
// Partial mock of SessionContext: every real export is kept and only
// `useSessionStats` is replaced with a stub backed by the shared spies above.
// The original module is typed through the `importOriginal` generic rather
// than cast to `any`, so the spread below stays type-checked.
vi.mock('../contexts/SessionContext.js', async (importOriginal) => {
  const actual =
    await importOriginal<typeof import('../contexts/SessionContext.js')>();
  return {
    ...actual,
    useSessionStats: vi.fn(() => ({
      startNewPrompt: mockStartNewPrompt,
      addUsage: mockAddUsage,
      getPromptCount: vi.fn(() => 5),
    })),
  };
});
vi.mock('./slashCommandProcessor.js', () => ({
handleSlashCommand: vi.fn().mockReturnValue(false),
@@ -661,6 +670,113 @@ describe('useGeminiStream', () => {
);
});
it('should inject steering hint prompt for continuation', async () => {
  // One successful tool call that has not been submitted to the model yet;
  // completing it is what triggers the continuation request.
  const toolCallResponseParts: Part[] = [{ text: 'tool final response' }];
  const completedToolCalls: TrackedToolCall[] = [
    {
      request: {
        callId: 'call1',
        name: 'tool1',
        args: {},
        isClientInitiated: false,
        prompt_id: 'prompt-id-ack',
      },
      status: 'success',
      responseSubmittedToGemini: false,
      response: {
        callId: 'call1',
        responseParts: toolCallResponseParts,
        errorType: undefined,
      },
      tool: {
        displayName: 'MockTool',
      },
      invocation: {
        getDescription: () => `Mock description`,
      } as unknown as AnyToolInvocation,
    } as TrackedCompletedToolCall,
  ];

  mockSendMessageStream.mockReturnValue(
    (async function* () {
      yield {
        type: ServerGeminiEventType.Content,
        value: 'Applied the requested adjustment.',
      };
    })(),
  );

  // Capture the completion callback the hook hands to the tool scheduler so
  // the test can invoke it directly.
  let capturedOnComplete:
    | ((completedTools: TrackedToolCall[]) => Promise<void>)
    | null = null;
  mockUseToolScheduler.mockImplementation((onComplete) => {
    capturedOnComplete = onComplete;
    return [
      [],
      mockScheduleToolCalls,
      mockMarkToolsAsSubmitted,
      vi.fn(),
      mockCancelAllToolCalls,
      0,
    ];
  });

  renderHookWithProviders(() =>
    useGeminiStream(
      new MockedGeminiClientClass(mockConfig),
      [],
      mockAddItem,
      mockConfig,
      mockLoadedSettings,
      mockOnDebugMessage,
      mockHandleSlashCommand,
      false,
      () => 'vscode' as EditorType,
      () => {},
      () => Promise.resolve(),
      false,
      () => {},
      () => {},
      () => {},
      80,
      24,
      undefined,
      // consumeUserHint: always reports this pending steering hint.
      () => 'focus on tests only',
    ),
  );

  await act(async () => {
    if (capturedOnComplete) {
      await new Promise((resolve) => setTimeout(resolve, 0));
      await capturedOnComplete(completedToolCalls);
    }
  });

  await waitFor(() => {
    expect(mockSendMessageStream).toHaveBeenCalledTimes(1);
  });

  // The hint prompt must be the first part of the continuation message.
  const sentParts = mockSendMessageStream.mock.calls[0][0] as Part[];
  const injectedHintPart = sentParts[0] as { text?: string };
  expect(injectedHintPart.text).toContain(
    'User steering update: "focus on tests only"',
  );
  expect(injectedHintPart.text).toContain(
    'Classify it as ADD_TASK, MODIFY_TASK, CANCEL_TASK, or EXTRA_CONTEXT.',
  );
  expect(injectedHintPart.text).toContain(
    'Do not cancel/skip tasks unless the user explicitly cancels them.',
  );

  // The acknowledgement is added from a promise the hook does not await, so
  // poll for it instead of asserting once and racing that promise.
  await waitFor(() => {
    expect(
      mockAddItem.mock.calls.some(
        ([item]) =>
          item?.type === 'info' &&
          typeof item.text === 'string' &&
          item.text.includes('Got it. Focusing on tests only.'),
      ),
    ).toBe(true);
  });
});
it('should handle all tool calls being cancelled', async () => {
const cancelledToolCalls: TrackedToolCall[] = [
{
@@ -32,6 +32,8 @@ import {
ValidationRequiredError,
coreEvents,
CoreEvent,
buildUserSteeringHintPrompt,
generateSteeringAckMessage,
} from '@google/gemini-cli-core';
import type {
Config,
@@ -81,6 +83,7 @@ import path from 'node:path';
import { useSessionStats } from '../contexts/SessionContext.js';
import { useKeypress } from './useKeypress.js';
import type { LoadedSettings } from '../../config/settings.js';
import { theme } from '../semantic-colors.js';
type ToolResponseWithParts = ToolCallResponseInfo & {
llmContent?: PartListUnion;
@@ -185,6 +188,7 @@ export const useGeminiStream = (
terminalWidth: number,
terminalHeight: number,
isShellFocused?: boolean,
consumeUserHint?: () => string | null,
) => {
const [initError, setInitError] = useState<string | null>(null);
const [retryStatus, setRetryStatus] = useState<RetryAttemptPayload | null>(
@@ -1561,6 +1565,28 @@ export const useGeminiStream = (
const responsesToSend: Part[] = geminiTools.flatMap(
(toolCall) => toolCall.response.responseParts,
);
// Steering hints: when a hint source was supplied, take the pending hint (if
// any) and fold it into this tool-response message.
if (consumeUserHint) {
const userHint = consumeUserHint();
// Ignore null, empty and whitespace-only hints.
if (userHint && userHint.trim().length > 0) {
const hintText = userHint.trim();
// Place the steering prompt ahead of the tool response parts.
responsesToSend.unshift({
text: buildUserSteeringHintPrompt(hintText),
});
// Fire-and-forget: the acknowledgement is requested without being awaited
// and is added to history as an info item once it resolves.
// NOTE(review): no rejection handler is attached here; presumably
// generateSteeringAckMessage never rejects — confirm, otherwise add a .catch.
void generateSteeringAckMessage(geminiClient, hintText).then(
(ackText) => {
addItem({
type: 'info',
icon: '· ',
color: theme.text.secondary,
marginBottom: 1,
text: ackText,
} as Omit<HistoryItem, 'id'>);
},
);
}
}
const callIdsToMarkAsSubmitted = geminiTools.map(
(toolCall) => toolCall.request.callId,
);
@@ -1593,6 +1619,7 @@ export const useGeminiStream = (
modelSwitchedFromQuotaError,
addItem,
registerBackgroundShell,
consumeUserHint,
],
);
+16 -4
View File
@@ -79,14 +79,26 @@ export function useShellHistory(
const [historyFilePath, setHistoryFilePath] = useState<string | null>(null);
useEffect(() => {
let isMounted = true;
async function loadHistory() {
const filePath = await getHistoryFilePath(projectRoot, storage);
setHistoryFilePath(filePath);
const loadedHistory = await readHistoryFile(filePath);
setHistory(loadedHistory.reverse()); // Newest first
try {
const filePath = await getHistoryFilePath(projectRoot, storage);
if (!isMounted) return;
setHistoryFilePath(filePath);
const loadedHistory = await readHistoryFile(filePath);
if (!isMounted) return;
setHistory(loadedHistory.reverse()); // Newest first
} catch (error) {
if (isMounted) {
debugLogger.error('Error loading shell history:', error);
}
}
}
// eslint-disable-next-line @typescript-eslint/no-floating-promises
loadHistory();
return () => {
isMounted = false;
};
}, [projectRoot, storage]);
const addCommandToHistory = useCallback(
+1
View File
@@ -36,6 +36,7 @@ const noColorSemanticColors: SemanticColors = {
},
background: {
primary: '',
hintMode: '',
diff: {
added: '',
removed: '',
@@ -16,6 +16,7 @@ export interface SemanticColors {
};
background: {
primary: string;
hintMode: string;
diff: {
added: string;
removed: string;
@@ -48,6 +49,7 @@ export const lightSemanticColors: SemanticColors = {
},
background: {
primary: lightTheme.Background,
hintMode: '#E8E0F0',
diff: {
added: lightTheme.DiffAdded,
removed: lightTheme.DiffRemoved,
@@ -80,6 +82,7 @@ export const darkSemanticColors: SemanticColors = {
},
background: {
primary: darkTheme.Background,
hintMode: '#352A45',
diff: {
added: darkTheme.DiffAdded,
removed: darkTheme.DiffRemoved,
+2
View File
@@ -131,6 +131,7 @@ export class Theme {
},
background: {
primary: this.colors.Background,
hintMode: this.type === 'light' ? '#E8E0F0' : '#352A45',
diff: {
added: this.colors.DiffAdded,
removed: this.colors.DiffRemoved,
@@ -400,6 +401,7 @@ export function createCustomTheme(customTheme: CustomTheme): Theme {
},
background: {
primary: customTheme.background?.primary ?? colors.Background,
hintMode: 'magenta',
diff: {
added: customTheme.background?.diff?.added ?? colors.DiffAdded,
removed: customTheme.background?.diff?.removed ?? colors.DiffRemoved,
+8
View File
@@ -123,6 +123,7 @@ export type HistoryItemInfo = HistoryItemBase & {
text: string;
icon?: string;
color?: string;
marginBottom?: number;
};
export type HistoryItemError = HistoryItemBase & {
@@ -225,6 +226,11 @@ export type HistoryItemThinking = HistoryItemBase & {
thought: ThoughtSummary;
};
/** History entry tagged `type: 'hint'` that carries a hint's text. */
export type HistoryItemHint = HistoryItemBase & {
type: 'hint';
/** The hint text. */
text: string;
};
export type HistoryItemChatList = HistoryItemBase & {
type: 'chat_list';
chats: ChatDetail[];
@@ -349,6 +355,7 @@ export type HistoryItemWithoutId =
| HistoryItemMcpStatus
| HistoryItemChatList
| HistoryItemThinking
| HistoryItemHint
| HistoryItemHooksList;
export type HistoryItem = HistoryItemWithoutId & { id: number };
@@ -374,6 +381,7 @@ export enum MessageType {
MCP_STATUS = 'mcp_status',
CHAT_LIST = 'chat_list',
HOOKS_LIST = 'hooks_list',
HINT = 'hint',
}
// Simplified message structure for internal feedback
+26 -1
View File
@@ -59,6 +59,7 @@ import { getVersion } from '../utils/version.js';
import { getToolCallContext } from '../utils/toolCallContext.js';
import { scheduleAgentTools } from './agent-scheduler.js';
import { DeadlineTimer } from '../utils/deadlineTimer.js';
import { formatUserHintsForModel } from '../utils/flashLiteHelper.js';
/** A callback function to report on agent activity. */
export type ActivityCallback = (activity: SubagentActivityEvent) => void;
@@ -462,7 +463,17 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
const query = this.definition.promptConfig.query
? templateString(this.definition.promptConfig.query, augmentedInputs)
: DEFAULT_QUERY_STRING;
let currentMessage: Content = { role: 'user', parts: [{ text: query }] };
let lastProcessedHintIndex = this.runtimeContext.getLatestHintIndex();
const initialHints = this.runtimeContext.getUserHints();
const formattedInitialHints = formatUserHintsForModel(initialHints);
let currentMessage: Content = formattedInitialHints
? {
role: 'user',
parts: [{ text: formattedInitialHints }, { text: query }],
}
: { role: 'user', parts: [{ text: query }] };
while (true) {
// Check for termination conditions like max turns.
@@ -501,6 +512,20 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
// If status is 'continue', update message for the next loop
currentMessage = turnResult.nextMessage;
// Check for new user steering hints
const newHints = this.runtimeContext.getUserHintsAfter(
lastProcessedHintIndex,
);
if (newHints.length > 0) {
const formattedHints = formatUserHintsForModel(newHints);
if (formattedHints) {
// Prepend hints to the message for the next turn so they precede its existing parts
currentMessage.parts ??= [];
currentMessage.parts.unshift({ text: formattedHints });
}
lastProcessedHintIndex = this.runtimeContext.getLatestHintIndex();
}
}
// === UNIFIED RECOVERY BLOCK ===
+32 -2
View File
@@ -64,6 +64,8 @@ export class SubagentTool extends BaseDeclarativeTool<AgentInputs, ToolResult> {
}
}
import { formatUserHintsForModel } from '../utils/flashLiteHelper.js';
class SubAgentInvocation extends BaseToolInvocation<AgentInputs, ToolResult> {
constructor(
params: AgentInputs,
@@ -88,7 +90,10 @@ class SubAgentInvocation extends BaseToolInvocation<AgentInputs, ToolResult> {
override async shouldConfirmExecute(
abortSignal: AbortSignal,
): Promise<ToolCallConfirmationDetails | false> {
const invocation = this.buildSubInvocation(this.definition, this.params);
const invocation = this.buildSubInvocation(
this.definition,
this.withUserHints(this.params),
);
return invocation.shouldConfirmExecute(abortSignal);
}
@@ -107,11 +112,36 @@ class SubAgentInvocation extends BaseToolInvocation<AgentInputs, ToolResult> {
);
}
const invocation = this.buildSubInvocation(this.definition, this.params);
const invocation = this.buildSubInvocation(
this.definition,
this.withUserHints(this.params),
);
return invocation.execute(signal, updateOutput);
}
/**
 * Builds the arguments passed to the sub-invocation, prefixing the `query`
 * argument with the formatted user hints.
 *
 * The arguments are returned unchanged when the agent definition is not of
 * kind 'remote', when formatting the hints yields nothing, or when `query` is
 * not a non-blank string.
 *
 * @param agentArgs Arguments originally supplied for the agent.
 * @returns Either `agentArgs` itself or a copy whose `query` starts with the
 *   formatted hints followed by a blank line.
 */
private withUserHints(agentArgs: AgentInputs): AgentInputs {
  if (this.definition.kind === 'remote') {
    const hintBlock = formatUserHintsForModel(this.config.getUserHints());
    if (hintBlock) {
      const originalQuery = agentArgs['query'];
      if (
        typeof originalQuery === 'string' &&
        originalQuery.trim().length > 0
      ) {
        return {
          ...agentArgs,
          query: `${hintBlock}\n\n${originalQuery}`,
        };
      }
    }
  }

  // Every other case falls through to the untouched arguments.
  return agentArgs;
}
private buildSubInvocation(
definition: AgentDefinition,
agentArgs: AgentInputs,
+41 -5
View File
@@ -41,6 +41,7 @@ import type { SkillDefinition } from '../skills/skillLoader.js';
import type { McpClientManager } from '../tools/mcp-client-manager.js';
import { DEFAULT_MODEL_CONFIGS } from './defaultModelConfigs.js';
import { DEFAULT_GEMINI_MODEL } from './models.js';
import { Storage } from './storage.js';
vi.mock('fs', async (importOriginal) => {
const actual = await importOriginal<typeof import('fs')>();
@@ -279,16 +280,21 @@ describe('Server Config (config.ts)', () => {
await expect(config.initialize()).resolves.toBeUndefined();
});
it('should throw an error if initialized more than once', async () => {
it('should deduplicate multiple calls to initialize', async () => {
const config = new Config({
...baseParams,
checkpointing: false,
});
await expect(config.initialize()).resolves.toBeUndefined();
await expect(config.initialize()).rejects.toThrow(
'Config was already initialized',
);
const storageSpy = vi.spyOn(Storage.prototype, 'initialize');
await Promise.all([
config.initialize(),
config.initialize(),
config.initialize(),
]);
expect(storageSpy).toHaveBeenCalledTimes(1);
});
it('should await MCP initialization in non-interactive mode', async () => {
@@ -2583,4 +2589,34 @@ describe('syncPlanModeTools', () => {
expect(setToolsSpy).toHaveBeenCalled();
});
describe('user hints', () => {
  it('stores trimmed hints and exposes them via indexing', () => {
    const config = new Config(baseParams);
    // Two real hints (one padded with whitespace) and one blank hint.
    for (const rawHint of [' first hint ', 'second hint', ' ']) {
      config.addUserHint(rawHint);
    }

    const storedHints = ['first hint', 'second hint'];
    expect(config.getUserHints()).toEqual(storedHints);
    expect(config.getLatestHintIndex()).toBe(1);

    // A negative index yields everything; otherwise only later hints.
    expect(config.getUserHintsAfter(-1)).toEqual(storedHints);
    expect(config.getUserHintsAfter(0)).toEqual(['second hint']);
    expect(config.getUserHintsAfter(1)).toEqual([]);
  });

  it('tracks the last hint timestamp', () => {
    const config = new Config(baseParams);
    expect(config.getLastUserHintAt()).toBeNull();

    config.addUserHint('hint');

    const lastHintAt = config.getLastUserHintAt();
    expect(lastHintAt).not.toBeNull();
    expect(typeof lastHintAt).toBe('number');
  });
});
});
+113 -80
View File
@@ -615,7 +615,7 @@ export class Config {
private readonly enablePromptCompletion: boolean = false;
private readonly truncateToolOutputThreshold: number;
private compressionTruncationCounter = 0;
private initialized: boolean = false;
private initPromise: Promise<void> | undefined;
readonly storage: Storage;
private readonly fileExclusions: FileExclusions;
private readonly eventEmitter?: EventEmitter;
@@ -668,7 +668,7 @@ export class Config {
private remoteAdminSettings: AdminControlsSettings | undefined;
private latestApiRequest: GenerateContentParameters | undefined;
private lastModeSwitchTime: number = Date.now();
private userHints: Array<{ text: string; timestamp: number }> = [];
private approvedPlanPath: string | undefined;
constructor(params: ConfigParameters) {
@@ -909,97 +909,100 @@ export class Config {
* Safe to call more than once: the first call starts initialization and later or concurrent calls return that same promise instead of throwing.
*/
async initialize(): Promise<void> {
if (this.initialized) {
throw Error('Config was already initialized');
}
this.initialized = true;
await this.storage.initialize();
// Add pending directories to workspace context
for (const dir of this.pendingIncludeDirectories) {
this.workspaceContext.addDirectory(dir);
if (this.initPromise) {
return this.initPromise;
}
// Add plans directory to workspace context for plan file storage
if (this.planEnabled) {
const plansDir = this.storage.getProjectTempPlansDir();
await fs.promises.mkdir(plansDir, { recursive: true });
this.workspaceContext.addDirectory(plansDir);
}
this.initPromise = (async () => {
await this.storage.initialize();
// Initialize centralized FileDiscoveryService
const discoverToolsHandle = startupProfiler.start('discover_tools');
this.getFileService();
if (this.getCheckpointingEnabled()) {
await this.getGitService();
}
this.promptRegistry = new PromptRegistry();
this.resourceRegistry = new ResourceRegistry();
this.agentRegistry = new AgentRegistry(this);
await this.agentRegistry.initialize();
coreEvents.on(CoreEvent.AgentsRefreshed, this.onAgentsRefreshed);
this.toolRegistry = await this.createToolRegistry();
discoverToolsHandle?.end();
this.mcpClientManager = new McpClientManager(
this.clientVersion,
this.toolRegistry,
this,
this.eventEmitter,
);
// We do not await this promise so that the CLI can start up even if
// MCP servers are slow to connect.
const mcpInitialization = Promise.allSettled([
this.mcpClientManager.startConfiguredMcpServers(),
this.getExtensionLoader().start(this),
]).then((results) => {
for (const result of results) {
if (result.status === 'rejected') {
debugLogger.error('Error initializing MCP clients:', result.reason);
}
// Add pending directories to workspace context
for (const dir of this.pendingIncludeDirectories) {
this.workspaceContext.addDirectory(dir);
}
});
if (!this.interactive) {
await mcpInitialization;
}
// Add plans directory to workspace context for plan file storage
if (this.planEnabled) {
const plansDir = this.storage.getProjectTempPlansDir();
await fs.promises.mkdir(plansDir, { recursive: true });
this.workspaceContext.addDirectory(plansDir);
}
if (this.skillsSupport) {
this.getSkillManager().setAdminSettings(this.adminSkillsEnabled);
if (this.adminSkillsEnabled) {
await this.getSkillManager().discoverSkills(
this.storage,
this.getExtensions(),
this.isTrustedFolder(),
);
this.getSkillManager().setDisabledSkills(this.disabledSkills);
// Initialize centralized FileDiscoveryService
const discoverToolsHandle = startupProfiler.start('discover_tools');
this.getFileService();
if (this.getCheckpointingEnabled()) {
await this.getGitService();
}
this.promptRegistry = new PromptRegistry();
this.resourceRegistry = new ResourceRegistry();
// Re-register ActivateSkillTool to update its schema with the discovered enabled skill enums
if (this.getSkillManager().getSkills().length > 0) {
this.getToolRegistry().unregisterTool(ActivateSkillTool.Name);
this.getToolRegistry().registerTool(
new ActivateSkillTool(this, this.messageBus),
this.agentRegistry = new AgentRegistry(this);
await this.agentRegistry.initialize();
coreEvents.on(CoreEvent.AgentsRefreshed, this.onAgentsRefreshed);
this.toolRegistry = await this.createToolRegistry();
discoverToolsHandle?.end();
this.mcpClientManager = new McpClientManager(
this.clientVersion,
this.toolRegistry,
this,
this.eventEmitter,
);
// We do not await this promise so that the CLI can start up even if
// MCP servers are slow to connect.
const mcpInitialization = Promise.allSettled([
this.mcpClientManager.startConfiguredMcpServers(),
this.getExtensionLoader().start(this),
]).then((results) => {
for (const result of results) {
if (result.status === 'rejected') {
debugLogger.error('Error initializing MCP clients:', result.reason);
}
}
});
if (!this.interactive) {
await mcpInitialization;
}
if (this.skillsSupport) {
this.getSkillManager().setAdminSettings(this.adminSkillsEnabled);
if (this.adminSkillsEnabled) {
await this.getSkillManager().discoverSkills(
this.storage,
this.getExtensions(),
this.isTrustedFolder(),
);
this.getSkillManager().setDisabledSkills(this.disabledSkills);
// Re-register ActivateSkillTool to update its schema with the discovered enabled skill enums
if (this.getSkillManager().getSkills().length > 0) {
this.getToolRegistry().unregisterTool(ActivateSkillTool.Name);
this.getToolRegistry().registerTool(
new ActivateSkillTool(this, this.messageBus),
);
}
}
}
}
// Initialize hook system if enabled
if (this.getEnableHooks()) {
this.hookSystem = new HookSystem(this);
await this.hookSystem.initialize();
}
// Initialize hook system if enabled
if (this.getEnableHooks()) {
this.hookSystem = new HookSystem(this);
await this.hookSystem.initialize();
}
if (this.experimentalJitContext) {
this.contextManager = new ContextManager(this);
await this.contextManager.refresh();
}
if (this.experimentalJitContext) {
this.contextManager = new ContextManager(this);
await this.contextManager.refresh();
}
await this.geminiClient.initialize();
this.syncPlanModeTools();
await this.geminiClient.initialize();
this.syncPlanModeTools();
})();
return this.initPromise;
}
getContentGenerator(): ContentGenerator {
@@ -2486,6 +2489,36 @@ export class Config {
return this.hookSystem;
}
/**
 * Records a user hint together with the time it was added.
 *
 * The hint is trimmed first; a hint that is empty after trimming is ignored.
 *
 * @param hint Raw hint text.
 */
addUserHint(hint: string): void {
  const text = hint.trim();
  if (text) {
    this.userHints.push({ text, timestamp: Date.now() });
  }
}
/**
 * @returns The text of every stored hint, in insertion order, as a new array.
 */
getUserHints(): string[] {
  return Array.from(this.userHints, (entry) => entry.text);
}
/**
 * Returns the hints stored after a given position.
 *
 * @param index Position of the last hint already seen; a negative value
 *   means none have been seen.
 * @returns The text of every hint stored after `index`, or of all hints when
 *   `index` is negative.
 */
getUserHintsAfter(index: number): string[] {
  const nothingSeenYet = index < 0;
  return nothingSeenYet
    ? this.getUserHints()
    : this.userHints.slice(index + 1).map(({ text }) => text);
}
/**
 * @returns The index of the most recently stored hint, or -1 when no hint
 *   has been stored.
 */
getLatestHintIndex(): number {
  const hintCount = this.userHints.length;
  return hintCount - 1;
}
/**
 * @returns The timestamp recorded for the most recently stored hint, or
 *   `null` when no hint has been stored.
 */
getLastUserHintAt(): number | null {
  const hintCount = this.userHints.length;
  return hintCount > 0 ? this.userHints[hintCount - 1].timestamp : null;
}
/**
* Get hooks configuration
*/
@@ -121,6 +121,19 @@ export const DEFAULT_MODEL_CONFIGS: ModelConfigServiceConfig = {
},
},
},
'flash-lite-helper': {
extends: 'base',
modelConfig: {
model: 'gemini-2.5-flash-lite',
generateContentConfig: {
temperature: 0.2,
maxOutputTokens: 120,
thinkingConfig: {
thinkingBudget: 0,
},
},
},
},
'edit-corrector': {
extends: 'base',
modelConfig: {
@@ -11,6 +11,7 @@ exports[`Core System Prompt (prompts.ts) > ApprovalMode in System Prompt > Appro
- **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically.
- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments.
- **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -142,6 +143,7 @@ exports[`Core System Prompt (prompts.ts) > ApprovalMode in System Prompt > Appro
- **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically.
- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments.
- **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -278,6 +280,7 @@ exports[`Core System Prompt (prompts.ts) > ApprovalMode in System Prompt > shoul
- **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically.
- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments.
- **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -390,6 +393,7 @@ exports[`Core System Prompt (prompts.ts) > ApprovalMode in System Prompt > shoul
- **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically.
- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments.
- **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -526,6 +530,7 @@ exports[`Core System Prompt (prompts.ts) > should append userMemory with separat
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -652,6 +657,7 @@ exports[`Core System Prompt (prompts.ts) > should handle CodebaseInvestigator wi
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, you must work autonomously as no further user input is available. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Handle Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, do not perform it automatically.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -743,6 +749,7 @@ exports[`Core System Prompt (prompts.ts) > should handle CodebaseInvestigator wi
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, you must work autonomously as no further user input is available. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Handle Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, do not perform it automatically.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -829,6 +836,7 @@ exports[`Core System Prompt (prompts.ts) > should handle git instructions when i
- **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically.
- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments.
- **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -941,6 +949,7 @@ exports[`Core System Prompt (prompts.ts) > should handle git instructions when i
- **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically.
- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments.
- **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -1071,6 +1080,7 @@ exports[`Core System Prompt (prompts.ts) > should include approved plan instruct
- **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically.
- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments.
- **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -1173,6 +1183,7 @@ exports[`Core System Prompt (prompts.ts) > should include available_skills when
- **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically.
- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments.
- **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -1303,6 +1314,7 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -1416,6 +1428,7 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -1529,6 +1542,7 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -1638,6 +1652,7 @@ exports[`Core System Prompt (prompts.ts) > should include planning phase suggest
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -1747,6 +1762,7 @@ exports[`Core System Prompt (prompts.ts) > should include sub-agents in XML for
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -1851,6 +1867,7 @@ exports[`Core System Prompt (prompts.ts) > should match snapshot on Windows 1`]
- **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically.
- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments.
- **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -1964,6 +1981,7 @@ exports[`Core System Prompt (prompts.ts) > should render hierarchical memory wit
- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments.
- **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.
- **Conflict Resolution:** Instructions are provided in hierarchical context tags: \`<global_context>\`, \`<extension_context>\`, and \`<project_context>\`. In case of contradictory instructions, follow this priority: \`<project_context>\` (highest) > \`<extension_context>\` > \`<global_context>\` (lowest).
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -2095,6 +2113,7 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -2204,6 +2223,7 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -2308,6 +2328,7 @@ exports[`Core System Prompt (prompts.ts) > should return the interactive avoidan
- **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically.
- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments.
- **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Handle Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, do not perform it automatically.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -2424,6 +2445,7 @@ exports[`Core System Prompt (prompts.ts) > should use chatty system prompt for p
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -2533,6 +2555,7 @@ exports[`Core System Prompt (prompts.ts) > should use chatty system prompt for p
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your change—behavioral, structural, and stylistic—is correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. For Directives, only clarify if critically underspecified; otherwise, work autonomously. You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
@@ -2637,6 +2660,7 @@ exports[`Core System Prompt (prompts.ts) > should use legacy system prompt for n
- **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically.
- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments.
- **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it.
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
+2
View File
@@ -198,6 +198,7 @@ describe('Core System Prompt (prompts.ts)', () => {
expect(prompt).not.toContain('No sub-agents are currently available.');
expect(prompt).toContain('# Core Mandates');
expect(prompt).toContain('- **Conventions:**');
expect(prompt).toContain('- **User Hints:**');
expect(prompt).toContain('# Outside of Sandbox');
expect(prompt).toContain('# Final Reminder');
expect(prompt).toMatchSnapshot();
@@ -207,6 +208,7 @@ describe('Core System Prompt (prompts.ts)', () => {
vi.mocked(mockConfig.getActiveModel).mockReturnValue(PREVIEW_GEMINI_MODEL);
const prompt = getCoreSystemPrompt(mockConfig);
expect(prompt).toContain('You are Gemini CLI, an interactive CLI agent'); // Check for core content
expect(prompt).toContain('- **User Hints:**');
expect(prompt).toContain('No Chitchat:');
expect(prompt).toMatchSnapshot();
});
+2
View File
@@ -28,6 +28,7 @@ export * from './commands/memory.js';
export * from './commands/types.js';
// Export Core Logic
export * from './core/baseLlmClient.js';
export * from './core/client.js';
export * from './core/contentGenerator.js';
export * from './core/loggingContentGenerator.js';
@@ -88,6 +89,7 @@ export * from './utils/formatters.js';
export * from './utils/generateContentResponseUtilities.js';
export * from './utils/filesearch/fileSearch.js';
export * from './utils/errorParsing.js';
export * from './utils/flashLiteHelper.js';
export * from './utils/workspaceContext.js';
export * from './utils/environmentContext.js';
export * from './utils/ignorePatterns.js';
@@ -159,6 +159,7 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string {
- **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically.
- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments.
- **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.${mandateConflictResolution(options.hasHierarchicalMemory)}
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- ${mandateConfirm(options.interactive)}
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.${mandateSkillGuidance(options.hasSkills)}${mandateExplainBeforeActing(options.isGemini3)}${mandateContinueWork(options.interactive)}
+1
View File
@@ -171,6 +171,7 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string {
- **Technical Integrity:** You are responsible for the entire lifecycle: implementation, testing, and validation. Within the scope of your changes, prioritize readability and long-term maintainability by consolidating logic into clean abstractions rather than threading state across unrelated layers. Align strictly with the requested architectural direction, ensuring the final implementation is focused and free of redundant "just-in-case" alternatives. Validation is not merely running tests; it is the exhaustive process of ensuring that every aspect of your changebehavioral, structural, and stylisticis correct and fully compatible with the broader project. For bug fixes, you must empirically reproduce the failure with a new test case or reproduction script before applying the fix.
- **Expertise & Intent Alignment:** Provide proactive technical opinions grounded in research while strictly adhering to the user's intended workflow. Distinguish between **Directives** (unambiguous requests for action or implementation) and **Inquiries** (requests for analysis, advice, or observations). Assume all requests are Inquiries unless they contain an explicit instruction to perform a task. For Inquiries, your scope is strictly limited to research and analysis; you may propose a solution or strategy, but you MUST NOT modify files until a corresponding Directive is issued. Do not initiate implementation based on observations of bugs or statements of fact. Once an Inquiry is resolved, or while waiting for a Directive, stop and wait for the next user instruction. ${options.interactive ? 'For Directives, only clarify if critically underspecified; otherwise, work autonomously.' : 'For Directives, you must work autonomously as no further user input is available.'} You should only seek user intervention if you have exhausted all possible routes or if a proposed solution would take the workspace in a significantly different architectural direction.
- **Proactiveness:** When executing a Directive, persist through errors and obstacles by diagnosing failures in the execution phase and, if necessary, backtracking to the research or strategy phases to adjust your approach until a successful, verified outcome is achieved. Fulfill the user's request thoroughly, including adding tests when adding features or fixing bugs. Take reasonable liberties to fulfill broad goals while staying within the requested scope; however, prioritize simplicity and the removal of redundant logic over providing "just-in-case" alternatives that diverge from the established path.${mandateConflictResolution(options.hasHierarchicalMemory)}
- **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work.
- ${mandateConfirm(options.interactive)}
- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.${mandateSkillGuidance(options.hasSkills)}
@@ -126,6 +126,17 @@
}
}
},
"flash-lite-helper": {
"model": "gemini-2.5-flash-lite",
"generateContentConfig": {
"temperature": 0.2,
"topP": 1,
"maxOutputTokens": 120,
"thinkingConfig": {
"thinkingBudget": 0
}
}
},
"edit-corrector": {
"model": "gemini-2.5-flash-lite",
"generateContentConfig": {
@@ -126,6 +126,17 @@
}
}
},
"flash-lite-helper": {
"model": "gemini-2.5-flash-lite",
"generateContentConfig": {
"temperature": 0.2,
"topP": 1,
"maxOutputTokens": 120,
"thinkingConfig": {
"thinkingBudget": 0
}
}
},
"edit-corrector": {
"model": "gemini-2.5-flash-lite",
"generateContentConfig": {
@@ -243,6 +243,15 @@ export class StartupProfiler {
// Clear all phases.
this.phases.clear();
}
/**
 * Test-only reset: discards every phase this profiler is tracking and empties
 * the performance timeline. `clearMarks()` / `clearMeasures()` are called
 * without a name, so they are not limited to entries created by this class.
 */
clear(): void {
  performance.clearMeasures();
  performance.clearMarks();
  this.phases.clear();
}
}
export const startupProfiler = StartupProfiler.getInstance();
@@ -0,0 +1,151 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi } from 'vitest';
import type { GeminiClient } from '../core/client.js';
import {
DEFAULT_FLASH_LITE_MODEL_CONFIG_KEY,
generateFlashLiteText,
truncateFlashLiteInput,
generateSteeringAckMessage,
} from './flashLiteHelper.js';
// Unit tests for truncateFlashLiteInput. The truncation suffix
// ('\n...[truncated]') is 15 characters long and counts toward the limit, so
// limits at or below 15 take a separate "hard cut, no suffix" branch.
describe('truncateFlashLiteInput', () => {
  it('returns input as-is when below limit', () => {
    // The limit must exceed the suffix length, otherwise this would exercise
    // the hard-cut branch instead of the pass-through branch.
    expect(truncateFlashLiteInput('hello', 20)).toBe('hello');
  });

  it('truncates and appends suffix when above limit', () => {
    const input = 'abcdefghijklmnopqrstuvwxyz';
    const result = truncateFlashLiteInput(input, 20);
    expect(result.length).toBe(20);
    expect(result).toContain('...[truncated]');
    // 20 - 15 (suffix) leaves room for exactly five characters of input.
    expect(result).toBe('abcde\n...[truncated]');
  });

  it('hard-cuts without a suffix when the limit cannot fit the suffix', () => {
    expect(truncateFlashLiteInput('hello', 10)).toBe('hello');
    expect(truncateFlashLiteInput('hello world', 5)).toBe('hello');
    expect(truncateFlashLiteInput('hello world', 0)).toBe('');
    expect(truncateFlashLiteInput('hello world', -3)).toBe('');
  });
});
// Exercises generateFlashLiteText against a stubbed GeminiClient whose only
// member is a mocked `generateContent`; no real model call is made.
describe('generateFlashLiteText', () => {
// A never-aborted signal shared by every case; it is only forwarded to the mock.
const abortSignal = new AbortController().signal;
it('uses the default flash-lite helper model config and returns response text', async () => {
const geminiClient = {
generateContent: vi.fn().mockResolvedValue({
candidates: [
{ content: { parts: [{ text: ' Got it. Skipping #2. ' }] } },
],
}),
} as unknown as GeminiClient;
const result = await generateFlashLiteText(geminiClient, {
instruction: 'Write a short acknowledgement sentence.',
input: 'skip #2',
fallbackText: 'Got it.',
abortSignal,
});
// Leading/trailing whitespace in the model text must be stripped.
expect(result).toBe('Got it. Skipping #2.');
// No modelConfigKey was passed, so the default key must be used.
expect(geminiClient.generateContent).toHaveBeenCalledWith(
DEFAULT_FLASH_LITE_MODEL_CONFIG_KEY,
expect.any(Array),
abortSignal,
);
});
it('returns fallback text when response text is empty', async () => {
// An empty response object carries no candidates, hence no text.
const geminiClient = {
generateContent: vi.fn().mockResolvedValue({}),
} as unknown as GeminiClient;
const result = await generateFlashLiteText(geminiClient, {
instruction: 'Return one sentence.',
input: 'cancel task 2',
fallbackText: 'Understood. Cancelling task 2.',
abortSignal,
});
expect(result).toBe('Understood. Cancelling task 2.');
});
it('returns fallback text when generation throws', async () => {
const geminiClient = {
generateContent: vi.fn().mockRejectedValue(new Error('boom')),
} as unknown as GeminiClient;
// The rejection must be swallowed by the helper, not propagated to the caller.
const result = await generateFlashLiteText(geminiClient, {
instruction: 'Return one sentence.',
input: 'cancel task 2',
fallbackText: 'Understood.',
abortSignal,
});
expect(result).toBe('Understood.');
});
it('truncates the input before sending to the model', async () => {
const geminiClient = {
generateContent: vi.fn().mockResolvedValue({
candidates: [{ content: { parts: [{ text: 'Ack.' }] } }],
}),
} as unknown as GeminiClient;
// 200 characters of input against a 64-character cap forces truncation.
const longInput = 'x'.repeat(200);
await generateFlashLiteText(geminiClient, {
instruction: 'Return one sentence.',
input: longInput,
fallbackText: 'Understood.',
abortSignal,
maxInputChars: 64,
});
// The second positional argument of generateContent is the `contents` array;
// its first part holds the full prompt text that was sent.
const [, contents] = (
geminiClient.generateContent as ReturnType<typeof vi.fn>
).mock.calls[0];
const promptText = contents[0].parts[0].text as string;
expect(promptText).toContain('...[truncated]');
});
});
// Tests for generateSteeringAckMessage: the model-written acknowledgement on
// success, and the locally built "Understood. ..." fallback on failure.
describe('generateSteeringAckMessage', () => {
  it('returns a shortened acknowledgement using flash-lite-helper', async () => {
    const geminiClient = {
      generateContent: vi.fn().mockResolvedValue({
        candidates: [
          {
            content: {
              parts: [{ text: 'Got it. I will focus on the tests now.' }],
            },
          },
        ],
      }),
    } as unknown as GeminiClient;

    const result = await generateSteeringAckMessage(
      geminiClient,
      'focus on tests',
    );

    expect(result).toBe('Got it. I will focus on the tests now.');
  });

  it('returns a fallback message if the model fails', async () => {
    const geminiClient = {
      generateContent: vi.fn().mockRejectedValue(new Error('timeout')),
    } as unknown as GeminiClient;

    // The hint is longer than the 64-character fallback budget, so the
    // fallback keeps its first 61 characters and appends an ellipsis.
    const result = await generateSteeringAckMessage(
      geminiClient,
      'a very long hint that should be truncated in the fallback message because it exceeds the limit',
    );

    expect(result).toContain('Understood. a very long hint');
    expect(result).toBe(
      'Understood. a very long hint that should be truncated in the fallback mes...',
    );
  });

  it('returns a very simple fallback if hint is empty', async () => {
    const geminiClient = {
      generateContent: vi.fn().mockRejectedValue(new Error('error')),
    } as unknown as GeminiClient;

    // A whitespace-only hint normalizes to an empty string.
    const result = await generateSteeringAckMessage(geminiClient, ' ');

    expect(result).toBe('Understood. Adjusting the plan.');
  });
});
+154
View File
@@ -0,0 +1,154 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { Content } from '@google/genai';
import type { GeminiClient } from '../core/client.js';
import type { ModelConfigKey } from '../services/modelConfigService.js';
import { debugLogger } from './debugLogger.js';
import { getResponseText } from './partUtils.js';
/**
 * Model-config key that generateFlashLiteText uses when the caller does not
 * pass `modelConfigKey`.
 */
export const DEFAULT_FLASH_LITE_MODEL_CONFIG_KEY: ModelConfigKey = {
model: 'flash-lite-helper',
};
/** Default cap, in characters, on the user input embedded in the helper prompt. */
export const DEFAULT_FLASH_LITE_MAX_INPUT_CHARS = 1200;
/** Default cap, in characters, on the text returned by generateFlashLiteText. */
export const DEFAULT_FLASH_LITE_MAX_OUTPUT_CHARS = 180;
/** Marker appended to input that was cut short; its length counts toward the input cap. */
const INPUT_TRUNCATION_SUFFIX = '\n...[truncated]';
/**
 * Instruction text appended after the user's hint(s) by
 * buildUserSteeringHintPrompt and formatUserHintsForModel. It tells the model
 * how to fold a steering update into the plan it is already executing.
 */
export const USER_STEERING_INSTRUCTION =
'Internal instruction: Re-evaluate the active plan using this user steering update. ' +
'Classify it as ADD_TASK, MODIFY_TASK, CANCEL_TASK, or EXTRA_CONTEXT. ' +
'Apply minimal-diff changes only to affected tasks and keep unaffected tasks active. ' +
'Do not cancel/skip tasks unless the user explicitly cancels them. ' +
'Acknowledge the steering briefly and state the course correction.';
/**
 * Builds the prompt for a single steering hint.
 *
 * The hint is trimmed, wrapped in double quotes after a
 * "User steering update:" label, and followed on the next line by
 * USER_STEERING_INSTRUCTION.
 *
 * @param hintText Raw hint text as entered by the user.
 * @returns The two-line prompt string.
 */
export function buildUserSteeringHintPrompt(hintText: string): string {
  const quotedHint = '"' + hintText.trim() + '"';
  const headerLine = 'User steering update: ' + quotedHint;
  return [headerLine, USER_STEERING_INSTRUCTION].join('\n');
}
/**
 * Renders a batch of hints as a bulleted "User hints:" section followed by a
 * blank line and USER_STEERING_INSTRUCTION.
 *
 * @param hints Hint strings, one bullet each, emitted in the given order.
 * @returns The formatted section, or `null` when `hints` is empty.
 */
export function formatUserHintsForModel(hints: string[]): string | null {
  if (!hints.length) {
    return null;
  }
  const bulletLines = hints.map((entry) => '- ' + entry);
  // The empty element produces the blank line separating hints from the instruction.
  return ['User hints:', ...bulletLines, '', USER_STEERING_INSTRUCTION].join(
    '\n',
  );
}
/** Instruction passed to the helper model when generating a steering acknowledgement. */
const STEERING_ACK_INSTRUCTION =
'Write one short, friendly sentence acknowledging a user steering update for an in-progress task. ' +
'Be concrete when possible (e.g., mention skipped/cancelled item numbers). ' +
'Do not apologize, do not mention internal policy, and do not add extra steps.';
/** Milliseconds after which generateSteeringAckMessage aborts its request signal. */
const STEERING_ACK_TIMEOUT_MS = 1200;
/** Input cap, in characters, that generateSteeringAckMessage passes as `maxInputChars`. */
const STEERING_ACK_MAX_INPUT_CHARS = 320;
/** Output cap, in characters, that generateSteeringAckMessage passes as `maxOutputChars`. */
const STEERING_ACK_MAX_OUTPUT_CHARS = 90;
/**
 * Builds the acknowledgement shown when the helper model cannot supply one.
 *
 * Whitespace runs in the hint are collapsed to single spaces and the result is
 * trimmed. An empty hint yields a generic message; a hint of up to 64 UTF-16
 * code units is echoed in full; anything longer is cut to at most 61 code
 * units and suffixed with "...".
 *
 * @param hintText Raw hint text as entered by the user.
 * @returns A single-line message starting with "Understood.".
 */
function buildSteeringFallbackMessage(hintText: string): string {
  const normalized = hintText.replace(/\s+/g, ' ').trim();
  if (!normalized) {
    return 'Understood. Adjusting the plan.';
  }
  if (normalized.length <= 64) {
    return `Understood. ${normalized}`;
  }

  // `slice` counts UTF-16 code units, so a cut at 61 can land between the two
  // halves of a surrogate pair (e.g. an emoji). Back off one unit in that case
  // so the message never ends in a lone high surrogate.
  let cut = 61;
  const lastKept = normalized.charCodeAt(cut - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
    cut -= 1;
  }
  return `Understood. ${normalized.slice(0, cut)}...`;
}
/**
 * Asks the helper model for a one-sentence acknowledgement of a steering hint.
 *
 * The request's abort signal is triggered after STEERING_ACK_TIMEOUT_MS. A
 * locally built fallback message is handed to generateFlashLiteText, which
 * returns it whenever it cannot produce text of its own.
 *
 * @param geminiClient Client used to issue the generation request.
 * @param hintText Raw hint text as entered by the user.
 * @returns The acknowledgement text, or the fallback message.
 */
export async function generateSteeringAckMessage(
  geminiClient: GeminiClient,
  hintText: string,
): Promise<string> {
  const fallbackText = buildSteeringFallbackMessage(hintText);

  const controller = new AbortController();
  const abortTimer = setTimeout(() => {
    controller.abort();
  }, STEERING_ACK_TIMEOUT_MS);

  try {
    const singleLineHint = hintText.replace(/\s+/g, ' ').trim();
    const acknowledgement = await generateFlashLiteText(geminiClient, {
      instruction: STEERING_ACK_INSTRUCTION,
      input: singleLineHint,
      fallbackText,
      abortSignal: controller.signal,
      maxInputChars: STEERING_ACK_MAX_INPUT_CHARS,
      maxOutputChars: STEERING_ACK_MAX_OUTPUT_CHARS,
    });
    return acknowledgement;
  } finally {
    // Always cancel the pending timer, whether the call resolved or threw.
    clearTimeout(abortTimer);
  }
}
/** Options accepted by generateFlashLiteText. */
export interface GenerateFlashLiteTextOptions {
/** Task instruction placed at the top of the prompt. If blank after trimming, the fallback is returned without calling the model. */
instruction: string;
/** User-supplied text; it is trimmed, truncated to `maxInputChars`, and embedded in the prompt. */
input: string;
/** Returned when the instruction is blank, the response has no text, or generation throws. */
fallbackText: string;
/** Forwarded to `geminiClient.generateContent`. */
abortSignal: AbortSignal;
/** Model-config key for the request; defaults to DEFAULT_FLASH_LITE_MODEL_CONFIG_KEY. */
modelConfigKey?: ModelConfigKey;
/** Cap on the embedded input length; defaults to DEFAULT_FLASH_LITE_MAX_INPUT_CHARS. */
maxInputChars?: number;
/** Cap on the returned text length; defaults to DEFAULT_FLASH_LITE_MAX_OUTPUT_CHARS. Values of 0 or less disable the cap. */
maxOutputChars?: number;
}
/**
 * Caps `input` at `maxInputChars` UTF-16 code units.
 *
 * - If the limit is no larger than the truncation suffix, the input is simply
 *   cut to the limit (negative limits are treated as 0) and no suffix is added.
 * - Otherwise, input within the limit is returned unchanged, and longer input
 *   is cut so that the kept text plus the suffix fits within the limit.
 *
 * A cut never leaves a dangling high surrogate: when the boundary falls inside
 * a surrogate pair, the kept text is one code unit shorter, so the result can
 * be one unit under the limit.
 *
 * @param input Text to cap.
 * @param maxInputChars Maximum length of the returned string.
 * @returns `input`, possibly shortened and suffixed.
 */
export function truncateFlashLiteInput(
  input: string,
  maxInputChars: number = DEFAULT_FLASH_LITE_MAX_INPUT_CHARS,
): string {
  // Slices `text` to `end` code units, dropping the final unit when the cut
  // separated a high surrogate from the low surrogate that followed it.
  const sliceAtCharBoundary = (text: string, end: number): string => {
    const head = text.slice(0, end);
    const lastUnit = head.charCodeAt(head.length - 1);
    const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
    // Only adjust when something was actually cut off; untruncated input is
    // returned exactly as given.
    return endsOnHighSurrogate && head.length < text.length
      ? head.slice(0, -1)
      : head;
  };

  if (maxInputChars <= INPUT_TRUNCATION_SUFFIX.length) {
    // Not enough room for the suffix: hard cut only.
    return sliceAtCharBoundary(input, Math.max(maxInputChars, 0));
  }
  if (input.length <= maxInputChars) {
    return input;
  }
  const keepChars = maxInputChars - INPUT_TRUNCATION_SUFFIX.length;
  return sliceAtCharBoundary(input, keepChars) + INPUT_TRUNCATION_SUFFIX;
}
/**
 * Runs a small single-turn text generation and always resolves to a string.
 *
 * The prompt consists of the trimmed instruction followed by the trimmed,
 * length-capped input wrapped in triple quotes. Whitespace runs in the
 * response text are collapsed to single spaces, and the text is cut to
 * `maxOutputChars` when that cap is positive.
 *
 * @param geminiClient Client whose `generateContent` issues the request.
 * @param options See GenerateFlashLiteTextOptions.
 * @returns The cleaned response text, or `options.fallbackText` when the
 *   instruction is blank, the response has no text, or the request throws.
 */
export async function generateFlashLiteText(
  geminiClient: GeminiClient,
  options: GenerateFlashLiteTextOptions,
): Promise<string> {
  const {
    instruction,
    input,
    fallbackText,
    abortSignal,
    modelConfigKey = DEFAULT_FLASH_LITE_MODEL_CONFIG_KEY,
    maxInputChars = DEFAULT_FLASH_LITE_MAX_INPUT_CHARS,
    maxOutputChars = DEFAULT_FLASH_LITE_MAX_OUTPUT_CHARS,
  } = options;

  const instructionText = instruction.trim();
  if (instructionText.length === 0) {
    // Nothing to ask the model; skip the request entirely.
    return fallbackText;
  }

  const boundedInput = truncateFlashLiteInput(input.trim(), maxInputChars);
  const request: Content[] = [
    {
      role: 'user',
      parts: [
        {
          text: `${instructionText}\n\nUser input:\n"""${boundedInput}"""`,
        },
      ],
    },
  ];

  try {
    const response = await geminiClient.generateContent(
      modelConfigKey,
      request,
      abortSignal,
    );
    const singleLine = getResponseText(response)?.replace(/\s+/g, ' ').trim();
    if (!singleLine) {
      return fallbackText;
    }
    const exceedsCap =
      maxOutputChars > 0 && singleLine.length > maxOutputChars;
    return exceedsCap
      ? singleLine.slice(0, maxOutputChars).trimEnd()
      : singleLine;
  } catch (error) {
    // Best-effort helper: a failed request is logged and replaced by the fallback.
    const reason = error instanceof Error ? error.message : String(error);
    debugLogger.debug(`[FlashLiteHelper] Generation failed: ${reason}`);
    return fallbackText;
  }
}