From 611d934829647d38e158e7831a4278967c045952 Mon Sep 17 00:00:00 2001 From: joshualitt Date: Thu, 26 Feb 2026 08:38:49 -0800 Subject: [PATCH 01/41] feat(core): Enable generalist agent (#19665) --- evals/generalist_delegation.eval.ts | 165 +++++++++++++++ packages/cli/src/test-utils/AppRig.tsx | 178 +++++++++++++++- packages/core/src/agents/generalist-agent.ts | 3 +- packages/core/src/agents/registry.test.ts | 6 +- .../core/__snapshots__/prompts.test.ts.snap | 192 ++++++++++++++++++ packages/core/src/prompts/snippets.ts | 12 ++ 6 files changed, 547 insertions(+), 9 deletions(-) create mode 100644 evals/generalist_delegation.eval.ts diff --git a/evals/generalist_delegation.eval.ts b/evals/generalist_delegation.eval.ts new file mode 100644 index 0000000000..7e6358ae1f --- /dev/null +++ b/evals/generalist_delegation.eval.ts @@ -0,0 +1,165 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import { appEvalTest } from './app-test-helper.js'; + +describe('generalist_delegation', () => { + // --- Positive Evals (Should Delegate) --- + + appEvalTest('USUALLY_PASSES', { + name: 'should delegate batch error fixing to generalist agent', + configOverrides: { + agents: { + overrides: { + generalist: { enabled: true }, + }, + }, + experimental: { + enableAgents: true, + }, + excludeTools: ['run_shell_command'], + }, + files: { + 'file1.ts': 'console.log("no semi")', + 'file2.ts': 'console.log("no semi")', + 'file3.ts': 'console.log("no semi")', + 'file4.ts': 'console.log("no semi")', + 'file5.ts': 'console.log("no semi")', + 'file6.ts': 'console.log("no semi")', + 'file7.ts': 'console.log("no semi")', + 'file8.ts': 'console.log("no semi")', + 'file9.ts': 'console.log("no semi")', + 'file10.ts': 'console.log("no semi")', + }, + prompt: + 'I have 10 files (file1.ts to file10.ts) that are missing semicolons. 
Can you fix them?', + setup: async (rig) => { + rig.setBreakpoint(['generalist']); + }, + assert: async (rig) => { + const confirmation = await rig.waitForPendingConfirmation( + 'generalist', + 60000, + ); + expect( + confirmation, + 'Expected a tool call for generalist agent', + ).toBeTruthy(); + await rig.resolveTool(confirmation); + await rig.waitForIdle(60000); + }, + }); + + appEvalTest('USUALLY_PASSES', { + name: 'should autonomously delegate complex batch task to generalist agent', + configOverrides: { + agents: { + overrides: { + generalist: { enabled: true }, + }, + }, + experimental: { + enableAgents: true, + }, + excludeTools: ['run_shell_command'], + }, + files: { + 'src/a.ts': 'export const a = 1;', + 'src/b.ts': 'export const b = 2;', + 'src/c.ts': 'export const c = 3;', + 'src/d.ts': 'export const d = 4;', + 'src/e.ts': 'export const e = 5;', + }, + prompt: + 'Please update all files in the src directory. For each file, add a comment at the top that says "Processed by Gemini".', + setup: async (rig) => { + rig.setBreakpoint(['generalist']); + }, + assert: async (rig) => { + const confirmation = await rig.waitForPendingConfirmation( + 'generalist', + 60000, + ); + expect( + confirmation, + 'Expected autonomously delegate to generalist for batch task', + ).toBeTruthy(); + await rig.resolveTool(confirmation); + await rig.waitForIdle(60000); + }, + }); + + // --- Negative Evals (Should NOT Delegate - Assertive Handling) --- + + appEvalTest('USUALLY_PASSES', { + name: 'should NOT delegate simple read and fix to generalist agent', + configOverrides: { + agents: { + overrides: { + generalist: { enabled: true }, + }, + }, + experimental: { + enableAgents: true, + }, + excludeTools: ['run_shell_command'], + }, + files: { + 'README.md': 'This is a proyect.', + }, + prompt: + 'There is a typo in README.md ("proyect"). 
Please fix it to "project".', + setup: async (rig) => { + // Break on everything to see what it calls + rig.setBreakpoint(['*']); + }, + assert: async (rig) => { + await rig.drainBreakpointsUntilIdle((confirmation) => { + expect( + confirmation.toolName, + `Agent should NOT have delegated to generalist.`, + ).not.toBe('generalist'); + }); + + const output = rig.getStaticOutput(); + expect(output).toMatch(/project/i); + }, + }); + + appEvalTest('USUALLY_PASSES', { + name: 'should NOT delegate simple direct question to generalist agent', + configOverrides: { + agents: { + overrides: { + generalist: { enabled: true }, + }, + }, + experimental: { + enableAgents: true, + }, + excludeTools: ['run_shell_command'], + }, + files: { + 'src/VERSION': '1.2.3', + }, + prompt: 'Can you tell me the version number in the src folder?', + setup: async (rig) => { + rig.setBreakpoint(['*']); + }, + assert: async (rig) => { + await rig.drainBreakpointsUntilIdle((confirmation) => { + expect( + confirmation.toolName, + `Agent should NOT have delegated to generalist.`, + ).not.toBe('generalist'); + }); + + const output = rig.getStaticOutput(); + expect(output).toMatch(/1\.2\.3/); + }, + }); +}); diff --git a/packages/cli/src/test-utils/AppRig.tsx b/packages/cli/src/test-utils/AppRig.tsx index 018ce1502b..d953be0ff6 100644 --- a/packages/cli/src/test-utils/AppRig.tsx +++ b/packages/cli/src/test-utils/AppRig.tsx @@ -29,6 +29,7 @@ import { createContentGenerator, IdeClient, debugLogger, + CoreToolCallStatus, } from '@google/gemini-cli-core'; import { type MockShellCommand, @@ -36,7 +37,47 @@ import { } from './MockShellExecutionService.js'; import { createMockSettings } from './settings.js'; import { type LoadedSettings } from '../config/settings.js'; -import { AuthState } from '../ui/types.js'; +import { AuthState, StreamingState } from '../ui/types.js'; +import { randomUUID } from 'node:crypto'; +import type { + TrackedCancelledToolCall, + TrackedCompletedToolCall, + TrackedToolCall, +} 
from '../ui/hooks/useToolScheduler.js'; + +// Global state observer for React-based signals +const sessionStateMap = new Map(); +const activeRigs = new Map(); + +// Mock StreamingContext to report state changes back to the observer +vi.mock('../ui/contexts/StreamingContext.js', async (importOriginal) => { + const original = + await importOriginal(); + const { useConfig } = await import('../ui/contexts/ConfigContext.js'); + const React = await import('react'); + + return { + ...original, + useStreamingContext: () => { + const state = original.useStreamingContext(); + const config = useConfig(); + const sessionId = config.getSessionId(); + + React.useEffect(() => { + sessionStateMap.set(sessionId, state); + // If we see activity, we are no longer "awaiting" the start of a response + if (state !== StreamingState.Idle) { + const rig = activeRigs.get(sessionId); + if (rig) { + rig.awaitingResponse = false; + } + } + }, [sessionId, state]); + + return state; + }, + }; +}); // Mock core functions globally for tests using AppRig. vi.mock('@google/gemini-cli-core', async (importOriginal) => { @@ -112,9 +153,18 @@ export class AppRig { private breakpointTools = new Set(); private lastAwaitedConfirmation: PendingConfirmation | undefined; + /** + * True if a message was just sent but React hasn't yet reported a non-idle state. 
+ */ + awaitingResponse = false; + constructor(private options: AppRigOptions = {}) { - this.testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gemini-app-rig-')); - this.sessionId = `test-session-${Math.random().toString(36).slice(2, 9)}`; + const uniqueId = randomUUID(); + this.testDir = fs.mkdtempSync( + path.join(os.tmpdir(), `gemini-app-rig-${uniqueId.slice(0, 8)}-`), + ); + this.sessionId = `test-session-${uniqueId}`; + activeRigs.set(this.sessionId, this); } async initialize() { @@ -245,6 +295,8 @@ export class AppRig { }; } + private toolCalls: TrackedToolCall[] = []; + private setupMessageBusListeners() { if (!this.config) return; const messageBus = this.config.getMessageBus(); @@ -252,6 +304,7 @@ export class AppRig { messageBus.subscribe( MessageBusType.TOOL_CALLS_UPDATE, (message: ToolCallsUpdateMessage) => { + this.toolCalls = message.toolCalls; for (const call of message.toolCalls) { if (call.status === 'awaiting_approval' && call.correlationId) { const details = call.confirmationDetails; @@ -281,6 +334,48 @@ export class AppRig { ); } + /** + * Returns true if the agent is currently busy (responding or executing tools). + */ + isBusy(): boolean { + if (this.awaitingResponse) { + return true; + } + + const reactState = sessionStateMap.get(this.sessionId); + // If we have a React-based state, use it as the definitive signal. + // 'responding' and 'waiting-for-confirmation' both count as busy for the overall task. 
+ if (reactState !== undefined) { + return reactState !== StreamingState.Idle; + } + + // Fallback to tool tracking if React hasn't reported yet + const isAnyToolActive = this.toolCalls.some((tc) => { + if ( + tc.status === CoreToolCallStatus.Executing || + tc.status === CoreToolCallStatus.Scheduled || + tc.status === CoreToolCallStatus.Validating + ) { + return true; + } + if ( + tc.status === CoreToolCallStatus.Success || + tc.status === CoreToolCallStatus.Error || + tc.status === CoreToolCallStatus.Cancelled + ) { + return !(tc as TrackedCompletedToolCall | TrackedCancelledToolCall) + .responseSubmittedToGemini; + } + return false; + }); + + const isAwaitingConfirmation = this.toolCalls.some( + (tc) => tc.status === CoreToolCallStatus.AwaitingApproval, + ); + + return isAnyToolActive || isAwaitingConfirmation; + } + render() { if (!this.config || !this.settings) throw new Error('AppRig not initialized'); @@ -334,17 +429,21 @@ export class AppRig { this.setBreakpoint(name); } } else { - this.setToolPolicy(toolName, PolicyDecision.ASK_USER, 100); + // Use undefined toolName to create a global rule if '*' is provided + const actualToolName = toolName === '*' ? undefined : toolName; + this.setToolPolicy(actualToolName, PolicyDecision.ASK_USER, 100); this.breakpointTools.add(toolName); } } removeToolPolicy(toolName?: string, source = 'AppRig Override') { if (!this.config) throw new Error('AppRig not initialized'); + // Map '*' back to undefined for policy removal + const actualToolName = toolName === '*' ? undefined : toolName; this.config .getPolicyEngine() // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - .removeRulesForTool(toolName as string, source); + .removeRulesForTool(actualToolName as string, source); this.breakpointTools.delete(toolName); } @@ -416,6 +515,44 @@ export class AppRig { return matched!; } + /** + * Waits for either a tool confirmation request OR for the agent to go idle. 
+ */ + async waitForNextEvent( + timeout = 60000, + ): Promise< + | { type: 'confirmation'; confirmation: PendingConfirmation } + | { type: 'idle' } + > { + let confirmation: PendingConfirmation | undefined; + let isIdle = false; + + await this.waitUntil( + async () => { + await act(async () => { + await new Promise((resolve) => setTimeout(resolve, 0)); + }); + confirmation = this.getPendingConfirmations()[0]; + // Now that we have a code-powered signal, this should be perfectly deterministic. + isIdle = !this.isBusy(); + return !!confirmation || isIdle; + }, + { + timeout, + message: 'Timed out waiting for next event (confirmation or idle).', + }, + ); + + if (confirmation) { + this.lastAwaitedConfirmation = confirmation; + return { type: 'confirmation', confirmation }; + } + + // Ensure all renders are flushed before returning 'idle' + await this.renderResult?.waitUntilReady(); + return { type: 'idle' }; + } + async resolveTool( toolNameOrDisplayName: string | RegExp | PendingConfirmation, outcome: ToolConfirmationOutcome = ToolConfirmationOutcome.ProceedOnce, @@ -471,6 +608,32 @@ export class AppRig { }); } + /** + * Drains all pending tool calls that hit a breakpoint until the agent is idle. + * Useful for negative tests to ensure no unwanted tools (like generalist) are called. + * + * @param onConfirmation Optional callback to inspect each confirmation before resolving. + * Return true to skip the default resolveTool call (e.g. if you handled it). 
+ */ + async drainBreakpointsUntilIdle( + onConfirmation?: (confirmation: PendingConfirmation) => void | boolean, + timeout = 60000, + ) { + while (true) { + const event = await this.waitForNextEvent(timeout); + if (event.type === 'idle') { + break; + } + + const confirmation = event.confirmation; + const handled = onConfirmation?.(confirmation); + + if (!handled) { + await this.resolveTool(confirmation); + } + } + } + getConfig(): Config { if (!this.config) throw new Error('AppRig not initialized'); return this.config; @@ -530,11 +693,16 @@ export class AppRig { } async sendMessage(text: string) { + this.awaitingResponse = true; await this.type(text); await this.pressEnter(); } async unmount() { + // Clean up global state for this session + sessionStateMap.delete(this.sessionId); + activeRigs.delete(this.sessionId); + // Poison the chat recording service to prevent late writes to the test directory if (this.config) { const recordingService = this.config diff --git a/packages/core/src/agents/generalist-agent.ts b/packages/core/src/agents/generalist-agent.ts index 4f9040a7b0..412880b089 100644 --- a/packages/core/src/agents/generalist-agent.ts +++ b/packages/core/src/agents/generalist-agent.ts @@ -24,8 +24,7 @@ export const GeneralistAgent = ( name: 'generalist', displayName: 'Generalist Agent', description: - "A general-purpose AI agent with access to all tools. Use it for complex tasks that don't fit into other specialized agents.", - experimental: true, + 'A general-purpose AI agent with access to all tools. Highly recommended for tasks that are turn-intensive or involve processing large amounts of data. Use this to keep the main session history lean and efficient. 
Excellent for: batch refactoring/error fixing across multiple files, running commands with high-volume output, and speculative investigations.', inputConfig: { inputSchema: { type: 'object', diff --git a/packages/core/src/agents/registry.test.ts b/packages/core/src/agents/registry.test.ts index 8cc45a9a5a..c5f2faa06f 100644 --- a/packages/core/src/agents/registry.test.ts +++ b/packages/core/src/agents/registry.test.ts @@ -50,6 +50,7 @@ function makeMockedConfig(params?: Partial): Config { } as unknown as ToolRegistry); vi.spyOn(config, 'getAgentRegistry').mockReturnValue({ getDirectoryContext: () => 'mock directory context', + getAllDefinitions: () => [], } as unknown as AgentRegistry); return config; } @@ -262,6 +263,7 @@ describe('AgentRegistry', () => { overrides: { codebase_investigator: { enabled: false }, cli_help: { enabled: false }, + generalist: { enabled: false }, }, }, }); @@ -299,13 +301,13 @@ describe('AgentRegistry', () => { expect(registry.getDefinition('cli_help')).toBeUndefined(); }); - it('should NOT register generalist agent by default (because it is experimental)', async () => { + it('should register generalist agent by default', async () => { const config = makeMockedConfig(); const registry = new TestableAgentRegistry(config); await registry.initialize(); - expect(registry.getDefinition('generalist')).toBeUndefined(); + expect(registry.getDefinition('generalist')).toBeDefined(); }); it('should register generalist agent if explicitly enabled via override', async () => { diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index e789efa5d6..f1979ef00b 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -57,6 +57,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. 
You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -209,6 +221,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. 
+ +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -480,6 +504,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. 
+ +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -632,6 +668,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -1506,6 +1554,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. 
+### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -1657,6 +1717,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. 
+ +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -1800,6 +1872,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. 
+ +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -1943,6 +2027,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -2082,6 +2178,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. 
+### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -2221,6 +2329,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. 
+ +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -2352,6 +2472,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. 
+ +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -2490,6 +2622,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + test-agent @@ -2870,6 +3014,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. 
+### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -3009,6 +3165,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. 
+ +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -3260,6 +3428,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. 
+ +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -3399,6 +3579,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. 
+ mock-agent diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index a96842c3f1..3ab3b68e0a 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -228,6 +228,18 @@ export function renderSubAgents(subAgents?: SubAgentOptions[]): string { Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. 
+ ${subAgentsXml} From c7a70e6421bea0ce8e623e0b418f1778c2ca333f Mon Sep 17 00:00:00 2001 From: Dev Randalpura Date: Thu, 26 Feb 2026 09:36:11 -0800 Subject: [PATCH 02/41] Updated tests in TableRenderer.test.tsx to use SVG snapshots (#20450) --- integration-tests/plan-mode.test.ts | 2 +- .../cli/src/ui/utils/TableRenderer.test.tsx | 56 ++- ...dles-non-ASCII-characters-emojis-.snap.svg | 32 ++ ...d-headers-without-showing-markers.snap.svg | 47 ++ ...rer-renders-a-3x3-table-correctly.snap.svg | 39 ++ ...h-mixed-content-lengths-correctly.snap.svg | 401 ++++++++++++++++++ ...g-headers-and-4-columns-correctly.snap.svg | 63 +++ ...ers-a-table-with-mixed-emojis-As-.snap.svg | 32 ++ ...rs-a-table-with-only-Asian-chara-.snap.svg | 32 ++ ...ers-a-table-with-only-emojis-and-.snap.svg | 32 ++ ...rs-correctly-when-headers-are-em-.snap.svg | 19 + ...rs-correctly-when-there-are-more-.snap.svg | 24 ++ ...eaders-and-renders-them-correctly.snap.svg | 25 ++ ...-wraps-all-long-columns-correctly.snap.svg | 52 +++ ...olumns-with-punctuation-correctly.snap.svg | 51 +++ ...wraps-long-cell-content-correctly.snap.svg | 35 ++ ...-long-and-short-columns-correctly.snap.svg | 36 ++ 17 files changed, 955 insertions(+), 23 deletions(-) create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-non-ASCII-characters-emojis-.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-wrapped-bold-headers-without-showing-markers.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-3x3-table-correctly.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-complex-table-with-mixed-content-lengths-correctly.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-long-headers-and-4-columns-correctly.snap.svg create mode 100644 
packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-mixed-emojis-As-.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-only-Asian-chara-.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-only-emojis-and-.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-correctly-when-headers-are-em-.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-correctly-when-there-are-more-.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-strips-bold-markers-from-headers-and-renders-them-correctly.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-all-long-columns-correctly.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-columns-with-punctuation-correctly.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-long-cell-content-correctly.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-mixed-long-and-short-columns-correctly.snap.svg diff --git a/integration-tests/plan-mode.test.ts b/integration-tests/plan-mode.test.ts index 784bb890a0..f71006a36c 100644 --- a/integration-tests/plan-mode.test.ts +++ b/integration-tests/plan-mode.test.ts @@ -62,7 +62,7 @@ describe('Plan Mode', () => { }); }); - it('should allow write_file only in the plans directory in plan mode', async () => { + it.skip('should allow write_file only in the plans directory in plan mode', async () => { await rig.setup( 'should allow write_file only in the plans directory in plan mode', { diff --git a/packages/cli/src/ui/utils/TableRenderer.test.tsx b/packages/cli/src/ui/utils/TableRenderer.test.tsx index 
9d22d5d301..e9d84e6649 100644 --- a/packages/cli/src/ui/utils/TableRenderer.test.tsx +++ b/packages/cli/src/ui/utils/TableRenderer.test.tsx @@ -17,20 +17,21 @@ describe('TableRenderer', () => { ]; const terminalWidth = 80; - const { lastFrame, waitUntilReady, unmount } = renderWithProviders( + const renderResult = renderWithProviders( , ); + const { lastFrame, waitUntilReady, unmount } = renderResult; await waitUntilReady(); const output = lastFrame(); expect(output).toContain('Header 1'); expect(output).toContain('Row 1, Col 1'); expect(output).toContain('Row 3, Col 3'); - expect(output).toMatchSnapshot(); + await expect(renderResult).toMatchSvgSnapshot(); unmount(); }); @@ -48,13 +49,14 @@ describe('TableRenderer', () => { ]; const terminalWidth = 80; - const { lastFrame, waitUntilReady, unmount } = renderWithProviders( + const renderResult = renderWithProviders( , ); + const { lastFrame, waitUntilReady, unmount } = renderResult; await waitUntilReady(); const output = lastFrame(); @@ -62,7 +64,7 @@ describe('TableRenderer', () => { // We just check for some of the content. 
expect(output).toContain('Data 1.1'); expect(output).toContain('Data 3.4'); - expect(output).toMatchSnapshot(); + await expect(renderResult).toMatchSvgSnapshot(); unmount(); }); @@ -77,19 +79,20 @@ describe('TableRenderer', () => { ]; const terminalWidth = 50; - const { lastFrame, waitUntilReady, unmount } = renderWithProviders( + const renderResult = renderWithProviders( , ); + const { lastFrame, waitUntilReady, unmount } = renderResult; await waitUntilReady(); const output = lastFrame(); expect(output).toContain('This is a very'); expect(output).toContain('long cell'); - expect(output).toMatchSnapshot(); + await expect(renderResult).toMatchSvgSnapshot(); unmount(); }); @@ -104,18 +107,19 @@ describe('TableRenderer', () => { ]; const terminalWidth = 60; - const { lastFrame, waitUntilReady, unmount } = renderWithProviders( + const renderResult = renderWithProviders( , ); + const { lastFrame, waitUntilReady, unmount } = renderResult; await waitUntilReady(); const output = lastFrame(); expect(output).toContain('wrapping in'); - expect(output).toMatchSnapshot(); + await expect(renderResult).toMatchSvgSnapshot(); unmount(); }); @@ -130,19 +134,20 @@ describe('TableRenderer', () => { ]; const terminalWidth = 50; - const { lastFrame, waitUntilReady, unmount } = renderWithProviders( + const renderResult = renderWithProviders( , ); + const { lastFrame, waitUntilReady, unmount } = renderResult; await waitUntilReady(); const output = lastFrame(); expect(output).toContain('Tiny'); expect(output).toContain('definitely needs'); - expect(output).toMatchSnapshot(); + await expect(renderResult).toMatchSvgSnapshot(); unmount(); }); @@ -158,18 +163,19 @@ describe('TableRenderer', () => { ]; const terminalWidth = 60; - const { lastFrame, waitUntilReady, unmount } = renderWithProviders( + const renderResult = renderWithProviders( , ); + const { lastFrame, waitUntilReady, unmount } = renderResult; await waitUntilReady(); const output = lastFrame(); expect(output).toContain('Start. 
Stop.'); - expect(output).toMatchSnapshot(); + await expect(renderResult).toMatchSvgSnapshot(); unmount(); }); @@ -178,20 +184,21 @@ describe('TableRenderer', () => { const rows = [['Data 1', 'Data 2', 'Data 3']]; const terminalWidth = 50; - const { lastFrame, waitUntilReady, unmount } = renderWithProviders( + const renderResult = renderWithProviders( , ); + const { lastFrame, waitUntilReady, unmount } = renderResult; await waitUntilReady(); const output = lastFrame(); // The output should NOT contain the literal '**' expect(output).not.toContain('**Bold Header**'); expect(output).toContain('Bold Header'); - expect(output).toMatchSnapshot(); + await expect(renderResult).toMatchSvgSnapshot(); unmount(); }); @@ -204,20 +211,21 @@ describe('TableRenderer', () => { const rows = [['Data 1', 'Data 2', 'Data 3']]; const terminalWidth = 40; - const { lastFrame, waitUntilReady, unmount } = renderWithProviders( + const renderResult = renderWithProviders( , ); + const { lastFrame, waitUntilReady, unmount } = renderResult; await waitUntilReady(); const output = lastFrame(); // Markers should be gone expect(output).not.toContain('**'); expect(output).toContain('Very Long'); - expect(output).toMatchSnapshot(); + await expect(renderResult).toMatchSvgSnapshot(); unmount(); }); @@ -247,7 +255,7 @@ describe('TableRenderer', () => { const terminalWidth = 160; - const { lastFrame, waitUntilReady, unmount } = renderWithProviders( + const renderResult = renderWithProviders( { />, { width: terminalWidth }, ); + const { lastFrame, waitUntilReady, unmount } = renderResult; await waitUntilReady(); const output = lastFrame(); @@ -271,7 +280,7 @@ describe('TableRenderer', () => { expect(output).toContain('J.'); expect(output).toContain('Doe'); - expect(output).toMatchSnapshot(); + await expect(renderResult).toMatchSvgSnapshot(); unmount(); }); @@ -317,7 +326,7 @@ describe('TableRenderer', () => { expected: ['Mixed 😃 中文', '你好 😃', 'こんにちは 🚀'], }, ])('$name', async ({ headers, rows, 
terminalWidth, expected }) => { - const { lastFrame, waitUntilReady, unmount } = renderWithProviders( + const renderResult = renderWithProviders( { />, { width: terminalWidth }, ); + const { lastFrame, waitUntilReady, unmount } = renderResult; await waitUntilReady(); const output = lastFrame(); expected.forEach((text) => { expect(output).toContain(text); }); - expect(output).toMatchSnapshot(); + await expect(renderResult).toMatchSvgSnapshot(); unmount(); }); @@ -351,19 +361,21 @@ describe('TableRenderer', () => { ])('$name', async ({ headers, rows, expected }) => { const terminalWidth = 50; - const { lastFrame, waitUntilReady } = renderWithProviders( + const renderResult = renderWithProviders( , ); + const { lastFrame, waitUntilReady, unmount } = renderResult; await waitUntilReady(); const output = lastFrame(); expected.forEach((text) => { expect(output).toContain(text); }); - expect(output).toMatchSnapshot(); + await expect(renderResult).toMatchSvgSnapshot(); + unmount(); }); }); diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-non-ASCII-characters-emojis-.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-non-ASCII-characters-emojis-.snap.svg new file mode 100644 index 0000000000..d9612cce33 --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-non-ASCII-characters-emojis-.snap.svg @@ -0,0 +1,32 @@ + + + + + ┌──────────────┬────────────┬───────────────┐ + + Emoji 😃 + + Asian 汉字 + + Mixed 🚀 Text + + ├──────────────┼────────────┼───────────────┤ + + Start 🌟 End + + 你好世界 + + Rocket 🚀 Man + + + Thumbs 👍 Up + + こんにちは + + Fire 🔥 + + └──────────────┴────────────┴───────────────┘ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-wrapped-bold-headers-without-showing-markers.snap.svg 
b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-wrapped-bold-headers-without-showing-markers.snap.svg new file mode 100644 index 0000000000..0118d133cf --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-wrapped-bold-headers-without-showing-markers.snap.svg @@ -0,0 +1,47 @@ + + + + + ┌─────────────┬───────┬─────────┐ + + Very Long + + Short + + Another + + + Bold Header + + + Long + + + That Will + + + Header + + + Wrap + + + + ├─────────────┼───────┼─────────┤ + + Data 1 + + Data + + Data 3 + + + + 2 + + + └─────────────┴───────┴─────────┘ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-3x3-table-correctly.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-3x3-table-correctly.snap.svg new file mode 100644 index 0000000000..84e4d856f6 --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-3x3-table-correctly.snap.svg @@ -0,0 +1,39 @@ + + + + + ┌──────────────┬──────────────┬──────────────┐ + + Header 1 + + Header 2 + + Header 3 + + ├──────────────┼──────────────┼──────────────┤ + + Row 1, Col 1 + + Row 1, Col 2 + + Row 1, Col 3 + + + Row 2, Col 1 + + Row 2, Col 2 + + Row 2, Col 3 + + + Row 3, Col 1 + + Row 3, Col 2 + + Row 3, Col 3 + + └──────────────┴──────────────┴──────────────┘ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-complex-table-with-mixed-content-lengths-correctly.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-complex-table-with-mixed-content-lengths-correctly.snap.svg new file mode 100644 index 0000000000..95654cb4d8 --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-complex-table-with-mixed-content-lengths-correctly.snap.svg @@ -0,0 +1,401 @@ + + + + + 
┌─────────────────────────────┬──────────────────────────────┬─────────────────────────────┬──────────────────────────────┬─────┬────────┬─────────┬───────┐ + + Comprehensive Architectural + + Implementation Details for + + Longitudinal Performance + + Strategic Security Framework + + Key + + Status + + Version + + Owner + + + Specification for the + + the High-Throughput + + Analysis Across + + for Mitigating Sophisticated + + + + + + + Distributed Infrastructure + + Asynchronous Message + + Multi-Regional Cloud + + Cross-Site Scripting + + + + + + + Layer + + Processing Pipeline with + + Deployment Clusters + + Vulnerabilities + + + + + + + + Extended Scalability + + + + + + + + + + Features and Redundancy + + + + + + + + + + Protocols + + + + + + + + ├─────────────────────────────┼──────────────────────────────┼─────────────────────────────┼──────────────────────────────┼─────┼────────┼─────────┼───────┤ + + The primary architecture + + Each message is processed + + Historical data indicates a + + A multi-layered defense + + INF + + Active + + v2.4 + + J. + + + utilizes a decoupled + + through a series of + + significant reduction in + + strategy incorporates + + + + + Doe + + + microservices approach, + + specialized workers that + + tail latency when utilizing + + content security policies, + + + + + + + leveraging container + + handle data transformation, + + edge computing nodes closer + + input sanitization + + + + + + + orchestration for + + validation, and persistent + + to the geographic location + + libraries, and regular + + + + + + + scalability and fault + + storage using a persistent + + of the end-user base. + + automated penetration + + + + + + + tolerance in high-load + + queue. + + + testing routines. + + + + + + + scenarios. 
+ + + Monitoring tools have + + + + + + + + + The pipeline features + + captured a steady increase + + Developers are required to + + + + + + + This layer provides the + + built-in retry mechanisms + + in throughput efficiency + + undergo mandatory security + + + + + + + fundamental building blocks + + with exponential backoff to + + since the introduction of + + training focusing on the + + + + + + + for service discovery, load + + ensure message delivery + + the vectorized query engine + + OWASP Top Ten to ensure that + + + + + + + balancing, and + + integrity even during + + in the primary data + + security is integrated into + + + + + + + inter-service communication + + transient network or service + + warehouse. + + the initial design phase. + + + + + + + via highly efficient + + failures. + + + + + + + + + protocol buffers. + + + Resource utilization + + The implementation of a + + + + + + + + Horizontal autoscaling is + + metrics demonstrate that + + robust Identity and Access + + + + + + + Advanced telemetry and + + triggered automatically + + the transition to + + Management system ensures + + + + + + + logging integrations allow + + based on the depth of the + + serverless compute for + + that the principle of least + + + + + + + for real-time monitoring of + + processing queue, ensuring + + intermittent tasks has + + privilege is strictly + + + + + + + system health and rapid + + consistent performance + + resulted in a thirty + + enforced across all + + + + + + + identification of + + during unexpected traffic + + percent cost optimization. + + environments. + + + + + + + bottlenecks within the + + spikes. + + + + + + + + + service mesh. 
+ + + + + + + + + └─────────────────────────────┴──────────────────────────────┴─────────────────────────────┴──────────────────────────────┴─────┴────────┴─────────┴───────┘ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-long-headers-and-4-columns-correctly.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-long-headers-and-4-columns-correctly.snap.svg new file mode 100644 index 0000000000..b4d6353c3c --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-long-headers-and-4-columns-correctly.snap.svg @@ -0,0 +1,63 @@ + + + + + ┌───────────────┬───────────────┬──────────────────┬──────────────────┐ + + Very Long + + Very Long + + Very Long Column + + Very Long Column + + + Column Header + + Column Header + + Header Three + + Header Four + + + One + + Two + + + + ├───────────────┼───────────────┼──────────────────┼──────────────────┤ + + Data 1.1 + + Data 1.2 + + Data 1.3 + + Data 1.4 + + + Data 2.1 + + Data 2.2 + + Data 2.3 + + Data 2.4 + + + Data 3.1 + + Data 3.2 + + Data 3.3 + + Data 3.4 + + └───────────────┴───────────────┴──────────────────┴──────────────────┘ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-mixed-emojis-As-.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-mixed-emojis-As-.snap.svg new file mode 100644 index 0000000000..707bf53f43 --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-mixed-emojis-As-.snap.svg @@ -0,0 +1,32 @@ + + + + + ┌───────────────┬───────────────────┬────────────────┐ + + Mixed 😃 中文 + + Complex 🚀 日本語 + + Text 📝 한국어 + + ├───────────────┼───────────────────┼────────────────┤ + + 你好 😃 + + こんにちは 🚀 + + 안녕하세요 📝 + + + World 🌍 + + Code 💻 + + Pizza 🍕 + + 
└───────────────┴───────────────────┴────────────────┘ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-only-Asian-chara-.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-only-Asian-chara-.snap.svg new file mode 100644 index 0000000000..0f51eba244 --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-only-Asian-chara-.snap.svg @@ -0,0 +1,32 @@ + + + + + ┌──────────────┬─────────────────┬───────────────┐ + + Chinese 中文 + + Japanese 日本語 + + Korean 한국어 + + ├──────────────┼─────────────────┼───────────────┤ + + 你好 + + こんにちは + + 안녕하세요 + + + 世界 + + 世界 + + 세계 + + └──────────────┴─────────────────┴───────────────┘ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-only-emojis-and-.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-only-emojis-and-.snap.svg new file mode 100644 index 0000000000..1a849696dd --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-only-emojis-and-.snap.svg @@ -0,0 +1,32 @@ + + + + + ┌──────────┬───────────┬──────────┐ + + Happy 😀 + + Rocket 🚀 + + Heart ❤️ + + ├──────────┼───────────┼──────────┤ + + Smile 😃 + + Fire 🔥 + + Love 💖 + + + Cool 😎 + + Star ⭐ + + Blue 💙 + + └──────────┴───────────┴──────────┘ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-correctly-when-headers-are-em-.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-correctly-when-headers-are-em-.snap.svg new file mode 100644 index 0000000000..2cc7b1cadd --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-correctly-when-headers-are-em-.snap.svg @@ -0,0 +1,19 @@ 
+ + + + + ┌────────┬────────┐ + + + + ├────────┼────────┤ + + Data 1 + + Data 2 + + └────────┴────────┘ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-correctly-when-there-are-more-.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-correctly-when-there-are-more-.snap.svg new file mode 100644 index 0000000000..452bb1fb12 --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-correctly-when-there-are-more-.snap.svg @@ -0,0 +1,24 @@ + + + + + ┌──────────┬──────────┬──────────┐ + + Header 1 + + Header 2 + + Header 3 + + ├──────────┼──────────┼──────────┤ + + Data 1 + + Data 2 + + + └──────────┴──────────┴──────────┘ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-strips-bold-markers-from-headers-and-renders-them-correctly.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-strips-bold-markers-from-headers-and-renders-them-correctly.snap.svg new file mode 100644 index 0000000000..6de776060b --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-strips-bold-markers-from-headers-and-renders-them-correctly.snap.svg @@ -0,0 +1,25 @@ + + + + + ┌─────────────┬───────────────┬──────────────┐ + + Bold Header + + Normal Header + + Another Bold + + ├─────────────┼───────────────┼──────────────┤ + + Data 1 + + Data 2 + + Data 3 + + └─────────────┴───────────────┴──────────────┘ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-all-long-columns-correctly.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-all-long-columns-correctly.snap.svg new file mode 100644 index 0000000000..4b459cfea0 --- /dev/null +++ 
b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-all-long-columns-correctly.snap.svg @@ -0,0 +1,52 @@ + + + + + ┌────────────────┬────────────────┬─────────────────┐ + + Col 1 + + Col 2 + + Col 3 + + ├────────────────┼────────────────┼─────────────────┤ + + This is a very + + This is also a + + And this is the + + + long text that + + very long text + + third long text + + + needs wrapping + + that needs + + that needs + + + in column 1 + + wrapping in + + wrapping in + + + + column 2 + + column 3 + + └────────────────┴────────────────┴─────────────────┘ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-columns-with-punctuation-correctly.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-columns-with-punctuation-correctly.snap.svg new file mode 100644 index 0000000000..7173ce475f --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-columns-with-punctuation-correctly.snap.svg @@ -0,0 +1,51 @@ + + + + + ┌───────────────────┬───────────────┬─────────────────┐ + + Punctuation 1 + + Punctuation 2 + + Punctuation 3 + + ├───────────────────┼───────────────┼─────────────────┤ + + Start. Stop. + + Semi; colon: + + At@ Hash# + + + Comma, separated. + + Pipe| Slash/ + + Dollar$ + + + Exclamation! + + Backslash\ + + Percent% Caret^ + + + Question? 
+ + + Ampersand& + + + hyphen-ated + + + Asterisk* + + └───────────────────┴───────────────┴─────────────────┘ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-long-cell-content-correctly.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-long-cell-content-correctly.snap.svg new file mode 100644 index 0000000000..7f7b67a7dd --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-long-cell-content-correctly.snap.svg @@ -0,0 +1,35 @@ + + + + + ┌───────┬─────────────────────────────┬───────┐ + + Col 1 + + Col 2 + + Col 3 + + ├───────┼─────────────────────────────┼───────┤ + + Short + + This is a very long cell + + Short + + + + content that should wrap to + + + + + multiple lines + + + └───────┴─────────────────────────────┴───────┘ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-mixed-long-and-short-columns-correctly.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-mixed-long-and-short-columns-correctly.snap.svg new file mode 100644 index 0000000000..3ff0542a26 --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-mixed-long-and-short-columns-correctly.snap.svg @@ -0,0 +1,36 @@ + + + + + ┌───────┬──────────────────────────┬────────┐ + + Short + + Long + + Medium + + ├───────┼──────────────────────────┼────────┤ + + Tiny + + This is a very long text + + Not so + + + + that definitely needs to + + long + + + + wrap to the next line + + + └───────┴──────────────────────────┴────────┘ + + \ No newline at end of file From ac454bcfbc6a61807af249da88e765cc22b38f9c Mon Sep 17 00:00:00 2001 From: Google Admin Date: Thu, 26 Feb 2026 09:58:14 -0800 Subject: [PATCH 03/41] Refactor Github Action per b/485167538 (#19443) Co-authored-by: Ben Knutson --- .../actions/create-pull-request/action.yml | 
16 +++-- .github/actions/npm-auth-token/action.yml | 18 ++++-- .github/actions/publish-release/action.yml | 61 +++++++++++++------ .github/actions/push-docker/action.yml | 4 +- .github/actions/push-sandbox/action.yml | 14 +++-- .github/actions/setup-npmrc/action.yml | 4 +- .github/actions/tag-npm-release/action.yml | 28 +++++++-- .github/actions/verify-release/action.yml | 16 +++-- .github/workflows/chained_e2e.yml | 15 +++-- .github/workflows/ci.yml | 22 ++++--- .github/workflows/deflake.yml | 10 +-- .github/workflows/eval.yml | 2 +- .github/workflows/evals-nightly.yml | 2 +- .github/workflows/release-patch-3-release.yml | 16 +++-- .github/workflows/release-promote.yml | 11 +++- .github/workflows/trigger_e2e.yml | 4 +- 16 files changed, 167 insertions(+), 76 deletions(-) diff --git a/.github/actions/create-pull-request/action.yml b/.github/actions/create-pull-request/action.yml index 6a6b6dbf03..fbbc910bc5 100644 --- a/.github/actions/create-pull-request/action.yml +++ b/.github/actions/create-pull-request/action.yml @@ -39,18 +39,22 @@ runs: if: "inputs.dry-run != 'true'" env: GH_TOKEN: '${{ inputs.github-token }}' + INPUTS_BRANCH_NAME: ${{ inputs.branch-name }} + INPUTS_PR_TITLE: ${{ inputs.pr-title }} + INPUTS_PR_BODY: ${{ inputs.pr-body }} + INPUTS_BASE_BRANCH: ${{ inputs.base-branch }} shell: 'bash' working-directory: '${{ inputs.working-directory }}' run: | set -e - if ! git ls-remote --exit-code --heads origin "${{ inputs.branch-name }}"; then - echo "::error::Branch '${{ inputs.branch-name }}' does not exist on the remote repository." + if ! git ls-remote --exit-code --heads origin "${INPUTS_BRANCH_NAME}"; then + echo "::error::Branch '${INPUTS_BRANCH_NAME}' does not exist on the remote repository." 
exit 1 fi PR_URL=$(gh pr create \ - --title "${{ inputs.pr-title }}" \ - --body "${{ inputs.pr-body }}" \ - --base "${{ inputs.base-branch }}" \ - --head "${{ inputs.branch-name }}" \ + --title "${INPUTS_PR_TITLE}" \ + --body "${INPUTS_PR_BODY}" \ + --base "${INPUTS_BASE_BRANCH}" \ + --head "${INPUTS_BRANCH_NAME}" \ --fill) gh pr merge "$PR_URL" --auto diff --git a/.github/actions/npm-auth-token/action.yml b/.github/actions/npm-auth-token/action.yml index 94249d6c51..1c19da5971 100644 --- a/.github/actions/npm-auth-token/action.yml +++ b/.github/actions/npm-auth-token/action.yml @@ -30,16 +30,22 @@ runs: id: 'npm_auth_token' shell: 'bash' run: | - AUTH_TOKEN="${{ inputs.github-token }}" - PACKAGE_NAME="${{ inputs.package-name }}" + AUTH_TOKEN="${INPUTS_GITHUB_TOKEN}" + PACKAGE_NAME="${INPUTS_PACKAGE_NAME}" PRIVATE_REPO="@google-gemini/" if [[ "$PACKAGE_NAME" == "$PRIVATE_REPO"* ]]; then - AUTH_TOKEN="${{ inputs.github-token }}" + AUTH_TOKEN="${INPUTS_GITHUB_TOKEN}" elif [[ "$PACKAGE_NAME" == "@google/gemini-cli" ]]; then - AUTH_TOKEN="${{ inputs.wombat-token-cli }}" + AUTH_TOKEN="${INPUTS_WOMBAT_TOKEN_CLI}" elif [[ "$PACKAGE_NAME" == "@google/gemini-cli-core" ]]; then - AUTH_TOKEN="${{ inputs.wombat-token-core }}" + AUTH_TOKEN="${INPUTS_WOMBAT_TOKEN_CORE}" elif [[ "$PACKAGE_NAME" == "@google/gemini-cli-a2a-server" ]]; then - AUTH_TOKEN="${{ inputs.wombat-token-a2a-server }}" + AUTH_TOKEN="${INPUTS_WOMBAT_TOKEN_A2A_SERVER}" fi echo "auth-token=$AUTH_TOKEN" >> $GITHUB_OUTPUT + env: + INPUTS_GITHUB_TOKEN: ${{ inputs.github-token }} + INPUTS_PACKAGE_NAME: ${{ inputs.package-name }} + INPUTS_WOMBAT_TOKEN_CLI: ${{ inputs.wombat-token-cli }} + INPUTS_WOMBAT_TOKEN_CORE: ${{ inputs.wombat-token-core }} + INPUTS_WOMBAT_TOKEN_A2A_SERVER: ${{ inputs.wombat-token-a2a-server }} diff --git a/.github/actions/publish-release/action.yml b/.github/actions/publish-release/action.yml index 5c74524ddb..35e65493c9 100644 --- a/.github/actions/publish-release/action.yml +++ 
b/.github/actions/publish-release/action.yml @@ -93,15 +93,19 @@ runs: id: 'release_branch' shell: 'bash' run: | - BRANCH_NAME="release/${{ inputs.release-tag }}" + BRANCH_NAME="release/${INPUTS_RELEASE_TAG}" git switch -c "${BRANCH_NAME}" echo "BRANCH_NAME=${BRANCH_NAME}" >> "${GITHUB_OUTPUT}" + env: + INPUTS_RELEASE_TAG: ${{ inputs.release-tag }} - name: '⬆️ Update package versions' working-directory: '${{ inputs.working-directory }}' shell: 'bash' run: | - npm run release:version "${{ inputs.release-version }}" + npm run release:version "${INPUTS_RELEASE_VERSION}" + env: + INPUTS_RELEASE_VERSION: ${{ inputs.release-version }} - name: '💾 Commit and Conditionally Push package versions' working-directory: '${{ inputs.working-directory }}' @@ -163,23 +167,30 @@ runs: working-directory: '${{ inputs.working-directory }}' env: NODE_AUTH_TOKEN: '${{ steps.core-token.outputs.auth-token }}' + INPUTS_DRY_RUN: ${{ inputs.dry-run }} + INPUTS_CORE_PACKAGE_NAME: ${{ inputs.core-package-name }} shell: 'bash' run: | npm publish \ - --dry-run="${{ inputs.dry-run }}" \ - --workspace="${{ inputs.core-package-name }}" \ + --dry-run="${INPUTS_DRY_RUN}" \ + --workspace="${INPUTS_CORE_PACKAGE_NAME}" \ --no-tag - npm dist-tag rm ${{ inputs.core-package-name }} false --silent + npm dist-tag rm ${INPUTS_CORE_PACKAGE_NAME} false --silent - name: '🔗 Install latest core package' working-directory: '${{ inputs.working-directory }}' if: "${{ inputs.dry-run != 'true' }}" shell: 'bash' run: | - npm install "${{ inputs.core-package-name }}@${{ inputs.release-version }}" \ - --workspace="${{ inputs.cli-package-name }}" \ - --workspace="${{ inputs.a2a-package-name }}" \ + npm install "${INPUTS_CORE_PACKAGE_NAME}@${INPUTS_RELEASE_VERSION}" \ + --workspace="${INPUTS_CLI_PACKAGE_NAME}" \ + --workspace="${INPUTS_A2A_PACKAGE_NAME}" \ --save-exact + env: + INPUTS_CORE_PACKAGE_NAME: ${{ inputs.core-package-name }} + INPUTS_RELEASE_VERSION: ${{ inputs.release-version }} + INPUTS_CLI_PACKAGE_NAME: ${{ 
inputs.cli-package-name }} + INPUTS_A2A_PACKAGE_NAME: ${{ inputs.a2a-package-name }} - name: 'Get CLI Token' uses: './.github/actions/npm-auth-token' @@ -195,13 +206,15 @@ runs: working-directory: '${{ inputs.working-directory }}' env: NODE_AUTH_TOKEN: '${{ steps.cli-token.outputs.auth-token }}' + INPUTS_DRY_RUN: ${{ inputs.dry-run }} + INPUTS_CLI_PACKAGE_NAME: ${{ inputs.cli-package-name }} shell: 'bash' run: | npm publish \ - --dry-run="${{ inputs.dry-run }}" \ - --workspace="${{ inputs.cli-package-name }}" \ + --dry-run="${INPUTS_DRY_RUN}" \ + --workspace="${INPUTS_CLI_PACKAGE_NAME}" \ --no-tag - npm dist-tag rm ${{ inputs.cli-package-name }} false --silent + npm dist-tag rm ${INPUTS_CLI_PACKAGE_NAME} false --silent - name: 'Get a2a-server Token' uses: './.github/actions/npm-auth-token' @@ -217,14 +230,16 @@ runs: working-directory: '${{ inputs.working-directory }}' env: NODE_AUTH_TOKEN: '${{ steps.a2a-token.outputs.auth-token }}' + INPUTS_DRY_RUN: ${{ inputs.dry-run }} + INPUTS_A2A_PACKAGE_NAME: ${{ inputs.a2a-package-name }} shell: 'bash' # Tag staging for initial release run: | npm publish \ - --dry-run="${{ inputs.dry-run }}" \ - --workspace="${{ inputs.a2a-package-name }}" \ + --dry-run="${INPUTS_DRY_RUN}" \ + --workspace="${INPUTS_A2A_PACKAGE_NAME}" \ --no-tag - npm dist-tag rm ${{ inputs.a2a-package-name }} false --silent + npm dist-tag rm ${INPUTS_A2A_PACKAGE_NAME} false --silent - name: '🔬 Verify NPM release by version' uses: './.github/actions/verify-release' @@ -258,13 +273,16 @@ runs: if: "${{ inputs.dry-run != 'true' && inputs.skip-github-release != 'true' && inputs.npm-tag != 'dev' && inputs.npm-registry-url != 'https://npm.pkg.github.com/' }}" env: GITHUB_TOKEN: '${{ inputs.github-release-token || inputs.github-token }}' + INPUTS_RELEASE_TAG: ${{ inputs.release-tag }} + STEPS_RELEASE_BRANCH_OUTPUTS_BRANCH_NAME: ${{ steps.release_branch.outputs.BRANCH_NAME }} + INPUTS_PREVIOUS_TAG: ${{ inputs.previous-tag }} shell: 'bash' run: | - gh release create 
"${{ inputs.release-tag }}" \ + gh release create "${INPUTS_RELEASE_TAG}" \ bundle/gemini.js \ - --target "${{ steps.release_branch.outputs.BRANCH_NAME }}" \ - --title "Release ${{ inputs.release-tag }}" \ - --notes-start-tag "${{ inputs.previous-tag }}" \ + --target "${STEPS_RELEASE_BRANCH_OUTPUTS_BRANCH_NAME}" \ + --title "Release ${INPUTS_RELEASE_TAG}" \ + --notes-start-tag "${INPUTS_PREVIOUS_TAG}" \ --generate-notes \ ${{ inputs.npm-tag != 'latest' && '--prerelease' || '' }} @@ -274,5 +292,8 @@ runs: continue-on-error: true shell: 'bash' run: | - echo "Cleaning up release branch ${{ steps.release_branch.outputs.BRANCH_NAME }}..." - git push origin --delete "${{ steps.release_branch.outputs.BRANCH_NAME }}" + echo "Cleaning up release branch ${STEPS_RELEASE_BRANCH_OUTPUTS_BRANCH_NAME}..." + git push origin --delete "${STEPS_RELEASE_BRANCH_OUTPUTS_BRANCH_NAME}" + + env: + STEPS_RELEASE_BRANCH_OUTPUTS_BRANCH_NAME: ${{ steps.release_branch.outputs.BRANCH_NAME }} diff --git a/.github/actions/push-docker/action.yml b/.github/actions/push-docker/action.yml index 5016d76821..67f03b3cca 100644 --- a/.github/actions/push-docker/action.yml +++ b/.github/actions/push-docker/action.yml @@ -52,8 +52,10 @@ runs: id: 'branch_name' shell: 'bash' run: | - REF_NAME="${{ inputs.ref-name }}" + REF_NAME="${INPUTS_REF_NAME}" echo "name=${REF_NAME%/merge}" >> $GITHUB_OUTPUT + env: + INPUTS_REF_NAME: ${{ inputs.ref-name }} - name: 'Build and Push the Docker Image' uses: 'docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83' # ratchet:docker/build-push-action@v6 with: diff --git a/.github/actions/push-sandbox/action.yml b/.github/actions/push-sandbox/action.yml index db75ce10cd..22edf5d838 100644 --- a/.github/actions/push-sandbox/action.yml +++ b/.github/actions/push-sandbox/action.yml @@ -56,8 +56,8 @@ runs: id: 'image_tag' shell: 'bash' run: |- - SHELL_TAG_NAME="${{ inputs.github-ref-name }}" - FINAL_TAG="${{ inputs.github-sha }}" + 
SHELL_TAG_NAME="${INPUTS_GITHUB_REF_NAME}" + FINAL_TAG="${INPUTS_GITHUB_SHA}" if [[ "$SHELL_TAG_NAME" =~ ^v[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.-]+)?$ ]]; then echo "Release detected." FINAL_TAG="${SHELL_TAG_NAME#v}" @@ -66,15 +66,19 @@ runs: fi echo "Determined image tag: $FINAL_TAG" echo "FINAL_TAG=$FINAL_TAG" >> $GITHUB_OUTPUT + env: + INPUTS_GITHUB_REF_NAME: ${{ inputs.github-ref-name }} + INPUTS_GITHUB_SHA: ${{ inputs.github-sha }} - name: 'build' id: 'docker_build' shell: 'bash' env: GEMINI_SANDBOX_IMAGE_TAG: '${{ steps.image_tag.outputs.FINAL_TAG }}' GEMINI_SANDBOX: 'docker' + STEPS_IMAGE_TAG_OUTPUTS_FINAL_TAG: ${{ steps.image_tag.outputs.FINAL_TAG }} run: |- npm run build:sandbox -- \ - --image google/gemini-cli-sandbox:${{ steps.image_tag.outputs.FINAL_TAG }} \ + --image google/gemini-cli-sandbox:${STEPS_IMAGE_TAG_OUTPUTS_FINAL_TAG} \ --output-file final_image_uri.txt echo "uri=$(cat final_image_uri.txt)" >> $GITHUB_OUTPUT - name: 'verify' @@ -89,7 +93,9 @@ runs: shell: 'bash' if: "${{ inputs.dry-run != 'true' }}" run: |- - docker push "${{ steps.docker_build.outputs.uri }}" + docker push "${STEPS_DOCKER_BUILD_OUTPUTS_URI}" + env: + STEPS_DOCKER_BUILD_OUTPUTS_URI: ${{ steps.docker_build.outputs.uri }} - name: 'Create issue on failure' if: |- ${{ failure() }} diff --git a/.github/actions/setup-npmrc/action.yml b/.github/actions/setup-npmrc/action.yml index fba0c14712..5dddfe48f3 100644 --- a/.github/actions/setup-npmrc/action.yml +++ b/.github/actions/setup-npmrc/action.yml @@ -18,5 +18,7 @@ runs: shell: 'bash' run: |- echo ""@google-gemini:registry=https://npm.pkg.github.com"" > ~/.npmrc - echo ""//npm.pkg.github.com/:_authToken=${{ inputs.github-token }}"" >> ~/.npmrc + echo ""//npm.pkg.github.com/:_authToken=${INPUTS_GITHUB_TOKEN}"" >> ~/.npmrc echo ""@google:registry=https://wombat-dressing-room.appspot.com"" >> ~/.npmrc + env: + INPUTS_GITHUB_TOKEN: ${{ inputs.github-token }} diff --git a/.github/actions/tag-npm-release/action.yml 
b/.github/actions/tag-npm-release/action.yml index 7bcafcb6b2..8088035598 100644 --- a/.github/actions/tag-npm-release/action.yml +++ b/.github/actions/tag-npm-release/action.yml @@ -71,10 +71,13 @@ runs: ${{ inputs.dry-run != 'true' }} env: NODE_AUTH_TOKEN: '${{ steps.core-token.outputs.auth-token }}' + INPUTS_CORE_PACKAGE_NAME: ${{ inputs.core-package-name }} + INPUTS_VERSION: ${{ inputs.version }} + INPUTS_CHANNEL: ${{ inputs.channel }} shell: 'bash' working-directory: '${{ inputs.working-directory }}' run: | - npm dist-tag add ${{ inputs.core-package-name }}@${{ inputs.version }} ${{ inputs.channel }} + npm dist-tag add ${INPUTS_CORE_PACKAGE_NAME}@${INPUTS_VERSION} ${INPUTS_CHANNEL} - name: 'Get cli Token' uses: './.github/actions/npm-auth-token' @@ -91,10 +94,13 @@ runs: ${{ inputs.dry-run != 'true' }} env: NODE_AUTH_TOKEN: '${{ steps.cli-token.outputs.auth-token }}' + INPUTS_CLI_PACKAGE_NAME: ${{ inputs.cli-package-name }} + INPUTS_VERSION: ${{ inputs.version }} + INPUTS_CHANNEL: ${{ inputs.channel }} shell: 'bash' working-directory: '${{ inputs.working-directory }}' run: | - npm dist-tag add ${{ inputs.cli-package-name }}@${{ inputs.version }} ${{ inputs.channel }} + npm dist-tag add ${INPUTS_CLI_PACKAGE_NAME}@${INPUTS_VERSION} ${INPUTS_CHANNEL} - name: 'Get a2a Token' uses: './.github/actions/npm-auth-token' @@ -111,10 +117,13 @@ runs: ${{ inputs.dry-run == 'false' }} env: NODE_AUTH_TOKEN: '${{ steps.a2a-token.outputs.auth-token }}' + INPUTS_A2A_PACKAGE_NAME: ${{ inputs.a2a-package-name }} + INPUTS_VERSION: ${{ inputs.version }} + INPUTS_CHANNEL: ${{ inputs.channel }} shell: 'bash' working-directory: '${{ inputs.working-directory }}' run: | - npm dist-tag add ${{ inputs.a2a-package-name }}@${{ inputs.version }} ${{ inputs.channel }} + npm dist-tag add ${INPUTS_A2A_PACKAGE_NAME}@${INPUTS_VERSION} ${INPUTS_CHANNEL} - name: 'Log dry run' if: |- @@ -122,4 +131,15 @@ runs: shell: 'bash' working-directory: '${{ inputs.working-directory }}' run: | - echo "Dry run: 
Would have added tag '${{ inputs.channel }}' to version '${{ inputs.version }}' for ${{ inputs.cli-package-name }}, ${{ inputs.core-package-name }}, and ${{ inputs.a2a-package-name }}." + echo "Dry run: Would have added tag '${INPUTS_CHANNEL}' to version '${INPUTS_VERSION}' for ${INPUTS_CLI_PACKAGE_NAME}, ${INPUTS_CORE_PACKAGE_NAME}, and ${INPUTS_A2A_PACKAGE_NAME}." + + env: + INPUTS_CHANNEL: ${{ inputs.channel }} + + INPUTS_VERSION: ${{ inputs.version }} + + INPUTS_CLI_PACKAGE_NAME: ${{ inputs.cli-package-name }} + + INPUTS_CORE_PACKAGE_NAME: ${{ inputs.core-package-name }} + + INPUTS_A2A_PACKAGE_NAME: ${{ inputs.a2a-package-name }} diff --git a/.github/actions/verify-release/action.yml b/.github/actions/verify-release/action.yml index 14b595cb10..349e6a19d5 100644 --- a/.github/actions/verify-release/action.yml +++ b/.github/actions/verify-release/action.yml @@ -64,10 +64,13 @@ runs: working-directory: '${{ inputs.working-directory }}' run: |- gemini_version=$(gemini --version) - if [ "$gemini_version" != "${{ inputs.expected-version }}" ]; then - echo "❌ NPM Version mismatch: Got $gemini_version from ${{ inputs.npm-package }}, expected ${{ inputs.expected-version }}" + if [ "$gemini_version" != "${INPUTS_EXPECTED_VERSION}" ]; then + echo "❌ NPM Version mismatch: Got $gemini_version from ${INPUTS_NPM_PACKAGE}, expected ${INPUTS_EXPECTED_VERSION}" exit 1 fi + env: + INPUTS_EXPECTED_VERSION: ${{ inputs.expected-version }} + INPUTS_NPM_PACKAGE: ${{ inputs.npm-package }} - name: 'Clear npm cache' shell: 'bash' @@ -77,11 +80,14 @@ runs: shell: 'bash' working-directory: '${{ inputs.working-directory }}' run: |- - gemini_version=$(npx --prefer-online "${{ inputs.npm-package}}" --version) - if [ "$gemini_version" != "${{ inputs.expected-version }}" ]; then - echo "❌ NPX Run Version mismatch: Got $gemini_version from ${{ inputs.npm-package }}, expected ${{ inputs.expected-version }}" + gemini_version=$(npx --prefer-online "${INPUTS_NPM_PACKAGE}" --version) + if [ 
"$gemini_version" != "${INPUTS_EXPECTED_VERSION}" ]; then + echo "❌ NPX Run Version mismatch: Got $gemini_version from ${INPUTS_NPM_PACKAGE}, expected ${INPUTS_EXPECTED_VERSION}" exit 1 fi + env: + INPUTS_NPM_PACKAGE: ${{ inputs.npm-package }} + INPUTS_EXPECTED_VERSION: ${{ inputs.expected-version }} - name: 'Install dependencies for integration tests' shell: 'bash' diff --git a/.github/workflows/chained_e2e.yml b/.github/workflows/chained_e2e.yml index 4b37d0e109..d4d8780cb3 100644 --- a/.github/workflows/chained_e2e.yml +++ b/.github/workflows/chained_e2e.yml @@ -53,7 +53,7 @@ jobs: REPO_NAME: '${{ github.event.inputs.repo_name }}' run: | mkdir -p ./pr - echo '${{ env.REPO_NAME }}' > ./pr/repo_name + echo '${REPO_NAME}' > ./pr/repo_name - uses: 'actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02' # ratchet:actions/upload-artifact@v4 with: name: 'repo_name' @@ -320,14 +320,19 @@ jobs: steps: - name: 'Check E2E test results' run: | - if [[ ${{ needs.e2e_linux.result }} != 'success' || \ - ${{ needs.e2e_mac.result }} != 'success' || \ - ${{ needs.e2e_windows.result }} != 'success' || \ - ${{ needs.evals.result }} != 'success' ]]; then + if [[ ${NEEDS_E2E_LINUX_RESULT} != 'success' || \ + ${NEEDS_E2E_MAC_RESULT} != 'success' || \ + ${NEEDS_E2E_WINDOWS_RESULT} != 'success' || \ + ${NEEDS_EVALS_RESULT} != 'success' ]]; then echo "One or more E2E jobs failed." exit 1 fi echo "All required E2E jobs passed!" 
+ env: + NEEDS_E2E_LINUX_RESULT: ${{ needs.e2e_linux.result }} + NEEDS_E2E_MAC_RESULT: ${{ needs.e2e_mac.result }} + NEEDS_E2E_WINDOWS_RESULT: ${{ needs.e2e_windows.result }} + NEEDS_EVALS_RESULT: ${{ needs.evals.result }} set_workflow_status: runs-on: 'gemini-cli-ubuntu-16-core' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dd7288cde5..ab79110574 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -464,14 +464,22 @@ jobs: steps: - name: 'Check all job results' run: | - if [[ (${{ needs.lint.result }} != 'success' && ${{ needs.lint.result }} != 'skipped') || \ - (${{ needs.link_checker.result }} != 'success' && ${{ needs.link_checker.result }} != 'skipped') || \ - (${{ needs.test_linux.result }} != 'success' && ${{ needs.test_linux.result }} != 'skipped') || \ - (${{ needs.test_mac.result }} != 'success' && ${{ needs.test_mac.result }} != 'skipped') || \ - (${{ needs.test_windows.result }} != 'success' && ${{ needs.test_windows.result }} != 'skipped') || \ - (${{ needs.codeql.result }} != 'success' && ${{ needs.codeql.result }} != 'skipped') || \ - (${{ needs.bundle_size.result }} != 'success' && ${{ needs.bundle_size.result }} != 'skipped') ]]; then + if [[ (${NEEDS_LINT_RESULT} != 'success' && ${NEEDS_LINT_RESULT} != 'skipped') || \ + (${NEEDS_LINK_CHECKER_RESULT} != 'success' && ${NEEDS_LINK_CHECKER_RESULT} != 'skipped') || \ + (${NEEDS_TEST_LINUX_RESULT} != 'success' && ${NEEDS_TEST_LINUX_RESULT} != 'skipped') || \ + (${NEEDS_TEST_MAC_RESULT} != 'success' && ${NEEDS_TEST_MAC_RESULT} != 'skipped') || \ + (${NEEDS_TEST_WINDOWS_RESULT} != 'success' && ${NEEDS_TEST_WINDOWS_RESULT} != 'skipped') || \ + (${NEEDS_CODEQL_RESULT} != 'success' && ${NEEDS_CODEQL_RESULT} != 'skipped') || \ + (${NEEDS_BUNDLE_SIZE_RESULT} != 'success' && ${NEEDS_BUNDLE_SIZE_RESULT} != 'skipped') ]]; then echo "One or more CI jobs failed." exit 1 fi echo "All CI jobs passed!" 
+ env: + NEEDS_LINT_RESULT: ${{ needs.lint.result }} + NEEDS_LINK_CHECKER_RESULT: ${{ needs.link_checker.result }} + NEEDS_TEST_LINUX_RESULT: ${{ needs.test_linux.result }} + NEEDS_TEST_MAC_RESULT: ${{ needs.test_mac.result }} + NEEDS_TEST_WINDOWS_RESULT: ${{ needs.test_windows.result }} + NEEDS_CODEQL_RESULT: ${{ needs.codeql.result }} + NEEDS_BUNDLE_SIZE_RESULT: ${{ needs.bundle_size.result }} diff --git a/.github/workflows/deflake.yml b/.github/workflows/deflake.yml index c9f4c3d59f..a0eb51a7f4 100644 --- a/.github/workflows/deflake.yml +++ b/.github/workflows/deflake.yml @@ -68,10 +68,10 @@ jobs: VERBOSE: 'true' shell: 'bash' run: | - if [[ "${{ env.IS_DOCKER }}" == "true" ]]; then - npm run deflake:test:integration:sandbox:docker -- --runs="${{ env.RUNS }}" -- --testNamePattern "'${{ env.TEST_NAME_PATTERN }}'" + if [[ "${IS_DOCKER}" == "true" ]]; then + npm run deflake:test:integration:sandbox:docker -- --runs="${RUNS}" -- --testNamePattern "'${TEST_NAME_PATTERN}'" else - npm run deflake:test:integration:sandbox:none -- --runs="${{ env.RUNS }}" -- --testNamePattern "'${{ env.TEST_NAME_PATTERN }}'" + npm run deflake:test:integration:sandbox:none -- --runs="${RUNS}" -- --testNamePattern "'${TEST_NAME_PATTERN}'" fi deflake_e2e_mac: @@ -109,7 +109,7 @@ jobs: TEST_NAME_PATTERN: '${{ github.event.inputs.test_name_pattern }}' VERBOSE: 'true' run: | - npm run deflake:test:integration:sandbox:none -- --runs="${{ env.RUNS }}" -- --testNamePattern "'${{ env.TEST_NAME_PATTERN }}'" + npm run deflake:test:integration:sandbox:none -- --runs="${RUNS}" -- --testNamePattern "'${TEST_NAME_PATTERN}'" deflake_e2e_windows: name: 'Slow E2E - Win' @@ -167,4 +167,4 @@ jobs: TEST_NAME_PATTERN: '${{ github.event.inputs.test_name_pattern }}' shell: 'pwsh' run: | - npm run deflake:test:integration:sandbox:none -- --runs="${{ env.RUNS }}" -- --testNamePattern "'${{ env.TEST_NAME_PATTERN }}'" + npm run deflake:test:integration:sandbox:none -- --runs="$env:RUNS" -- --testNamePattern 
"'$env:TEST_NAME_PATTERN'" diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml index d5616a3419..02aa38b3dd 100644 --- a/.github/workflows/eval.yml +++ b/.github/workflows/eval.yml @@ -44,5 +44,5 @@ jobs: - name: 'Run evaluation' working-directory: '/app' run: | - poetry run exp_run --experiment-mode=on-demand --branch-or-commit=${{ github.ref_name }} --model-name=gemini-2.5-pro --dataset=swebench_verified --concurrency=15 + poetry run exp_run --experiment-mode=on-demand --branch-or-commit=${GITHUB_REF_NAME} --model-name=gemini-2.5-pro --dataset=swebench_verified --concurrency=15 poetry run python agent_prototypes/scripts/parse_gcli_logs_experiment.py --experiment_dir=experiments/adhoc/gcli_temp_exp --gcs-bucket="${EVAL_GCS_BUCKET}" --gcs-path=gh_action_artifacts diff --git a/.github/workflows/evals-nightly.yml b/.github/workflows/evals-nightly.yml index 6f6767ebfe..1ed9448c03 100644 --- a/.github/workflows/evals-nightly.yml +++ b/.github/workflows/evals-nightly.yml @@ -62,7 +62,7 @@ jobs: TEST_NAME_PATTERN: '${{ github.event.inputs.test_name_pattern }}' run: | CMD="npm run test:all_evals" - PATTERN="${{ env.TEST_NAME_PATTERN }}" + PATTERN="${TEST_NAME_PATTERN}" if [[ -n "$PATTERN" ]]; then if [[ "$PATTERN" == *.ts || "$PATTERN" == *.js || "$PATTERN" == */* ]]; then diff --git a/.github/workflows/release-patch-3-release.yml b/.github/workflows/release-patch-3-release.yml index b0d459f256..26a3208e43 100644 --- a/.github/workflows/release-patch-3-release.yml +++ b/.github/workflows/release-patch-3-release.yml @@ -118,6 +118,7 @@ jobs: ORIGINAL_RELEASE_VERSION: '${{ steps.patch_version.outputs.RELEASE_VERSION }}' ORIGINAL_RELEASE_TAG: '${{ steps.patch_version.outputs.RELEASE_TAG }}' ORIGINAL_PREVIOUS_TAG: '${{ steps.patch_version.outputs.PREVIOUS_TAG }}' + VARS_CLI_PACKAGE_NAME: ${{vars.CLI_PACKAGE_NAME}} run: | echo "🔍 Verifying no concurrent patch releases have occurred..." 
@@ -129,7 +130,7 @@ jobs: # Re-run the same version calculation script echo "Re-calculating version to check for changes..." - CURRENT_PATCH_JSON=$(node scripts/get-release-version.js --cli-package-name="${{vars.CLI_PACKAGE_NAME}}" --type=patch --patch-from="${CHANNEL}") + CURRENT_PATCH_JSON=$(node scripts/get-release-version.js --cli-package-name="${VARS_CLI_PACKAGE_NAME}" --type=patch --patch-from="${CHANNEL}") CURRENT_RELEASE_VERSION=$(echo "${CURRENT_PATCH_JSON}" | jq -r .releaseVersion) CURRENT_RELEASE_TAG=$(echo "${CURRENT_PATCH_JSON}" | jq -r .releaseTag) CURRENT_PREVIOUS_TAG=$(echo "${CURRENT_PATCH_JSON}" | jq -r .previousReleaseTag) @@ -162,10 +163,15 @@ jobs: - name: 'Print Calculated Version' run: |- echo "Patch Release Summary:" - echo " Release Version: ${{ steps.patch_version.outputs.RELEASE_VERSION }}" - echo " Release Tag: ${{ steps.patch_version.outputs.RELEASE_TAG }}" - echo " NPM Tag: ${{ steps.patch_version.outputs.NPM_TAG }}" - echo " Previous Tag: ${{ steps.patch_version.outputs.PREVIOUS_TAG }}" + echo " Release Version: ${STEPS_PATCH_VERSION_OUTPUTS_RELEASE_VERSION}" + echo " Release Tag: ${STEPS_PATCH_VERSION_OUTPUTS_RELEASE_TAG}" + echo " NPM Tag: ${STEPS_PATCH_VERSION_OUTPUTS_NPM_TAG}" + echo " Previous Tag: ${STEPS_PATCH_VERSION_OUTPUTS_PREVIOUS_TAG}" + env: + STEPS_PATCH_VERSION_OUTPUTS_RELEASE_VERSION: ${{ steps.patch_version.outputs.RELEASE_VERSION }} + STEPS_PATCH_VERSION_OUTPUTS_RELEASE_TAG: ${{ steps.patch_version.outputs.RELEASE_TAG }} + STEPS_PATCH_VERSION_OUTPUTS_NPM_TAG: ${{ steps.patch_version.outputs.NPM_TAG }} + STEPS_PATCH_VERSION_OUTPUTS_PREVIOUS_TAG: ${{ steps.patch_version.outputs.PREVIOUS_TAG }} - name: 'Run Tests' if: "${{github.event.inputs.force_skip_tests != 'true'}}" diff --git a/.github/workflows/release-promote.yml b/.github/workflows/release-promote.yml index ebe16b1a39..100fc54462 100644 --- a/.github/workflows/release-promote.yml +++ b/.github/workflows/release-promote.yml @@ -362,23 +362,28 @@ jobs: - name: 
'Create and switch to a new branch' id: 'release_branch' run: | - BRANCH_NAME="chore/nightly-version-bump-${{ needs.calculate-versions.outputs.NEXT_NIGHTLY_VERSION }}" + BRANCH_NAME="chore/nightly-version-bump-${NEEDS_CALCULATE_VERSIONS_OUTPUTS_NEXT_NIGHTLY_VERSION}" git switch -c "${BRANCH_NAME}" echo "BRANCH_NAME=${BRANCH_NAME}" >> "${GITHUB_OUTPUT}" + env: + NEEDS_CALCULATE_VERSIONS_OUTPUTS_NEXT_NIGHTLY_VERSION: ${{ needs.calculate-versions.outputs.NEXT_NIGHTLY_VERSION }} - name: 'Update package versions' - run: 'npm run release:version "${{ needs.calculate-versions.outputs.NEXT_NIGHTLY_VERSION }}"' + run: 'npm run release:version "${NEEDS_CALCULATE_VERSIONS_OUTPUTS_NEXT_NIGHTLY_VERSION}"' + env: + NEEDS_CALCULATE_VERSIONS_OUTPUTS_NEXT_NIGHTLY_VERSION: ${{ needs.calculate-versions.outputs.NEXT_NIGHTLY_VERSION }} - name: 'Commit and Push package versions' env: BRANCH_NAME: '${{ steps.release_branch.outputs.BRANCH_NAME }}' DRY_RUN: '${{ github.event.inputs.dry_run }}' + NEEDS_CALCULATE_VERSIONS_OUTPUTS_NEXT_NIGHTLY_VERSION: ${{ needs.calculate-versions.outputs.NEXT_NIGHTLY_VERSION }} run: |- git add package.json packages/*/package.json if [ -f package-lock.json ]; then git add package-lock.json fi - git commit -m "chore(release): bump version to ${{ needs.calculate-versions.outputs.NEXT_NIGHTLY_VERSION }}" + git commit -m "chore(release): bump version to ${NEEDS_CALCULATE_VERSIONS_OUTPUTS_NEXT_NIGHTLY_VERSION}" if [[ "${DRY_RUN}" == "false" ]]; then echo "Pushing release branch to remote..." 
git push --set-upstream origin "${BRANCH_NAME}" diff --git a/.github/workflows/trigger_e2e.yml b/.github/workflows/trigger_e2e.yml index 52b3a26f6f..d83dc1d7cf 100644 --- a/.github/workflows/trigger_e2e.yml +++ b/.github/workflows/trigger_e2e.yml @@ -23,8 +23,8 @@ jobs: HEAD_SHA: '${{ github.event.inputs.head_sha || github.event.pull_request.head.sha }}' run: | mkdir -p ./pr - echo '${{ env.REPO_NAME }}' > ./pr/repo_name - echo '${{ env.HEAD_SHA }}' > ./pr/head_sha + echo '${REPO_NAME}' > ./pr/repo_name + echo '${HEAD_SHA}' > ./pr/head_sha - uses: 'actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02' # ratchet:actions/upload-artifact@v4 with: name: 'repo_name' From 020da58327a7c06c845f7913093eddb069d169bc Mon Sep 17 00:00:00 2001 From: Jerop Kipruto Date: Thu, 26 Feb 2026 14:31:31 -0500 Subject: [PATCH 04/41] fix(github): resolve actionlint and yamllint regressions from #19443 (#20467) --- .../actions/create-pull-request/action.yml | 8 ++--- .github/actions/npm-auth-token/action.yml | 10 +++--- .github/actions/publish-release/action.yml | 32 +++++++++---------- .github/actions/push-docker/action.yml | 2 +- .github/actions/push-sandbox/action.yml | 8 ++--- .github/actions/setup-npmrc/action.yml | 2 +- .github/actions/tag-npm-release/action.yml | 28 ++++++++-------- .github/actions/verify-release/action.yml | 8 ++--- .github/workflows/chained_e2e.yml | 8 ++--- .github/workflows/ci.yml | 14 ++++---- .github/workflows/eval.yml | 2 +- .github/workflows/release-patch-3-release.yml | 10 +++--- .github/workflows/release-promote.yml | 6 ++-- 13 files changed, 69 insertions(+), 69 deletions(-) diff --git a/.github/actions/create-pull-request/action.yml b/.github/actions/create-pull-request/action.yml index fbbc910bc5..fa38bd58ab 100644 --- a/.github/actions/create-pull-request/action.yml +++ b/.github/actions/create-pull-request/action.yml @@ -39,10 +39,10 @@ runs: if: "inputs.dry-run != 'true'" env: GH_TOKEN: '${{ inputs.github-token }}' - INPUTS_BRANCH_NAME: 
${{ inputs.branch-name }} - INPUTS_PR_TITLE: ${{ inputs.pr-title }} - INPUTS_PR_BODY: ${{ inputs.pr-body }} - INPUTS_BASE_BRANCH: ${{ inputs.base-branch }} + INPUTS_BRANCH_NAME: '${{ inputs.branch-name }}' + INPUTS_PR_TITLE: '${{ inputs.pr-title }}' + INPUTS_PR_BODY: '${{ inputs.pr-body }}' + INPUTS_BASE_BRANCH: '${{ inputs.base-branch }}' shell: 'bash' working-directory: '${{ inputs.working-directory }}' run: | diff --git a/.github/actions/npm-auth-token/action.yml b/.github/actions/npm-auth-token/action.yml index 1c19da5971..f9fe4bd894 100644 --- a/.github/actions/npm-auth-token/action.yml +++ b/.github/actions/npm-auth-token/action.yml @@ -44,8 +44,8 @@ runs: fi echo "auth-token=$AUTH_TOKEN" >> $GITHUB_OUTPUT env: - INPUTS_GITHUB_TOKEN: ${{ inputs.github-token }} - INPUTS_PACKAGE_NAME: ${{ inputs.package-name }} - INPUTS_WOMBAT_TOKEN_CLI: ${{ inputs.wombat-token-cli }} - INPUTS_WOMBAT_TOKEN_CORE: ${{ inputs.wombat-token-core }} - INPUTS_WOMBAT_TOKEN_A2A_SERVER: ${{ inputs.wombat-token-a2a-server }} + INPUTS_GITHUB_TOKEN: '${{ inputs.github-token }}' + INPUTS_PACKAGE_NAME: '${{ inputs.package-name }}' + INPUTS_WOMBAT_TOKEN_CLI: '${{ inputs.wombat-token-cli }}' + INPUTS_WOMBAT_TOKEN_CORE: '${{ inputs.wombat-token-core }}' + INPUTS_WOMBAT_TOKEN_A2A_SERVER: '${{ inputs.wombat-token-a2a-server }}' diff --git a/.github/actions/publish-release/action.yml b/.github/actions/publish-release/action.yml index 35e65493c9..8f062205cb 100644 --- a/.github/actions/publish-release/action.yml +++ b/.github/actions/publish-release/action.yml @@ -97,7 +97,7 @@ runs: git switch -c "${BRANCH_NAME}" echo "BRANCH_NAME=${BRANCH_NAME}" >> "${GITHUB_OUTPUT}" env: - INPUTS_RELEASE_TAG: ${{ inputs.release-tag }} + INPUTS_RELEASE_TAG: '${{ inputs.release-tag }}' - name: '⬆️ Update package versions' working-directory: '${{ inputs.working-directory }}' @@ -105,7 +105,7 @@ runs: run: | npm run release:version "${INPUTS_RELEASE_VERSION}" env: - INPUTS_RELEASE_VERSION: ${{ inputs.release-version 
}} + INPUTS_RELEASE_VERSION: '${{ inputs.release-version }}' - name: '💾 Commit and Conditionally Push package versions' working-directory: '${{ inputs.working-directory }}' @@ -167,8 +167,8 @@ runs: working-directory: '${{ inputs.working-directory }}' env: NODE_AUTH_TOKEN: '${{ steps.core-token.outputs.auth-token }}' - INPUTS_DRY_RUN: ${{ inputs.dry-run }} - INPUTS_CORE_PACKAGE_NAME: ${{ inputs.core-package-name }} + INPUTS_DRY_RUN: '${{ inputs.dry-run }}' + INPUTS_CORE_PACKAGE_NAME: '${{ inputs.core-package-name }}' shell: 'bash' run: | npm publish \ @@ -187,10 +187,10 @@ runs: --workspace="${INPUTS_A2A_PACKAGE_NAME}" \ --save-exact env: - INPUTS_CORE_PACKAGE_NAME: ${{ inputs.core-package-name }} - INPUTS_RELEASE_VERSION: ${{ inputs.release-version }} - INPUTS_CLI_PACKAGE_NAME: ${{ inputs.cli-package-name }} - INPUTS_A2A_PACKAGE_NAME: ${{ inputs.a2a-package-name }} + INPUTS_CORE_PACKAGE_NAME: '${{ inputs.core-package-name }}' + INPUTS_RELEASE_VERSION: '${{ inputs.release-version }}' + INPUTS_CLI_PACKAGE_NAME: '${{ inputs.cli-package-name }}' + INPUTS_A2A_PACKAGE_NAME: '${{ inputs.a2a-package-name }}' - name: 'Get CLI Token' uses: './.github/actions/npm-auth-token' @@ -206,8 +206,8 @@ runs: working-directory: '${{ inputs.working-directory }}' env: NODE_AUTH_TOKEN: '${{ steps.cli-token.outputs.auth-token }}' - INPUTS_DRY_RUN: ${{ inputs.dry-run }} - INPUTS_CLI_PACKAGE_NAME: ${{ inputs.cli-package-name }} + INPUTS_DRY_RUN: '${{ inputs.dry-run }}' + INPUTS_CLI_PACKAGE_NAME: '${{ inputs.cli-package-name }}' shell: 'bash' run: | npm publish \ @@ -230,8 +230,8 @@ runs: working-directory: '${{ inputs.working-directory }}' env: NODE_AUTH_TOKEN: '${{ steps.a2a-token.outputs.auth-token }}' - INPUTS_DRY_RUN: ${{ inputs.dry-run }} - INPUTS_A2A_PACKAGE_NAME: ${{ inputs.a2a-package-name }} + INPUTS_DRY_RUN: '${{ inputs.dry-run }}' + INPUTS_A2A_PACKAGE_NAME: '${{ inputs.a2a-package-name }}' shell: 'bash' # Tag staging for initial release run: | @@ -273,9 +273,9 @@ runs: if: "${{ 
inputs.dry-run != 'true' && inputs.skip-github-release != 'true' && inputs.npm-tag != 'dev' && inputs.npm-registry-url != 'https://npm.pkg.github.com/' }}" env: GITHUB_TOKEN: '${{ inputs.github-release-token || inputs.github-token }}' - INPUTS_RELEASE_TAG: ${{ inputs.release-tag }} - STEPS_RELEASE_BRANCH_OUTPUTS_BRANCH_NAME: ${{ steps.release_branch.outputs.BRANCH_NAME }} - INPUTS_PREVIOUS_TAG: ${{ inputs.previous-tag }} + INPUTS_RELEASE_TAG: '${{ inputs.release-tag }}' + STEPS_RELEASE_BRANCH_OUTPUTS_BRANCH_NAME: '${{ steps.release_branch.outputs.BRANCH_NAME }}' + INPUTS_PREVIOUS_TAG: '${{ inputs.previous-tag }}' shell: 'bash' run: | gh release create "${INPUTS_RELEASE_TAG}" \ @@ -296,4 +296,4 @@ runs: git push origin --delete "${STEPS_RELEASE_BRANCH_OUTPUTS_BRANCH_NAME}" env: - STEPS_RELEASE_BRANCH_OUTPUTS_BRANCH_NAME: ${{ steps.release_branch.outputs.BRANCH_NAME }} + STEPS_RELEASE_BRANCH_OUTPUTS_BRANCH_NAME: '${{ steps.release_branch.outputs.BRANCH_NAME }}' diff --git a/.github/actions/push-docker/action.yml b/.github/actions/push-docker/action.yml index 67f03b3cca..e660733428 100644 --- a/.github/actions/push-docker/action.yml +++ b/.github/actions/push-docker/action.yml @@ -55,7 +55,7 @@ runs: REF_NAME="${INPUTS_REF_NAME}" echo "name=${REF_NAME%/merge}" >> $GITHUB_OUTPUT env: - INPUTS_REF_NAME: ${{ inputs.ref-name }} + INPUTS_REF_NAME: '${{ inputs.ref-name }}' - name: 'Build and Push the Docker Image' uses: 'docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83' # ratchet:docker/build-push-action@v6 with: diff --git a/.github/actions/push-sandbox/action.yml b/.github/actions/push-sandbox/action.yml index 22edf5d838..e2d1ac942c 100644 --- a/.github/actions/push-sandbox/action.yml +++ b/.github/actions/push-sandbox/action.yml @@ -67,15 +67,15 @@ runs: echo "Determined image tag: $FINAL_TAG" echo "FINAL_TAG=$FINAL_TAG" >> $GITHUB_OUTPUT env: - INPUTS_GITHUB_REF_NAME: ${{ inputs.github-ref-name }} - INPUTS_GITHUB_SHA: ${{ inputs.github-sha }} + 
INPUTS_GITHUB_REF_NAME: '${{ inputs.github-ref-name }}' + INPUTS_GITHUB_SHA: '${{ inputs.github-sha }}' - name: 'build' id: 'docker_build' shell: 'bash' env: GEMINI_SANDBOX_IMAGE_TAG: '${{ steps.image_tag.outputs.FINAL_TAG }}' GEMINI_SANDBOX: 'docker' - STEPS_IMAGE_TAG_OUTPUTS_FINAL_TAG: ${{ steps.image_tag.outputs.FINAL_TAG }} + STEPS_IMAGE_TAG_OUTPUTS_FINAL_TAG: '${{ steps.image_tag.outputs.FINAL_TAG }}' run: |- npm run build:sandbox -- \ --image google/gemini-cli-sandbox:${STEPS_IMAGE_TAG_OUTPUTS_FINAL_TAG} \ @@ -95,7 +95,7 @@ runs: run: |- docker push "${STEPS_DOCKER_BUILD_OUTPUTS_URI}" env: - STEPS_DOCKER_BUILD_OUTPUTS_URI: ${{ steps.docker_build.outputs.uri }} + STEPS_DOCKER_BUILD_OUTPUTS_URI: '${{ steps.docker_build.outputs.uri }}' - name: 'Create issue on failure' if: |- ${{ failure() }} diff --git a/.github/actions/setup-npmrc/action.yml b/.github/actions/setup-npmrc/action.yml index 5dddfe48f3..137451740f 100644 --- a/.github/actions/setup-npmrc/action.yml +++ b/.github/actions/setup-npmrc/action.yml @@ -21,4 +21,4 @@ runs: echo ""//npm.pkg.github.com/:_authToken=${INPUTS_GITHUB_TOKEN}"" >> ~/.npmrc echo ""@google:registry=https://wombat-dressing-room.appspot.com"" >> ~/.npmrc env: - INPUTS_GITHUB_TOKEN: ${{ inputs.github-token }} + INPUTS_GITHUB_TOKEN: '${{ inputs.github-token }}' diff --git a/.github/actions/tag-npm-release/action.yml b/.github/actions/tag-npm-release/action.yml index 8088035598..085cf15e99 100644 --- a/.github/actions/tag-npm-release/action.yml +++ b/.github/actions/tag-npm-release/action.yml @@ -71,9 +71,9 @@ runs: ${{ inputs.dry-run != 'true' }} env: NODE_AUTH_TOKEN: '${{ steps.core-token.outputs.auth-token }}' - INPUTS_CORE_PACKAGE_NAME: ${{ inputs.core-package-name }} - INPUTS_VERSION: ${{ inputs.version }} - INPUTS_CHANNEL: ${{ inputs.channel }} + INPUTS_CORE_PACKAGE_NAME: '${{ inputs.core-package-name }}' + INPUTS_VERSION: '${{ inputs.version }}' + INPUTS_CHANNEL: '${{ inputs.channel }}' shell: 'bash' working-directory: '${{ 
inputs.working-directory }}' run: | @@ -94,9 +94,9 @@ runs: ${{ inputs.dry-run != 'true' }} env: NODE_AUTH_TOKEN: '${{ steps.cli-token.outputs.auth-token }}' - INPUTS_CLI_PACKAGE_NAME: ${{ inputs.cli-package-name }} - INPUTS_VERSION: ${{ inputs.version }} - INPUTS_CHANNEL: ${{ inputs.channel }} + INPUTS_CLI_PACKAGE_NAME: '${{ inputs.cli-package-name }}' + INPUTS_VERSION: '${{ inputs.version }}' + INPUTS_CHANNEL: '${{ inputs.channel }}' shell: 'bash' working-directory: '${{ inputs.working-directory }}' run: | @@ -117,9 +117,9 @@ runs: ${{ inputs.dry-run == 'false' }} env: NODE_AUTH_TOKEN: '${{ steps.a2a-token.outputs.auth-token }}' - INPUTS_A2A_PACKAGE_NAME: ${{ inputs.a2a-package-name }} - INPUTS_VERSION: ${{ inputs.version }} - INPUTS_CHANNEL: ${{ inputs.channel }} + INPUTS_A2A_PACKAGE_NAME: '${{ inputs.a2a-package-name }}' + INPUTS_VERSION: '${{ inputs.version }}' + INPUTS_CHANNEL: '${{ inputs.channel }}' shell: 'bash' working-directory: '${{ inputs.working-directory }}' run: | @@ -134,12 +134,12 @@ runs: echo "Dry run: Would have added tag '${INPUTS_CHANNEL}' to version '${INPUTS_VERSION}' for ${INPUTS_CLI_PACKAGE_NAME}, ${INPUTS_CORE_PACKAGE_NAME}, and ${INPUTS_A2A_PACKAGE_NAME}." 
env: - INPUTS_CHANNEL: ${{ inputs.channel }} + INPUTS_CHANNEL: '${{ inputs.channel }}' - INPUTS_VERSION: ${{ inputs.version }} + INPUTS_VERSION: '${{ inputs.version }}' - INPUTS_CLI_PACKAGE_NAME: ${{ inputs.cli-package-name }} + INPUTS_CLI_PACKAGE_NAME: '${{ inputs.cli-package-name }}' - INPUTS_CORE_PACKAGE_NAME: ${{ inputs.core-package-name }} + INPUTS_CORE_PACKAGE_NAME: '${{ inputs.core-package-name }}' - INPUTS_A2A_PACKAGE_NAME: ${{ inputs.a2a-package-name }} + INPUTS_A2A_PACKAGE_NAME: '${{ inputs.a2a-package-name }}' diff --git a/.github/actions/verify-release/action.yml b/.github/actions/verify-release/action.yml index 349e6a19d5..261715c1b9 100644 --- a/.github/actions/verify-release/action.yml +++ b/.github/actions/verify-release/action.yml @@ -69,8 +69,8 @@ runs: exit 1 fi env: - INPUTS_EXPECTED_VERSION: ${{ inputs.expected-version }} - INPUTS_NPM_PACKAGE: ${{ inputs.npm-package }} + INPUTS_EXPECTED_VERSION: '${{ inputs.expected-version }}' + INPUTS_NPM_PACKAGE: '${{ inputs.npm-package }}' - name: 'Clear npm cache' shell: 'bash' @@ -86,8 +86,8 @@ runs: exit 1 fi env: - INPUTS_NPM_PACKAGE: ${{ inputs.npm-package }} - INPUTS_EXPECTED_VERSION: ${{ inputs.expected-version }} + INPUTS_NPM_PACKAGE: '${{ inputs.npm-package }}' + INPUTS_EXPECTED_VERSION: '${{ inputs.expected-version }}' - name: 'Install dependencies for integration tests' shell: 'bash' diff --git a/.github/workflows/chained_e2e.yml b/.github/workflows/chained_e2e.yml index d4d8780cb3..f1023c3ac8 100644 --- a/.github/workflows/chained_e2e.yml +++ b/.github/workflows/chained_e2e.yml @@ -329,10 +329,10 @@ jobs: fi echo "All required E2E jobs passed!" 
env: - NEEDS_E2E_LINUX_RESULT: ${{ needs.e2e_linux.result }} - NEEDS_E2E_MAC_RESULT: ${{ needs.e2e_mac.result }} - NEEDS_E2E_WINDOWS_RESULT: ${{ needs.e2e_windows.result }} - NEEDS_EVALS_RESULT: ${{ needs.evals.result }} + NEEDS_E2E_LINUX_RESULT: '${{ needs.e2e_linux.result }}' + NEEDS_E2E_MAC_RESULT: '${{ needs.e2e_mac.result }}' + NEEDS_E2E_WINDOWS_RESULT: '${{ needs.e2e_windows.result }}' + NEEDS_EVALS_RESULT: '${{ needs.evals.result }}' set_workflow_status: runs-on: 'gemini-cli-ubuntu-16-core' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ab79110574..999eb778c4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -476,10 +476,10 @@ jobs: fi echo "All CI jobs passed!" env: - NEEDS_LINT_RESULT: ${{ needs.lint.result }} - NEEDS_LINK_CHECKER_RESULT: ${{ needs.link_checker.result }} - NEEDS_TEST_LINUX_RESULT: ${{ needs.test_linux.result }} - NEEDS_TEST_MAC_RESULT: ${{ needs.test_mac.result }} - NEEDS_TEST_WINDOWS_RESULT: ${{ needs.test_windows.result }} - NEEDS_CODEQL_RESULT: ${{ needs.codeql.result }} - NEEDS_BUNDLE_SIZE_RESULT: ${{ needs.bundle_size.result }} + NEEDS_LINT_RESULT: '${{ needs.lint.result }}' + NEEDS_LINK_CHECKER_RESULT: '${{ needs.link_checker.result }}' + NEEDS_TEST_LINUX_RESULT: '${{ needs.test_linux.result }}' + NEEDS_TEST_MAC_RESULT: '${{ needs.test_mac.result }}' + NEEDS_TEST_WINDOWS_RESULT: '${{ needs.test_windows.result }}' + NEEDS_CODEQL_RESULT: '${{ needs.codeql.result }}' + NEEDS_BUNDLE_SIZE_RESULT: '${{ needs.bundle_size.result }}' diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml index 02aa38b3dd..23dc1cfdfb 100644 --- a/.github/workflows/eval.yml +++ b/.github/workflows/eval.yml @@ -44,5 +44,5 @@ jobs: - name: 'Run evaluation' working-directory: '/app' run: | - poetry run exp_run --experiment-mode=on-demand --branch-or-commit=${GITHUB_REF_NAME} --model-name=gemini-2.5-pro --dataset=swebench_verified --concurrency=15 + poetry run exp_run --experiment-mode=on-demand 
--branch-or-commit="${GITHUB_REF_NAME}" --model-name=gemini-2.5-pro --dataset=swebench_verified --concurrency=15 poetry run python agent_prototypes/scripts/parse_gcli_logs_experiment.py --experiment_dir=experiments/adhoc/gcli_temp_exp --gcs-bucket="${EVAL_GCS_BUCKET}" --gcs-path=gh_action_artifacts diff --git a/.github/workflows/release-patch-3-release.yml b/.github/workflows/release-patch-3-release.yml index 26a3208e43..6680362a16 100644 --- a/.github/workflows/release-patch-3-release.yml +++ b/.github/workflows/release-patch-3-release.yml @@ -118,7 +118,7 @@ jobs: ORIGINAL_RELEASE_VERSION: '${{ steps.patch_version.outputs.RELEASE_VERSION }}' ORIGINAL_RELEASE_TAG: '${{ steps.patch_version.outputs.RELEASE_TAG }}' ORIGINAL_PREVIOUS_TAG: '${{ steps.patch_version.outputs.PREVIOUS_TAG }}' - VARS_CLI_PACKAGE_NAME: ${{vars.CLI_PACKAGE_NAME}} + VARS_CLI_PACKAGE_NAME: '${{ vars.CLI_PACKAGE_NAME }}' run: | echo "🔍 Verifying no concurrent patch releases have occurred..." @@ -168,10 +168,10 @@ jobs: echo " NPM Tag: ${STEPS_PATCH_VERSION_OUTPUTS_NPM_TAG}" echo " Previous Tag: ${STEPS_PATCH_VERSION_OUTPUTS_PREVIOUS_TAG}" env: - STEPS_PATCH_VERSION_OUTPUTS_RELEASE_VERSION: ${{ steps.patch_version.outputs.RELEASE_VERSION }} - STEPS_PATCH_VERSION_OUTPUTS_RELEASE_TAG: ${{ steps.patch_version.outputs.RELEASE_TAG }} - STEPS_PATCH_VERSION_OUTPUTS_NPM_TAG: ${{ steps.patch_version.outputs.NPM_TAG }} - STEPS_PATCH_VERSION_OUTPUTS_PREVIOUS_TAG: ${{ steps.patch_version.outputs.PREVIOUS_TAG }} + STEPS_PATCH_VERSION_OUTPUTS_RELEASE_VERSION: '${{ steps.patch_version.outputs.RELEASE_VERSION }}' + STEPS_PATCH_VERSION_OUTPUTS_RELEASE_TAG: '${{ steps.patch_version.outputs.RELEASE_TAG }}' + STEPS_PATCH_VERSION_OUTPUTS_NPM_TAG: '${{ steps.patch_version.outputs.NPM_TAG }}' + STEPS_PATCH_VERSION_OUTPUTS_PREVIOUS_TAG: '${{ steps.patch_version.outputs.PREVIOUS_TAG }}' - name: 'Run Tests' if: "${{github.event.inputs.force_skip_tests != 'true'}}" diff --git a/.github/workflows/release-promote.yml 
b/.github/workflows/release-promote.yml index 100fc54462..d5c16b94fe 100644 --- a/.github/workflows/release-promote.yml +++ b/.github/workflows/release-promote.yml @@ -366,18 +366,18 @@ jobs: git switch -c "${BRANCH_NAME}" echo "BRANCH_NAME=${BRANCH_NAME}" >> "${GITHUB_OUTPUT}" env: - NEEDS_CALCULATE_VERSIONS_OUTPUTS_NEXT_NIGHTLY_VERSION: ${{ needs.calculate-versions.outputs.NEXT_NIGHTLY_VERSION }} + NEEDS_CALCULATE_VERSIONS_OUTPUTS_NEXT_NIGHTLY_VERSION: '${{ needs.calculate-versions.outputs.NEXT_NIGHTLY_VERSION }}' - name: 'Update package versions' run: 'npm run release:version "${NEEDS_CALCULATE_VERSIONS_OUTPUTS_NEXT_NIGHTLY_VERSION}"' env: - NEEDS_CALCULATE_VERSIONS_OUTPUTS_NEXT_NIGHTLY_VERSION: ${{ needs.calculate-versions.outputs.NEXT_NIGHTLY_VERSION }} + NEEDS_CALCULATE_VERSIONS_OUTPUTS_NEXT_NIGHTLY_VERSION: '${{ needs.calculate-versions.outputs.NEXT_NIGHTLY_VERSION }}' - name: 'Commit and Push package versions' env: BRANCH_NAME: '${{ steps.release_branch.outputs.BRANCH_NAME }}' DRY_RUN: '${{ github.event.inputs.dry_run }}' - NEEDS_CALCULATE_VERSIONS_OUTPUTS_NEXT_NIGHTLY_VERSION: ${{ needs.calculate-versions.outputs.NEXT_NIGHTLY_VERSION }} + NEEDS_CALCULATE_VERSIONS_OUTPUTS_NEXT_NIGHTLY_VERSION: '${{ needs.calculate-versions.outputs.NEXT_NIGHTLY_VERSION }}' run: |- git add package.json packages/*/package.json if [ -f package-lock.json ]; then From 9de8349cf08f536838b53dadbdbf13cd5252fa59 Mon Sep 17 00:00:00 2001 From: Gal Zahavi <38544478+galz10@users.noreply.github.com> Date: Thu, 26 Feb 2026 14:06:09 -0800 Subject: [PATCH 05/41] fix: action var usage (#20492) --- .github/workflows/chained_e2e.yml | 2 +- .github/workflows/release-rollback.yml | 2 +- .github/workflows/trigger_e2e.yml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/chained_e2e.yml b/.github/workflows/chained_e2e.yml index f1023c3ac8..05b1fb0f1d 100644 --- a/.github/workflows/chained_e2e.yml +++ b/.github/workflows/chained_e2e.yml @@ -53,7 +53,7 @@ jobs: 
REPO_NAME: '${{ github.event.inputs.repo_name }}' run: | mkdir -p ./pr - echo '${REPO_NAME}' > ./pr/repo_name + echo "${REPO_NAME}" > ./pr/repo_name - uses: 'actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02' # ratchet:actions/upload-artifact@v4 with: name: 'repo_name' diff --git a/.github/workflows/release-rollback.yml b/.github/workflows/release-rollback.yml index 75c2d0c799..8840b65721 100644 --- a/.github/workflows/release-rollback.yml +++ b/.github/workflows/release-rollback.yml @@ -203,7 +203,7 @@ jobs: run: | ROLLBACK_COMMIT=$(git rev-parse -q --verify "$TARGET_TAG") if [ "$ROLLBACK_COMMIT" != "$TARGET_HASH" ]; then - echo '❌ Failed to add tag $TARGET_TAG to commit $TARGET_HASH' + echo "❌ Failed to add tag ${TARGET_TAG} to commit ${TARGET_HASH}" echo '❌ This means the tag was not added, and the workflow should fail.' exit 1 fi diff --git a/.github/workflows/trigger_e2e.yml b/.github/workflows/trigger_e2e.yml index d83dc1d7cf..babe08e4e3 100644 --- a/.github/workflows/trigger_e2e.yml +++ b/.github/workflows/trigger_e2e.yml @@ -23,8 +23,8 @@ jobs: HEAD_SHA: '${{ github.event.inputs.head_sha || github.event.pull_request.head.sha }}' run: | mkdir -p ./pr - echo '${REPO_NAME}' > ./pr/repo_name - echo '${HEAD_SHA}' > ./pr/head_sha + echo "${REPO_NAME}" > ./pr/repo_name + echo "${HEAD_SHA}" > ./pr/head_sha - uses: 'actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02' # ratchet:actions/upload-artifact@v4 with: name: 'repo_name' From 10c5bd8ce9de6bb18e14dbdfd05334b2ebb5341e Mon Sep 17 00:00:00 2001 From: Adam Weidman <65992621+adamfweidman@users.noreply.github.com> Date: Thu, 26 Feb 2026 17:38:30 -0500 Subject: [PATCH 06/41] feat(core): improve A2A content extraction (#20487) Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com> --- packages/core/src/agents/a2aUtils.test.ts | 63 +++++++++++++++++++++++ packages/core/src/agents/a2aUtils.ts | 23 ++++++++- 2 files changed, 84 insertions(+), 2 deletions(-) diff --git 
a/packages/core/src/agents/a2aUtils.test.ts b/packages/core/src/agents/a2aUtils.test.ts index 711650ea80..f0ea746025 100644 --- a/packages/core/src/agents/a2aUtils.test.ts +++ b/packages/core/src/agents/a2aUtils.test.ts @@ -284,5 +284,68 @@ describe('a2aUtils', () => { 'Analyzing...\n\nProcessing...\n\nArtifact (Code):\nprint("Done")', ); }); + + it('should fallback to history in a task chunk if no message or artifacts exist and task is terminal', () => { + const reassembler = new A2AResultReassembler(); + + reassembler.update({ + kind: 'task', + status: { state: 'completed' }, + history: [ + { + kind: 'message', + role: 'agent', + parts: [{ kind: 'text', text: 'Answer from history' }], + } as Message, + ], + } as unknown as SendMessageResult); + + expect(reassembler.toString()).toBe('Answer from history'); + }); + + it('should NOT fallback to history in a task chunk if task is not terminal', () => { + const reassembler = new A2AResultReassembler(); + + reassembler.update({ + kind: 'task', + status: { state: 'working' }, + history: [ + { + kind: 'message', + role: 'agent', + parts: [{ kind: 'text', text: 'Answer from history' }], + } as Message, + ], + } as unknown as SendMessageResult); + + expect(reassembler.toString()).toBe(''); + }); + + it('should not fallback to history if artifacts exist', () => { + const reassembler = new A2AResultReassembler(); + + reassembler.update({ + kind: 'task', + status: { state: 'completed' }, + artifacts: [ + { + artifactId: 'art-1', + name: 'Data', + parts: [{ kind: 'text', text: 'Artifact Content' }], + }, + ], + history: [ + { + kind: 'message', + role: 'agent', + parts: [{ kind: 'text', text: 'Answer from history' }], + } as Message, + ], + } as unknown as SendMessageResult); + + const output = reassembler.toString(); + expect(output).toContain('Artifact (Data):'); + expect(output).not.toContain('Answer from history'); + }); }); }); diff --git a/packages/core/src/agents/a2aUtils.ts b/packages/core/src/agents/a2aUtils.ts index 
e753d047d0..52817f4971 100644 --- a/packages/core/src/agents/a2aUtils.ts +++ b/packages/core/src/agents/a2aUtils.ts @@ -74,6 +74,26 @@ export class A2AResultReassembler { ]); } } + // History Fallback: Some agent implementations do not populate the + // status.message in their final terminal response, instead archiving + // the final answer in the task's history array. To ensure we don't + // present an empty result, we fallback to the most recent agent message + // in the history only when the task is terminal and no other content + // (message log or artifacts) has been reassembled. + if ( + isTerminalState(chunk.status?.state) && + this.messageLog.length === 0 && + this.artifacts.size === 0 && + chunk.history && + chunk.history.length > 0 + ) { + const lastAgentMsg = [...chunk.history] + .reverse() + .find((m) => m.role?.toLowerCase().includes('agent')); + if (lastAgentMsg) { + this.pushMessage(lastAgentMsg); + } + } break; case 'message': { @@ -126,7 +146,7 @@ export class A2AResultReassembler { * Handles Text, Data (JSON), and File parts. 
*/ export function extractMessageText(message: Message | undefined): string { - if (!message) { + if (!message || !message.parts || !Array.isArray(message.parts)) { return ''; } @@ -158,7 +178,6 @@ function extractPartText(part: Part): string { } if (isDataPart(part)) { - // Attempt to format known data types if metadata exists, otherwise JSON stringify return `Data: ${JSON.stringify(part.data)}`; } From edb1fdea30a43f9497c9ce30ad37683dbb8b3a19 Mon Sep 17 00:00:00 2001 From: Sehoon Shon Date: Thu, 26 Feb 2026 17:39:25 -0500 Subject: [PATCH 07/41] fix(cli): support quota error fallbacks for all authentication types (#20475) Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com> --- .../cli/src/ui/components/DialogManager.tsx | 1 + .../src/ui/components/ProQuotaDialog.test.tsx | 51 +++++++++++++++---- .../cli/src/ui/components/ProQuotaDialog.tsx | 17 +++++-- .../cli/src/ui/contexts/UIStateContext.tsx | 2 + .../src/ui/hooks/useQuotaAndFallback.test.ts | 36 ++++++++----- .../cli/src/ui/hooks/useQuotaAndFallback.ts | 12 ++--- packages/core/src/core/contentGenerator.ts | 6 +++ 7 files changed, 89 insertions(+), 36 deletions(-) diff --git a/packages/cli/src/ui/components/DialogManager.tsx b/packages/cli/src/ui/components/DialogManager.tsx index 3d56c68e5b..c90194052a 100644 --- a/packages/cli/src/ui/components/DialogManager.tsx +++ b/packages/cli/src/ui/components/DialogManager.tsx @@ -135,6 +135,7 @@ export const DialogManager = ({ isModelNotFoundError={ !!uiState.quota.proQuotaRequest.isModelNotFoundError } + authType={uiState.quota.proQuotaRequest.authType} onChoice={uiActions.handleProQuotaChoice} /> ); diff --git a/packages/cli/src/ui/components/ProQuotaDialog.test.tsx b/packages/cli/src/ui/components/ProQuotaDialog.test.tsx index f74f5fa447..d97d53314e 100644 --- a/packages/cli/src/ui/components/ProQuotaDialog.test.tsx +++ b/packages/cli/src/ui/components/ProQuotaDialog.test.tsx @@ -13,6 +13,7 @@ import { RadioButtonSelect } from 
'./shared/RadioButtonSelect.js'; import { PREVIEW_GEMINI_MODEL, DEFAULT_GEMINI_FLASH_MODEL, + AuthType, } from '@google/gemini-cli-core'; // Mock the child component to make it easier to test the parent @@ -62,7 +63,7 @@ describe('ProQuotaDialog', () => { describe('for non-flash model failures', () => { describe('when it is a terminal quota error', () => { - it('should render switch, upgrade, and stop options for paid tiers', () => { + it('should render switch, upgrade, and stop options for LOGIN_WITH_GOOGLE', () => { const { unmount } = render( { message="paid tier quota error" isTerminalQuotaError={true} isModelNotFoundError={false} + authType={AuthType.LOGIN_WITH_GOOGLE} onChoice={mockOnChoice} />, ); @@ -99,6 +101,39 @@ describe('ProQuotaDialog', () => { unmount(); }); + it('should NOT render upgrade option for USE_GEMINI', () => { + const { unmount } = render( + , + ); + + expect(RadioButtonSelect).toHaveBeenCalledWith( + expect.objectContaining({ + items: [ + { + label: 'Switch to gemini-2.5-flash', + value: 'retry_always', + key: 'retry_always', + }, + { + label: 'Stop', + value: 'retry_later', + key: 'retry_later', + }, + ], + }), + undefined, + ); + unmount(); + }); + it('should render "Keep trying" and "Stop" options when failed model and fallback model are the same', () => { const { unmount } = render( { unmount(); }); - it('should render switch, upgrade, and stop options for free tier', () => { + it('should render switch, upgrade, and stop options for LOGIN_WITH_GOOGLE (free tier)', () => { const { unmount } = render( { message="free tier quota error" isTerminalQuotaError={true} isModelNotFoundError={false} + authType={AuthType.LOGIN_WITH_GOOGLE} onChoice={mockOnChoice} />, ); @@ -204,7 +240,7 @@ describe('ProQuotaDialog', () => { }); describe('when it is a model not found error', () => { - it('should render switch and stop options regardless of tier', () => { + it('should render switch, upgrade, and stop options for LOGIN_WITH_GOOGLE', () => { const { 
unmount } = render( { message="You don't have access to gemini-3-pro-preview yet." isTerminalQuotaError={false} isModelNotFoundError={true} + authType={AuthType.LOGIN_WITH_GOOGLE} onChoice={mockOnChoice} />, ); @@ -241,7 +278,7 @@ describe('ProQuotaDialog', () => { unmount(); }); - it('should render switch and stop options for paid tier as well', () => { + it('should NOT render upgrade option for USE_GEMINI', () => { const { unmount } = render( { message="You don't have access to gemini-3-pro-preview yet." isTerminalQuotaError={false} isModelNotFoundError={true} + authType={AuthType.USE_GEMINI} onChoice={mockOnChoice} />, ); @@ -261,11 +299,6 @@ describe('ProQuotaDialog', () => { value: 'retry_always', key: 'retry_always', }, - { - label: 'Upgrade for higher limits', - value: 'upgrade', - key: 'upgrade', - }, { label: 'Stop', value: 'retry_later', diff --git a/packages/cli/src/ui/components/ProQuotaDialog.tsx b/packages/cli/src/ui/components/ProQuotaDialog.tsx index ccc20b3e75..82a679db8c 100644 --- a/packages/cli/src/ui/components/ProQuotaDialog.tsx +++ b/packages/cli/src/ui/components/ProQuotaDialog.tsx @@ -8,6 +8,7 @@ import type React from 'react'; import { Box, Text } from 'ink'; import { RadioButtonSelect } from './shared/RadioButtonSelect.js'; import { theme } from '../semantic-colors.js'; +import { AuthType } from '@google/gemini-cli-core'; interface ProQuotaDialogProps { failedModel: string; @@ -15,6 +16,7 @@ interface ProQuotaDialogProps { message: string; isTerminalQuotaError: boolean; isModelNotFoundError?: boolean; + authType?: AuthType; onChoice: ( choice: 'retry_later' | 'retry_once' | 'retry_always' | 'upgrade', ) => void; @@ -26,6 +28,7 @@ export function ProQuotaDialog({ message, isTerminalQuotaError, isModelNotFoundError, + authType, onChoice, }: ProQuotaDialogProps): React.JSX.Element { let items; @@ -51,11 +54,15 @@ export function ProQuotaDialog({ value: 'retry_always' as const, key: 'retry_always', }, - { - label: 'Upgrade for higher limits', 
- value: 'upgrade' as const, - key: 'upgrade', - }, + ...(authType === AuthType.LOGIN_WITH_GOOGLE + ? [ + { + label: 'Upgrade for higher limits', + value: 'upgrade' as const, + key: 'upgrade', + }, + ] + : []), { label: `Stop`, value: 'retry_later' as const, diff --git a/packages/cli/src/ui/contexts/UIStateContext.tsx b/packages/cli/src/ui/contexts/UIStateContext.tsx index 9fb2852361..79464271b8 100644 --- a/packages/cli/src/ui/contexts/UIStateContext.tsx +++ b/packages/cli/src/ui/contexts/UIStateContext.tsx @@ -24,6 +24,7 @@ import type { ApprovalMode, UserTierId, IdeInfo, + AuthType, FallbackIntent, ValidationIntent, AgentDefinition, @@ -42,6 +43,7 @@ export interface ProQuotaDialogRequest { message: string; isTerminalQuotaError: boolean; isModelNotFoundError?: boolean; + authType?: AuthType; resolve: (intent: FallbackIntent) => void; } diff --git a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts index 5d6db5abfa..2272de5bf9 100644 --- a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts +++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts @@ -96,9 +96,13 @@ describe('useQuotaAndFallback', () => { }); describe('Fallback Handler Logic', () => { - // Helper function to render the hook and extract the registered handler - const getRegisteredHandler = (): FallbackModelHandler => { - renderHook(() => + it('should show fallback dialog but omit switch to API key message if authType is not LOGIN_WITH_GOOGLE', async () => { + // Override the default mock from beforeEach for this specific test + vi.spyOn(mockConfig, 'getContentGeneratorConfig').mockReturnValue({ + authType: AuthType.USE_GEMINI, + }); + + const { result } = renderHook(() => useQuotaAndFallback({ config: mockConfig, historyManager: mockHistoryManager, @@ -107,20 +111,24 @@ describe('useQuotaAndFallback', () => { onShowAuthSelection: mockOnShowAuthSelection, }), ); - return setFallbackHandlerSpy.mock.calls[0][0] as FallbackModelHandler; - 
}; - it('should return null and take no action if authType is not LOGIN_WITH_GOOGLE', async () => { - // Override the default mock from beforeEach for this specific test - vi.spyOn(mockConfig, 'getContentGeneratorConfig').mockReturnValue({ - authType: AuthType.USE_GEMINI, + const handler = setFallbackHandlerSpy.mock + .calls[0][0] as FallbackModelHandler; + + const error = new TerminalQuotaError( + 'pro quota', + mockGoogleApiError, + 1000 * 60 * 5, + ); + + act(() => { + void handler('gemini-pro', 'gemini-flash', error); }); - const handler = getRegisteredHandler(); - const result = await handler('gemini-pro', 'gemini-flash', new Error()); - - expect(result).toBeNull(); - expect(mockHistoryManager.addItem).not.toHaveBeenCalled(); + expect(result.current.proQuotaRequest).not.toBeNull(); + expect(result.current.proQuotaRequest?.message).not.toContain( + '/auth to switch to API key.', + ); }); describe('Interactive Fallback', () => { diff --git a/packages/cli/src/ui/hooks/useQuotaAndFallback.ts b/packages/cli/src/ui/hooks/useQuotaAndFallback.ts index 1ba03f2a47..a9e2b0c867 100644 --- a/packages/cli/src/ui/hooks/useQuotaAndFallback.ts +++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.ts @@ -55,14 +55,7 @@ export function useQuotaAndFallback({ fallbackModel, error, ): Promise => { - // Fallbacks are currently only handled for OAuth users. const contentGeneratorConfig = config.getContentGeneratorConfig(); - if ( - !contentGeneratorConfig || - contentGeneratorConfig.authType !== AuthType.LOGIN_WITH_GOOGLE - ) { - return null; - } let message: string; let isTerminalQuotaError = false; @@ -78,7 +71,9 @@ export function useQuotaAndFallback({ error.retryDelayMs ? getResetTimeMessage(error.retryDelayMs) : null, `/stats model for usage details`, `/model to switch models.`, - `/auth to switch to API key.`, + contentGeneratorConfig?.authType === AuthType.LOGIN_WITH_GOOGLE + ? 
`/auth to switch to API key.` + : null, ].filter(Boolean); message = messageLines.join('\n'); } else if (error instanceof ModelNotFoundError) { @@ -122,6 +117,7 @@ export function useQuotaAndFallback({ message, isTerminalQuotaError, isModelNotFoundError, + authType: contentGeneratorConfig?.authType, }); }, ); diff --git a/packages/core/src/core/contentGenerator.ts b/packages/core/src/core/contentGenerator.ts index 98d8d50020..29f2ff03df 100644 --- a/packages/core/src/core/contentGenerator.ts +++ b/packages/core/src/core/contentGenerator.ts @@ -77,6 +77,12 @@ export function getAuthTypeFromEnv(): AuthType | undefined { if (process.env['GEMINI_API_KEY']) { return AuthType.USE_GEMINI; } + if ( + process.env['CLOUD_SHELL'] === 'true' || + process.env['GEMINI_CLI_USE_COMPUTE_ADC'] === 'true' + ) { + return AuthType.COMPUTE_ADC; + } return undefined; } From f700c923d9e0b78a1b625f79713a66efea7d8502 Mon Sep 17 00:00:00 2001 From: krishdef7 <157892833+krishdef7@users.noreply.github.com> Date: Fri, 27 Feb 2026 04:09:36 +0530 Subject: [PATCH 08/41] fix(core): flush transcript for pure tool-call responses to ensure BeforeTool hooks see complete state (#20419) Co-authored-by: Bryan Morgan --- packages/core/src/core/geminiChat.test.ts | 53 +++++++++++++++++++++++ packages/core/src/core/geminiChat.ts | 8 +++- 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index bfcb803a95..770a594bda 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -1032,6 +1032,59 @@ describe('GeminiChat', () => { LlmRole.MAIN, ); }); + + it('should flush transcript before tool dispatch for pure tool call with no text or thoughts', async () => { + const pureToolCallStream = (async function* () { + yield { + candidates: [ + { + content: { + role: 'model', + parts: [ + { + functionCall: { + name: 'read_file', + args: { path: 'test.py' }, + }, + }, + ], + }, 
+ }, + ], + } as unknown as GenerateContentResponse; + })(); + + vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue( + pureToolCallStream, + ); + + const { default: fs } = await import('node:fs'); + const writeFileSync = vi.mocked(fs.writeFileSync); + const writeCountBefore = writeFileSync.mock.calls.length; + + const stream = await chat.sendMessageStream( + { model: 'test-model' }, + 'analyze test.py', + 'prompt-id-pure-tool-flush', + new AbortController().signal, + LlmRole.MAIN, + ); + for await (const _ of stream) { + // consume + } + + const newWrites = writeFileSync.mock.calls.slice(writeCountBefore); + expect(newWrites.length).toBeGreaterThan(0); + + const lastWriteData = JSON.parse( + newWrites[newWrites.length - 1][1] as string, + ) as { messages: Array<{ type: string }> }; + + const geminiMessages = lastWriteData.messages.filter( + (m) => m.type === 'gemini', + ); + expect(geminiMessages.length).toBeGreaterThan(0); + }); }); describe('addHistory', () => { diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index b7319c8afd..6814f31402 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -818,6 +818,7 @@ export class GeminiChat { const modelResponseParts: Part[] = []; let hasToolCall = false; + let hasThoughts = false; let finishReason: FinishReason | undefined; for await (const chunk of streamResponse) { @@ -834,6 +835,7 @@ export class GeminiChat { if (content?.parts) { if (content.parts.some((part) => part.thought)) { // Record thoughts + hasThoughts = true; this.recordThoughtFromContent(content); } if (content.parts.some((part) => part.functionCall)) { @@ -901,8 +903,10 @@ export class GeminiChat { .join('') .trim(); - // Record model response text from the collected parts - if (responseText) { + // Record model response text from the collected parts. 
+ // Also flush when there are thoughts or a tool call (even with no text) + // so that BeforeTool hooks always see the latest transcript state. + if (responseText || hasThoughts || hasToolCall) { this.chatRecordingService.recordMessage({ model, type: 'gemini', From aa98cafca7d461e88757b5fab912c9a5740e4b85 Mon Sep 17 00:00:00 2001 From: Jerop Kipruto Date: Thu, 26 Feb 2026 17:58:19 -0500 Subject: [PATCH 09/41] feat(plan): adapt planning workflow based on complexity of task (#20465) Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com> --- docs/cli/plan-mode.md | 29 +++- .../core/__snapshots__/prompts.test.ts.snap | 146 ++++++++++-------- packages/core/src/core/prompts.test.ts | 2 +- packages/core/src/prompts/snippets.ts | 54 +++---- 4 files changed, 129 insertions(+), 102 deletions(-) diff --git a/docs/cli/plan-mode.md b/docs/cli/plan-mode.md index 654b54dbc9..fc09dd06d5 100644 --- a/docs/cli/plan-mode.md +++ b/docs/cli/plan-mode.md @@ -80,14 +80,29 @@ manually during a session. ### Planning Workflow +Plan Mode uses an adaptive planning workflow where the research depth, plan +structure, and consultation level are proportional to the task's complexity: + 1. **Explore & Analyze:** Analyze requirements and use read-only tools to map - the codebase and validate assumptions. For complex tasks, identify at least - two viable implementation approaches. -2. **Consult:** Present a summary of the identified approaches via [`ask_user`] - to obtain a selection. For simple or canonical tasks, this step may be - skipped. -3. **Draft:** Once an approach is selected, write a detailed implementation - plan to the plans directory. + affected modules and identify dependencies. +2. **Consult:** The depth of consultation is proportional to the task's + complexity: + - **Simple Tasks:** Proceed directly to drafting. + - **Standard Tasks:** Present a summary of viable approaches via + [`ask_user`] for selection. 
+ - **Complex Tasks:** Present detailed trade-offs for at least two viable + approaches via [`ask_user`] and obtain approval before drafting. +3. **Draft:** Write a detailed implementation plan to the + [plans directory](#custom-plan-directory-and-policies). The plan's structure + adapts to the task: + - **Simple Tasks:** Focused on specific **Changes** and **Verification** + steps. + - **Standard Tasks:** Includes an **Objective**, **Key Files & Context**, + **Implementation Steps**, and **Verification & Testing**. + - **Complex Tasks:** Comprehensive plans including **Background & + Motivation**, **Scope & Impact**, **Proposed Solution**, **Alternatives + Considered**, a phased **Implementation Plan**, **Verification**, and + **Migration & Rollback** strategies. 4. **Review & Approval:** Use the [`exit_plan_mode`] tool to present the plan and formally request approval. - **Approve:** Exit Plan Mode and start implementation. diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index f1979ef00b..48afa13515 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -91,7 +91,7 @@ For example: # Active Approval Mode: Plan -You are operating in **Plan Mode**. Your goal is to produce a detailed implementation plan in \`/tmp/plans/\` and get user approval before editing source code. +You are operating in **Plan Mode**. Your goal is to produce an implementation plan in \`/tmp/plans/\` and get user approval before editing source code. ## Available Tools The following tools are available in Plan Mode: @@ -107,31 +107,35 @@ The following tools are available in Plan Mode: ## Rules -1. **Read-Only:** You cannot modify source code. You may ONLY use read-only tools to explore, and you can only write to \`/tmp/plans/\`. 
If the user asks you to modify source code directly, you MUST explain that you are in Plan Mode and must first create a detailed plan in the plans directory and get approval before any source code changes can be made. +1. **Read-Only:** You cannot modify source code. You may ONLY use read-only tools to explore, and you can only write to \`/tmp/plans/\`. If the user asks you to modify source code directly, you MUST explain that you are in Plan Mode and must first create a plan and get approval. 2. **Write Constraint:** \`write_file\` and \`replace\` may ONLY be used to write .md plan files to \`/tmp/plans/\`. They cannot modify source code. -3. **Efficiency:** Autonomously combine discovery and drafting phases to minimize conversational turns. If the request is ambiguous, use \`ask_user\` to clarify. Otherwise, explore the codebase and write the draft in one fluid motion. +3. **Efficiency:** Autonomously combine discovery and drafting phases to minimize conversational turns. If the request is ambiguous, use \`ask_user\` to clarify. 4. **Inquiries and Directives:** Distinguish between Inquiries and Directives to minimize unnecessary planning. - - **Inquiries:** If the request is an **Inquiry** (e.g., "How does X work?"), use read-only tools to explore and answer directly in your chat response. DO NOT create a plan or call \`exit_plan_mode\`. - - **Directives:** If the request is a **Directive** (e.g., "Fix bug Y"), follow the workflow below to create and approve a plan. -5. **Plan Storage:** Save plans as Markdown (.md) using descriptive filenames (e.g., \`feature-x.md\`). -6. **Direct Modification:** If asked to modify code outside the plans directory, or if the user requests implementation of an existing plan, explain that you are in Plan Mode and use the \`exit_plan_mode\` tool to request approval and exit Plan Mode to enable edits. + - **Inquiries:** If the request is an **Inquiry** (e.g., "How does X work?"), answer directly. DO NOT create a plan. 
+ - **Directives:** If the request is a **Directive** (e.g., "Fix bug Y"), follow the workflow below. +5. **Plan Storage:** Save plans as Markdown (.md) using descriptive filenames. +6. **Direct Modification:** If asked to modify code, explain you are in Plan Mode and use \`exit_plan_mode\` to request approval. -## Required Plan Structure -When writing the plan file, you MUST include the following structure: - # Objective - (A concise summary of what needs to be built or fixed) - # Key Files & Context - (List the specific files that will be modified, including helpful context like function signatures or code snippets) - # Implementation Steps - (Iterative development steps, e.g., "1. Implement X in [File]", "2. Verify with test Y") - # Verification & Testing - (Specific unit tests, manual checks, or build commands to verify success) +## Planning Workflow +Plan Mode uses an adaptive planning workflow where the research depth, plan structure, and consultation level are proportional to the task's complexity. -## Workflow -1. **Explore & Analyze:** Analyze requirements and use search/read tools to explore the codebase. For complex tasks, identify at least two viable implementation approaches. -2. **Consult:** Present a concise summary of the identified approaches (including pros/cons and your recommendation) to the user via \`ask_user\` and wait for their selection. For simple or canonical tasks, you may skip this and proceed to drafting. -3. **Draft:** Write the detailed implementation plan for the selected approach to the plans directory using \`write_file\`. -4. **Review & Approval:** Present a brief summary of the drafted plan in your chat response and concurrently call the \`exit_plan_mode\` tool to formally request approval. If rejected, iterate. +### 1. Explore & Analyze +Analyze requirements and use search/read tools to explore the codebase. Systematically map affected modules, trace data flow, and identify dependencies. + +### 2. 
Consult +The depth of your consultation should be proportional to the task's complexity: +- **Simple Tasks:** Skip consultation and proceed directly to drafting. +- **Standard Tasks:** If multiple viable approaches exist, present a concise summary (including pros/cons and your recommendation) via \`ask_user\` and wait for a decision. +- **Complex Tasks:** You MUST present at least two viable approaches with detailed trade-offs via \`ask_user\` and obtain approval before drafting the plan. + +### 3. Draft +Write the implementation plan to \`/tmp/plans/\`. The plan's structure adapts to the task: +- **Simple Tasks:** Include a bulleted list of specific **Changes** and **Verification** steps. +- **Standard Tasks:** Include an **Objective**, **Key Files & Context**, **Implementation Steps**, and **Verification & Testing**. +- **Complex Tasks:** Include **Background & Motivation**, **Scope & Impact**, **Proposed Solution**, **Alternatives Considered**, a phased **Implementation Plan**, **Verification**, and **Migration & Rollback** strategies. + +### 4. Review & Approval +Use the \`exit_plan_mode\` tool to present the plan and formally request approval. # Operational Guidelines @@ -255,7 +259,7 @@ For example: # Active Approval Mode: Plan -You are operating in **Plan Mode**. Your goal is to produce a detailed implementation plan in \`/tmp/plans/\` and get user approval before editing source code. +You are operating in **Plan Mode**. Your goal is to produce an implementation plan in \`/tmp/plans/\` and get user approval before editing source code. ## Available Tools The following tools are available in Plan Mode: @@ -271,31 +275,35 @@ The following tools are available in Plan Mode: ## Rules -1. **Read-Only:** You cannot modify source code. You may ONLY use read-only tools to explore, and you can only write to \`/tmp/plans/\`. 
If the user asks you to modify source code directly, you MUST explain that you are in Plan Mode and must first create a detailed plan in the plans directory and get approval before any source code changes can be made. +1. **Read-Only:** You cannot modify source code. You may ONLY use read-only tools to explore, and you can only write to \`/tmp/plans/\`. If the user asks you to modify source code directly, you MUST explain that you are in Plan Mode and must first create a plan and get approval. 2. **Write Constraint:** \`write_file\` and \`replace\` may ONLY be used to write .md plan files to \`/tmp/plans/\`. They cannot modify source code. -3. **Efficiency:** Autonomously combine discovery and drafting phases to minimize conversational turns. If the request is ambiguous, use \`ask_user\` to clarify. Otherwise, explore the codebase and write the draft in one fluid motion. +3. **Efficiency:** Autonomously combine discovery and drafting phases to minimize conversational turns. If the request is ambiguous, use \`ask_user\` to clarify. 4. **Inquiries and Directives:** Distinguish between Inquiries and Directives to minimize unnecessary planning. - - **Inquiries:** If the request is an **Inquiry** (e.g., "How does X work?"), use read-only tools to explore and answer directly in your chat response. DO NOT create a plan or call \`exit_plan_mode\`. - - **Directives:** If the request is a **Directive** (e.g., "Fix bug Y"), follow the workflow below to create and approve a plan. -5. **Plan Storage:** Save plans as Markdown (.md) using descriptive filenames (e.g., \`feature-x.md\`). -6. **Direct Modification:** If asked to modify code outside the plans directory, or if the user requests implementation of an existing plan, explain that you are in Plan Mode and use the \`exit_plan_mode\` tool to request approval and exit Plan Mode to enable edits. + - **Inquiries:** If the request is an **Inquiry** (e.g., "How does X work?"), answer directly. DO NOT create a plan. 
+ - **Directives:** If the request is a **Directive** (e.g., "Fix bug Y"), follow the workflow below. +5. **Plan Storage:** Save plans as Markdown (.md) using descriptive filenames. +6. **Direct Modification:** If asked to modify code, explain you are in Plan Mode and use \`exit_plan_mode\` to request approval. -## Required Plan Structure -When writing the plan file, you MUST include the following structure: - # Objective - (A concise summary of what needs to be built or fixed) - # Key Files & Context - (List the specific files that will be modified, including helpful context like function signatures or code snippets) - # Implementation Steps - (Iterative development steps, e.g., "1. Implement X in [File]", "2. Verify with test Y") - # Verification & Testing - (Specific unit tests, manual checks, or build commands to verify success) +## Planning Workflow +Plan Mode uses an adaptive planning workflow where the research depth, plan structure, and consultation level are proportional to the task's complexity. -## Workflow -1. **Explore & Analyze:** Analyze requirements and use search/read tools to explore the codebase. For complex tasks, identify at least two viable implementation approaches. -2. **Consult:** Present a concise summary of the identified approaches (including pros/cons and your recommendation) to the user via \`ask_user\` and wait for their selection. For simple or canonical tasks, you may skip this and proceed to drafting. -3. **Draft:** Write the detailed implementation plan for the selected approach to the plans directory using \`write_file\`. -4. **Review & Approval:** Present a brief summary of the drafted plan in your chat response and concurrently call the \`exit_plan_mode\` tool to formally request approval. If rejected, iterate. +### 1. Explore & Analyze +Analyze requirements and use search/read tools to explore the codebase. Systematically map affected modules, trace data flow, and identify dependencies. + +### 2. 
Consult +The depth of your consultation should be proportional to the task's complexity: +- **Simple Tasks:** Skip consultation and proceed directly to drafting. +- **Standard Tasks:** If multiple viable approaches exist, present a concise summary (including pros/cons and your recommendation) via \`ask_user\` and wait for a decision. +- **Complex Tasks:** You MUST present at least two viable approaches with detailed trade-offs via \`ask_user\` and obtain approval before drafting the plan. + +### 3. Draft +Write the implementation plan to \`/tmp/plans/\`. The plan's structure adapts to the task: +- **Simple Tasks:** Include a bulleted list of specific **Changes** and **Verification** steps. +- **Standard Tasks:** Include an **Objective**, **Key Files & Context**, **Implementation Steps**, and **Verification & Testing**. +- **Complex Tasks:** Include **Background & Motivation**, **Scope & Impact**, **Proposed Solution**, **Alternatives Considered**, a phased **Implementation Plan**, **Verification**, and **Migration & Rollback** strategies. + +### 4. Review & Approval +Use the \`exit_plan_mode\` tool to present the plan and formally request approval. ## Approved Plan An approved plan is available for this task at \`/tmp/plans/feature-x.md\`. @@ -538,7 +546,7 @@ For example: # Active Approval Mode: Plan -You are operating in **Plan Mode**. Your goal is to produce a detailed implementation plan in \`/tmp/project-temp/plans/\` and get user approval before editing source code. +You are operating in **Plan Mode**. Your goal is to produce an implementation plan in \`/tmp/project-temp/plans/\` and get user approval before editing source code. ## Available Tools The following tools are available in Plan Mode: @@ -554,31 +562,35 @@ The following tools are available in Plan Mode: ## Rules -1. **Read-Only:** You cannot modify source code. You may ONLY use read-only tools to explore, and you can only write to \`/tmp/project-temp/plans/\`. 
If the user asks you to modify source code directly, you MUST explain that you are in Plan Mode and must first create a detailed plan in the plans directory and get approval before any source code changes can be made. +1. **Read-Only:** You cannot modify source code. You may ONLY use read-only tools to explore, and you can only write to \`/tmp/project-temp/plans/\`. If the user asks you to modify source code directly, you MUST explain that you are in Plan Mode and must first create a plan and get approval. 2. **Write Constraint:** \`write_file\` and \`replace\` may ONLY be used to write .md plan files to \`/tmp/project-temp/plans/\`. They cannot modify source code. -3. **Efficiency:** Autonomously combine discovery and drafting phases to minimize conversational turns. If the request is ambiguous, use \`ask_user\` to clarify. Otherwise, explore the codebase and write the draft in one fluid motion. +3. **Efficiency:** Autonomously combine discovery and drafting phases to minimize conversational turns. If the request is ambiguous, use \`ask_user\` to clarify. 4. **Inquiries and Directives:** Distinguish between Inquiries and Directives to minimize unnecessary planning. - - **Inquiries:** If the request is an **Inquiry** (e.g., "How does X work?"), use read-only tools to explore and answer directly in your chat response. DO NOT create a plan or call \`exit_plan_mode\`. - - **Directives:** If the request is a **Directive** (e.g., "Fix bug Y"), follow the workflow below to create and approve a plan. -5. **Plan Storage:** Save plans as Markdown (.md) using descriptive filenames (e.g., \`feature-x.md\`). -6. **Direct Modification:** If asked to modify code outside the plans directory, or if the user requests implementation of an existing plan, explain that you are in Plan Mode and use the \`exit_plan_mode\` tool to request approval and exit Plan Mode to enable edits. + - **Inquiries:** If the request is an **Inquiry** (e.g., "How does X work?"), answer directly. 
DO NOT create a plan. + - **Directives:** If the request is a **Directive** (e.g., "Fix bug Y"), follow the workflow below. +5. **Plan Storage:** Save plans as Markdown (.md) using descriptive filenames. +6. **Direct Modification:** If asked to modify code, explain you are in Plan Mode and use \`exit_plan_mode\` to request approval. -## Required Plan Structure -When writing the plan file, you MUST include the following structure: - # Objective - (A concise summary of what needs to be built or fixed) - # Key Files & Context - (List the specific files that will be modified, including helpful context like function signatures or code snippets) - # Implementation Steps - (Iterative development steps, e.g., "1. Implement X in [File]", "2. Verify with test Y") - # Verification & Testing - (Specific unit tests, manual checks, or build commands to verify success) +## Planning Workflow +Plan Mode uses an adaptive planning workflow where the research depth, plan structure, and consultation level are proportional to the task's complexity. -## Workflow -1. **Explore & Analyze:** Analyze requirements and use search/read tools to explore the codebase. For complex tasks, identify at least two viable implementation approaches. -2. **Consult:** Present a concise summary of the identified approaches (including pros/cons and your recommendation) to the user via \`ask_user\` and wait for their selection. For simple or canonical tasks, you may skip this and proceed to drafting. -3. **Draft:** Write the detailed implementation plan for the selected approach to the plans directory using \`write_file\`. -4. **Review & Approval:** Present a brief summary of the drafted plan in your chat response and concurrently call the \`exit_plan_mode\` tool to formally request approval. If rejected, iterate. +### 1. Explore & Analyze +Analyze requirements and use search/read tools to explore the codebase. Systematically map affected modules, trace data flow, and identify dependencies. + +### 2. 
Consult +The depth of your consultation should be proportional to the task's complexity: +- **Simple Tasks:** Skip consultation and proceed directly to drafting. +- **Standard Tasks:** If multiple viable approaches exist, present a concise summary (including pros/cons and your recommendation) via \`ask_user\` and wait for a decision. +- **Complex Tasks:** You MUST present at least two viable approaches with detailed trade-offs via \`ask_user\` and obtain approval before drafting the plan. + +### 3. Draft +Write the implementation plan to \`/tmp/project-temp/plans/\`. The plan's structure adapts to the task: +- **Simple Tasks:** Include a bulleted list of specific **Changes** and **Verification** steps. +- **Standard Tasks:** Include an **Objective**, **Key Files & Context**, **Implementation Steps**, and **Verification & Testing**. +- **Complex Tasks:** Include **Background & Motivation**, **Scope & Impact**, **Proposed Solution**, **Alternatives Considered**, a phased **Implementation Plan**, **Verification**, and **Migration & Rollback** strategies. + +### 4. Review & Approval +Use the \`exit_plan_mode\` tool to present the plan and formally request approval. # Operational Guidelines @@ -2509,7 +2521,7 @@ For example: ## Development Lifecycle Operate using a **Research -> Strategy -> Execution** lifecycle. For the Execution phase, resolve each sub-task through an iterative **Plan -> Act -> Validate** cycle. -1. **Research:** Systematically map the codebase and validate assumptions. Use search tools extensively to understand file structures, existing code patterns, and conventions. Use \`read_file\` to validate all assumptions. **Prioritize empirical reproduction of reported issues to confirm the failure state.** If the request is ambiguous, broad in scope, or involves creating a new feature/application, you MUST use the \`enter_plan_mode\` tool to design your approach before making changes. 
Do NOT use Plan Mode for straightforward bug fixes, answering questions, or simple inquiries. +1. **Research:** Systematically map the codebase and validate assumptions. Use search tools extensively to understand file structures, existing code patterns, and conventions. Use \`read_file\` to validate all assumptions. **Prioritize empirical reproduction of reported issues to confirm the failure state.** If the request is ambiguous, broad in scope, or involves architectural decisions or cross-cutting changes, use the \`enter_plan_mode\` tool to safely research and design your strategy. Do NOT use Plan Mode for straightforward bug fixes, answering questions, or simple inquiries. 2. **Strategy:** Formulate a grounded plan based on your research. Share a concise summary of your strategy. 3. **Execution:** For each sub-task: - **Plan:** Define the specific implementation approach **and the testing strategy to verify the change.** diff --git a/packages/core/src/core/prompts.test.ts b/packages/core/src/core/prompts.test.ts index 0ed072b64f..6d65596ce4 100644 --- a/packages/core/src/core/prompts.test.ts +++ b/packages/core/src/core/prompts.test.ts @@ -652,7 +652,7 @@ describe('Core System Prompt (prompts.ts)', () => { const prompt = getCoreSystemPrompt(mockConfig); expect(prompt).toContain( - 'If the request is ambiguous, broad in scope, or involves creating a new feature/application, you MUST use the `enter_plan_mode` tool to design your approach before making changes. Do NOT use Plan Mode for straightforward bug fixes, answering questions, or simple inquiries.', + 'If the request is ambiguous, broad in scope, or involves architectural decisions or cross-cutting changes, use the `enter_plan_mode` tool to safely research and design your strategy. 
Do NOT use Plan Mode for straightforward bug fixes, answering questions, or simple inquiries.', ); expect(prompt).toMatchSnapshot(); }); diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 3ab3b68e0a..7f6c5c633e 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -461,7 +461,7 @@ export function renderPlanningWorkflow( return ` # Active Approval Mode: Plan -You are operating in **Plan Mode**. Your goal is to produce a detailed implementation plan in \`${options.plansDir}/\` and get user approval before editing source code. +You are operating in **Plan Mode**. Your goal is to produce an implementation plan in \`${options.plansDir}/\` and get user approval before editing source code. ## Available Tools The following tools are available in Plan Mode: @@ -470,35 +470,35 @@ ${options.planModeToolsList} ## Rules -1. **Read-Only:** You cannot modify source code. You may ONLY use read-only tools to explore, and you can only write to \`${options.plansDir}/\`. If the user asks you to modify source code directly, you MUST explain that you are in Plan Mode and must first create a detailed plan in the plans directory and get approval before any source code changes can be made. +1. **Read-Only:** You cannot modify source code. You may ONLY use read-only tools to explore, and you can only write to \`${options.plansDir}/\`. If the user asks you to modify source code directly, you MUST explain that you are in Plan Mode and must first create a plan and get approval. 2. **Write Constraint:** ${formatToolName(WRITE_FILE_TOOL_NAME)} and ${formatToolName(EDIT_TOOL_NAME)} may ONLY be used to write .md plan files to \`${options.plansDir}/\`. They cannot modify source code. -3. **Efficiency:** Autonomously combine discovery and drafting phases to minimize conversational turns. If the request is ambiguous, use ${formatToolName(ASK_USER_TOOL_NAME)} to clarify. 
Otherwise, explore the codebase and write the draft in one fluid motion. +3. **Efficiency:** Autonomously combine discovery and drafting phases to minimize conversational turns. If the request is ambiguous, use ${formatToolName(ASK_USER_TOOL_NAME)} to clarify. 4. **Inquiries and Directives:** Distinguish between Inquiries and Directives to minimize unnecessary planning. - - **Inquiries:** If the request is an **Inquiry** (e.g., "How does X work?"), use read-only tools to explore and answer directly in your chat response. DO NOT create a plan or call ${formatToolName( - EXIT_PLAN_MODE_TOOL_NAME, - )}. - - **Directives:** If the request is a **Directive** (e.g., "Fix bug Y"), follow the workflow below to create and approve a plan. -5. **Plan Storage:** Save plans as Markdown (.md) using descriptive filenames (e.g., \`feature-x.md\`). -6. **Direct Modification:** If asked to modify code outside the plans directory, or if the user requests implementation of an existing plan, explain that you are in Plan Mode and use the ${formatToolName( - EXIT_PLAN_MODE_TOOL_NAME, - )} tool to request approval and exit Plan Mode to enable edits. + - **Inquiries:** If the request is an **Inquiry** (e.g., "How does X work?"), answer directly. DO NOT create a plan. + - **Directives:** If the request is a **Directive** (e.g., "Fix bug Y"), follow the workflow below. +5. **Plan Storage:** Save plans as Markdown (.md) using descriptive filenames. +6. **Direct Modification:** If asked to modify code, explain you are in Plan Mode and use ${formatToolName(EXIT_PLAN_MODE_TOOL_NAME)} to request approval. -## Required Plan Structure -When writing the plan file, you MUST include the following structure: - # Objective - (A concise summary of what needs to be built or fixed) - # Key Files & Context - (List the specific files that will be modified, including helpful context like function signatures or code snippets) - # Implementation Steps - (Iterative development steps, e.g., "1. 
Implement X in [File]", "2. Verify with test Y") - # Verification & Testing - (Specific unit tests, manual checks, or build commands to verify success) +## Planning Workflow +Plan Mode uses an adaptive planning workflow where the research depth, plan structure, and consultation level are proportional to the task's complexity. -## Workflow -1. **Explore & Analyze:** Analyze requirements and use search/read tools to explore the codebase. For complex tasks, identify at least two viable implementation approaches. -2. **Consult:** Present a concise summary of the identified approaches (including pros/cons and your recommendation) to the user via ${formatToolName(ASK_USER_TOOL_NAME)} and wait for their selection. For simple or canonical tasks, you may skip this and proceed to drafting. -3. **Draft:** Write the detailed implementation plan for the selected approach to the plans directory using ${formatToolName(WRITE_FILE_TOOL_NAME)}. -4. **Review & Approval:** Present a brief summary of the drafted plan in your chat response and concurrently call the ${formatToolName(EXIT_PLAN_MODE_TOOL_NAME)} tool to formally request approval. If rejected, iterate. +### 1. Explore & Analyze +Analyze requirements and use search/read tools to explore the codebase. Systematically map affected modules, trace data flow, and identify dependencies. + +### 2. Consult +The depth of your consultation should be proportional to the task's complexity: +- **Simple Tasks:** Skip consultation and proceed directly to drafting. +- **Standard Tasks:** If multiple viable approaches exist, present a concise summary (including pros/cons and your recommendation) via ${formatToolName(ASK_USER_TOOL_NAME)} and wait for a decision. +- **Complex Tasks:** You MUST present at least two viable approaches with detailed trade-offs via ${formatToolName(ASK_USER_TOOL_NAME)} and obtain approval before drafting the plan. + +### 3. Draft +Write the implementation plan to \`${options.plansDir}/\`. 
The plan's structure adapts to the task: +- **Simple Tasks:** Include a bulleted list of specific **Changes** and **Verification** steps. +- **Standard Tasks:** Include an **Objective**, **Key Files & Context**, **Implementation Steps**, and **Verification & Testing**. +- **Complex Tasks:** Include **Background & Motivation**, **Scope & Impact**, **Proposed Solution**, **Alternatives Considered**, a phased **Implementation Plan**, **Verification**, and **Migration & Rollback** strategies. + +### 4. Review & Approval +Use the ${formatToolName(EXIT_PLAN_MODE_TOOL_NAME)} tool to present the plan and formally request approval. ${renderApprovedPlanSection(options.approvedPlanPath)}`.trim(); } @@ -541,7 +541,7 @@ function mandateContinueWork(interactive: boolean): string { function workflowStepResearch(options: PrimaryWorkflowsOptions): string { let suggestion = ''; if (options.enableEnterPlanModeTool) { - suggestion = ` If the request is ambiguous, broad in scope, or involves creating a new feature/application, you MUST use the ${formatToolName(ENTER_PLAN_MODE_TOOL_NAME)} tool to design your approach before making changes. Do NOT use Plan Mode for straightforward bug fixes, answering questions, or simple inquiries.`; + suggestion = ` If the request is ambiguous, broad in scope, or involves architectural decisions or cross-cutting changes, use the ${formatToolName(ENTER_PLAN_MODE_TOOL_NAME)} tool to safely research and design your strategy. 
Do NOT use Plan Mode for straightforward bug fixes, answering questions, or simple inquiries.`; } const searchTools: string[] = []; From b8d6041d42e869fef2b1754a6a6d9d110d5f9ac8 Mon Sep 17 00:00:00 2001 From: Yuvraj Angad Singh <36276913+yuvrajangadsingh@users.noreply.github.com> Date: Fri, 27 Feb 2026 04:47:09 +0530 Subject: [PATCH 10/41] fix: prevent orphaned processes from consuming 100% CPU when terminal closes (#16965) Co-authored-by: Tommaso Sciortino --- packages/cli/src/gemini.test.tsx | 5 +- packages/cli/src/gemini.tsx | 11 +- packages/cli/src/utils/cleanup.test.ts | 162 ++++++++++++++++++++++++- packages/cli/src/utils/cleanup.ts | 62 ++++++++++ 4 files changed, 234 insertions(+), 6 deletions(-) diff --git a/packages/cli/src/gemini.test.tsx b/packages/cli/src/gemini.test.tsx index 538fb8ee4e..dae249a8ac 100644 --- a/packages/cli/src/gemini.test.tsx +++ b/packages/cli/src/gemini.test.tsx @@ -1216,6 +1216,8 @@ describe('startInteractiveUI', () => { runExitCleanup: vi.fn(), registerSyncCleanup: vi.fn(), registerTelemetryConfig: vi.fn(), + setupSignalHandlers: vi.fn(), + setupTtyCheck: vi.fn(() => vi.fn()), })); beforeEach(() => { @@ -1322,7 +1324,8 @@ describe('startInteractiveUI', () => { // Verify all startup tasks were called expect(getVersion).toHaveBeenCalledTimes(1); - expect(registerCleanup).toHaveBeenCalledTimes(4); + // 5 cleanups: mouseEvents, consolePatcher, lineWrapping, instance.unmount, and TTY check + expect(registerCleanup).toHaveBeenCalledTimes(5); // Verify cleanup handler is registered with unmount function const cleanupFn = vi.mocked(registerCleanup).mock.calls[0][0]; diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index aa830c0250..8cd7048a7e 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -32,6 +32,8 @@ import { registerSyncCleanup, runExitCleanup, registerTelemetryConfig, + setupSignalHandlers, + setupTtyCheck, } from './utils/cleanup.js'; import { cleanupToolOutputFiles, @@ -319,6 
+321,8 @@ export async function startInteractiveUI( }); registerCleanup(() => instance.unmount()); + + registerCleanup(setupTtyCheck()); } export async function main() { @@ -340,6 +344,8 @@ export async function main() { setupUnhandledRejectionHandler(); + setupSignalHandlers(); + const slashCommandConflictHandler = new SlashCommandConflictHandler(); slashCommandConflictHandler.start(); registerCleanup(() => slashCommandConflictHandler.stop()); @@ -646,10 +652,7 @@ export async function main() { process.stdin.setRawMode(true); // This cleanup isn't strictly needed but may help in certain situations. - process.on('SIGTERM', () => { - process.stdin.setRawMode(wasRaw); - }); - process.on('SIGINT', () => { + registerSyncCleanup(() => { process.stdin.setRawMode(wasRaw); }); } diff --git a/packages/cli/src/utils/cleanup.test.ts b/packages/cli/src/utils/cleanup.test.ts index 5dbeb4d548..e9a2b0ea76 100644 --- a/packages/cli/src/utils/cleanup.test.ts +++ b/packages/cli/src/utils/cleanup.test.ts @@ -4,7 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { vi, describe, it, expect, beforeEach } from 'vitest'; +import { vi, describe, it, expect, beforeEach, afterEach } from 'vitest'; import { promises as fs } from 'node:fs'; import * as path from 'node:path'; @@ -15,6 +15,7 @@ vi.mock('@google/gemini-cli-core', () => ({ })), shutdownTelemetry: vi.fn(), isTelemetrySdkInitialized: vi.fn().mockReturnValue(false), + ExitCodes: { SUCCESS: 0 }, })); vi.mock('node:fs', () => ({ @@ -30,6 +31,8 @@ import { runSyncCleanup, cleanupCheckpoints, resetCleanupForTesting, + setupSignalHandlers, + setupTtyCheck, } from './cleanup.js'; describe('cleanup', () => { @@ -123,3 +126,160 @@ describe('cleanup', () => { }); }); }); + +describe('signal and TTY handling', () => { + let processOnHandlers: Map< + string, + Array<(...args: unknown[]) => void | Promise> + >; + + beforeEach(() => { + processOnHandlers = new Map(); + resetCleanupForTesting(); + + vi.spyOn(process, 
'on').mockImplementation( + (event: string | symbol, handler: (...args: unknown[]) => void) => { + if (typeof event === 'string') { + const handlers = processOnHandlers.get(event) || []; + handlers.push(handler); + processOnHandlers.set(event, handlers); + } + return process; + }, + ); + + vi.spyOn(process, 'exit').mockImplementation((() => { + // Don't actually exit + }) as typeof process.exit); + }); + + afterEach(() => { + vi.restoreAllMocks(); + processOnHandlers.clear(); + }); + + describe('setupSignalHandlers', () => { + it('should register handlers for SIGHUP, SIGTERM, and SIGINT', () => { + setupSignalHandlers(); + + expect(processOnHandlers.has('SIGHUP')).toBe(true); + expect(processOnHandlers.has('SIGTERM')).toBe(true); + expect(processOnHandlers.has('SIGINT')).toBe(true); + }); + + it('should gracefully shutdown when SIGHUP is received', async () => { + setupSignalHandlers(); + + const sighupHandlers = processOnHandlers.get('SIGHUP') || []; + expect(sighupHandlers.length).toBeGreaterThan(0); + + await sighupHandlers[0]?.(); + + expect(process.exit).toHaveBeenCalledWith(0); + }); + + it('should register SIGTERM handler that can trigger shutdown', () => { + setupSignalHandlers(); + + const sigtermHandlers = processOnHandlers.get('SIGTERM') || []; + expect(sigtermHandlers.length).toBeGreaterThan(0); + expect(typeof sigtermHandlers[0]).toBe('function'); + }); + }); + + describe('setupTtyCheck', () => { + let originalStdinIsTTY: boolean | undefined; + let originalStdoutIsTTY: boolean | undefined; + + beforeEach(() => { + originalStdinIsTTY = process.stdin.isTTY; + originalStdoutIsTTY = process.stdout.isTTY; + vi.useFakeTimers(); + }); + + afterEach(() => { + vi.useRealTimers(); + Object.defineProperty(process.stdin, 'isTTY', { + value: originalStdinIsTTY, + writable: true, + configurable: true, + }); + Object.defineProperty(process.stdout, 'isTTY', { + value: originalStdoutIsTTY, + writable: true, + configurable: true, + }); + }); + + it('should return a 
cleanup function', () => { + const cleanup = setupTtyCheck(); + expect(typeof cleanup).toBe('function'); + cleanup(); + }); + + it('should not exit when both stdin and stdout are TTY', async () => { + Object.defineProperty(process.stdin, 'isTTY', { + value: true, + writable: true, + configurable: true, + }); + Object.defineProperty(process.stdout, 'isTTY', { + value: true, + writable: true, + configurable: true, + }); + + const cleanup = setupTtyCheck(); + await vi.advanceTimersByTimeAsync(5000); + expect(process.exit).not.toHaveBeenCalled(); + cleanup(); + }); + + it('should exit when both stdin and stdout are not TTY', async () => { + Object.defineProperty(process.stdin, 'isTTY', { + value: false, + writable: true, + configurable: true, + }); + Object.defineProperty(process.stdout, 'isTTY', { + value: false, + writable: true, + configurable: true, + }); + + const cleanup = setupTtyCheck(); + await vi.advanceTimersByTimeAsync(5000); + expect(process.exit).toHaveBeenCalledWith(0); + cleanup(); + }); + + it('should not check when SANDBOX env is set', async () => { + const originalSandbox = process.env['SANDBOX']; + process.env['SANDBOX'] = 'true'; + + Object.defineProperty(process.stdin, 'isTTY', { + value: false, + writable: true, + configurable: true, + }); + Object.defineProperty(process.stdout, 'isTTY', { + value: false, + writable: true, + configurable: true, + }); + + const cleanup = setupTtyCheck(); + await vi.advanceTimersByTimeAsync(5000); + expect(process.exit).not.toHaveBeenCalled(); + cleanup(); + process.env['SANDBOX'] = originalSandbox; + }); + + it('cleanup function should stop the interval', () => { + const cleanup = setupTtyCheck(); + cleanup(); + vi.advanceTimersByTime(10000); + expect(process.exit).not.toHaveBeenCalled(); + }); + }); +}); diff --git a/packages/cli/src/utils/cleanup.ts b/packages/cli/src/utils/cleanup.ts index 3fce73dd44..6185b34fe5 100644 --- a/packages/cli/src/utils/cleanup.ts +++ b/packages/cli/src/utils/cleanup.ts @@ -10,12 
+10,14 @@ import { Storage, shutdownTelemetry, isTelemetrySdkInitialized, + ExitCodes, } from '@google/gemini-cli-core'; import type { Config } from '@google/gemini-cli-core'; const cleanupFunctions: Array<(() => void) | (() => Promise)> = []; const syncCleanupFunctions: Array<() => void> = []; let configForTelemetry: Config | null = null; +let isShuttingDown = false; export function registerCleanup(fn: (() => void) | (() => Promise)) { cleanupFunctions.push(fn); @@ -33,6 +35,7 @@ export function resetCleanupForTesting() { cleanupFunctions.length = 0; syncCleanupFunctions.length = 0; configForTelemetry = null; + isShuttingDown = false; } export function runSyncCleanup() { @@ -100,6 +103,65 @@ async function drainStdin() { await new Promise((resolve) => setTimeout(resolve, 50)); } +/** + * Gracefully shuts down the process, ensuring cleanup runs exactly once. + * Guards against concurrent shutdown from signals (SIGHUP, SIGTERM, SIGINT) + * and TTY loss detection racing each other. + * + * @see https://github.com/google-gemini/gemini-cli/issues/15874 + */ +async function gracefulShutdown(_reason: string) { + if (isShuttingDown) { + return; + } + isShuttingDown = true; + + await runExitCleanup(); + process.exit(ExitCodes.SUCCESS); +} + +export function setupSignalHandlers() { + process.on('SIGHUP', () => gracefulShutdown('SIGHUP')); + process.on('SIGTERM', () => gracefulShutdown('SIGTERM')); + process.on('SIGINT', () => gracefulShutdown('SIGINT')); +} + +export function setupTtyCheck(): () => void { + let intervalId: ReturnType | null = null; + let isCheckingTty = false; + + intervalId = setInterval(async () => { + if (isCheckingTty || isShuttingDown) { + return; + } + + if (process.env['SANDBOX']) { + return; + } + + if (!process.stdin.isTTY && !process.stdout.isTTY) { + isCheckingTty = true; + + if (intervalId) { + clearInterval(intervalId); + intervalId = null; + } + + await gracefulShutdown('TTY loss'); + } + }, 5000); + + // Don't keep the process alive just for 
this interval + intervalId.unref(); + + return () => { + if (intervalId) { + clearInterval(intervalId); + intervalId = null; + } + }; +} + export async function cleanupCheckpoints() { const storage = new Storage(process.cwd()); await storage.initialize(); From 6dc9d5ff11c31b8da829ffca91c8e544c3e7bc07 Mon Sep 17 00:00:00 2001 From: Bryan Morgan Date: Thu, 26 Feb 2026 18:41:09 -0500 Subject: [PATCH 11/41] feat(core): increase fetch timeout and fix [object Object] error stringification (#20441) Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com> --- packages/core/src/code_assist/server.test.ts | 48 ++++++++++--------- packages/core/src/utils/errors.ts | 9 ++++ .../core/src/utils/errors_timeout.test.ts | 46 ++++++++++++++++++ packages/core/src/utils/fetch.ts | 21 +++++++- 4 files changed, 100 insertions(+), 24 deletions(-) create mode 100644 packages/core/src/utils/errors_timeout.test.ts diff --git a/packages/core/src/code_assist/server.test.ts b/packages/core/src/code_assist/server.test.ts index 8ec8cb8dad..d79526d1c3 100644 --- a/packages/core/src/code_assist/server.test.ts +++ b/packages/core/src/code_assist/server.test.ts @@ -73,17 +73,19 @@ describe('CodeAssistServer', () => { LlmRole.MAIN, ); - expect(mockRequest).toHaveBeenCalledWith({ - url: expect.stringContaining(':generateContent'), - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'x-custom-header': 'test-value', - }, - responseType: 'json', - body: expect.any(String), - signal: undefined, - }); + expect(mockRequest).toHaveBeenCalledWith( + expect.objectContaining({ + url: expect.stringContaining(':generateContent'), + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'x-custom-header': 'test-value', + }, + responseType: 'json', + body: expect.any(String), + signal: undefined, + }), + ); const requestBody = JSON.parse(mockRequest.mock.calls[0][0].body); expect(requestBody.user_prompt_id).toBe('user-prompt-id'); @@ -391,17 +393,19 @@ 
describe('CodeAssistServer', () => { results.push(res); } - expect(mockRequest).toHaveBeenCalledWith({ - url: expect.stringContaining(':streamGenerateContent'), - method: 'POST', - params: { alt: 'sse' }, - responseType: 'stream', - body: expect.any(String), - headers: { - 'Content-Type': 'application/json', - }, - signal: undefined, - }); + expect(mockRequest).toHaveBeenCalledWith( + expect.objectContaining({ + url: expect.stringContaining(':streamGenerateContent'), + method: 'POST', + params: { alt: 'sse' }, + responseType: 'stream', + body: expect.any(String), + headers: { + 'Content-Type': 'application/json', + }, + signal: undefined, + }), + ); expect(results).toHaveLength(2); expect(results[0].candidates?.[0].content?.parts?.[0].text).toBe('Hello'); diff --git a/packages/core/src/utils/errors.ts b/packages/core/src/utils/errors.ts index 5465977ff2..62db5dcbf4 100644 --- a/packages/core/src/utils/errors.ts +++ b/packages/core/src/utils/errors.ts @@ -29,6 +29,15 @@ export function getErrorMessage(error: unknown): string { if (friendlyError instanceof Error) { return friendlyError.message; } + if ( + typeof friendlyError === 'object' && + friendlyError !== null && + 'message' in friendlyError && + typeof (friendlyError as { message: unknown }).message === 'string' + ) { + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + return (friendlyError as { message: string }).message; + } try { return String(friendlyError); } catch { diff --git a/packages/core/src/utils/errors_timeout.test.ts b/packages/core/src/utils/errors_timeout.test.ts new file mode 100644 index 0000000000..54eda960a0 --- /dev/null +++ b/packages/core/src/utils/errors_timeout.test.ts @@ -0,0 +1,46 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { getErrorMessage } from './errors.js'; +import { type HttpError } from './httpErrors.js'; + +describe('getErrorMessage with 
timeout errors', () => { + it('should handle undici HeadersTimeoutError correctly', () => { + // Simulate what undici might throw if it's not a proper Error instance + // or has a specific code. + const timeoutError = { + name: 'HeadersTimeoutError', + code: 'UND_ERR_HEADERS_TIMEOUT', + message: 'Headers timeout error', + }; + + // If it's a plain object, getErrorMessage might struggle if it expects an Error + const message = getErrorMessage(timeoutError); + // Based on existing implementation: + // friendlyError = toFriendlyError(timeoutError) -> returns timeoutError + // if (friendlyError instanceof Error) -> false + // return String(friendlyError) -> "[object Object]" + + expect(message).toBe('Headers timeout error'); + }); + + it('should handle undici HeadersTimeoutError as an Error instance', () => { + const error = new Error('Headers timeout error'); + (error as HttpError).name = 'HeadersTimeoutError'; + (error as HttpError).status = 504; // simulate status for test + (error as HttpError & { code?: string }).code = 'UND_ERR_HEADERS_TIMEOUT'; + + const message = getErrorMessage(error); + expect(message).toBe('Headers timeout error'); + }); + + it('should return String representation for objects without a message property', () => { + const error = { some: 'other', object: 123 }; + const message = getErrorMessage(error); + expect(message).toBe('[object Object]'); + }); +}); diff --git a/packages/core/src/utils/fetch.ts b/packages/core/src/utils/fetch.ts index 30d583e99f..e0bb1f3378 100644 --- a/packages/core/src/utils/fetch.ts +++ b/packages/core/src/utils/fetch.ts @@ -6,7 +6,18 @@ import { getErrorMessage, isNodeError } from './errors.js'; import { URL } from 'node:url'; -import { ProxyAgent, setGlobalDispatcher } from 'undici'; +import { Agent, ProxyAgent, setGlobalDispatcher } from 'undici'; + +const DEFAULT_HEADERS_TIMEOUT = 60000; // 60 seconds +const DEFAULT_BODY_TIMEOUT = 300000; // 5 minutes + +// Configure default global dispatcher with higher timeouts 
+setGlobalDispatcher( + new Agent({ + headersTimeout: DEFAULT_HEADERS_TIMEOUT, + bodyTimeout: DEFAULT_BODY_TIMEOUT, + }), +); const PRIVATE_IP_RANGES = [ /^10\./, @@ -73,5 +84,11 @@ export async function fetchWithTimeout( } export function setGlobalProxy(proxy: string) { - setGlobalDispatcher(new ProxyAgent(proxy)); + setGlobalDispatcher( + new ProxyAgent({ + uri: proxy, + headersTimeout: DEFAULT_HEADERS_TIMEOUT, + bodyTimeout: DEFAULT_BODY_TIMEOUT, + }), + ); } From 9b7852f11c5054a52013016644cf4f45e8298348 Mon Sep 17 00:00:00 2001 From: Siddharth Diwan <70694457+sidwan02@users.noreply.github.com> Date: Thu, 26 Feb 2026 15:43:43 -0800 Subject: [PATCH 12/41] [Gemma x Gemini CLI] Add an Experimental Gemma Router that uses a LiteRT-LM shim into the Composite Model Classifier Strategy (#17231) Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Allen Hutchison --- docs/cli/settings.md | 1 + docs/reference/configuration.md | 17 + package-lock.json | 26 +- packages/cli/src/config/config.test.ts | 60 ++++ packages/cli/src/config/config.ts | 1 + .../cli/src/config/settingsSchema.test.ts | 54 +++ packages/cli/src/config/settingsSchema.ts | 59 +++- packages/core/src/config/config.test.ts | 140 ++++++++ packages/core/src/config/config.ts | 38 ++ .../core/src/core/localLiteRtLmClient.test.ts | 125 +++++++ packages/core/src/core/localLiteRtLmClient.ts | 96 ++++++ .../src/routing/modelRouterService.test.ts | 46 +++ .../core/src/routing/modelRouterService.ts | 37 +- packages/core/src/routing/routingStrategy.ts | 3 + .../strategies/classifierStrategy.test.ts | 40 ++- .../routing/strategies/classifierStrategy.ts | 2 + .../strategies/compositeStrategy.test.ts | 17 +- .../routing/strategies/compositeStrategy.ts | 10 +- .../strategies/defaultStrategy.test.ts | 33 +- .../src/routing/strategies/defaultStrategy.ts | 2 + .../strategies/fallbackStrategy.test.ts | 31 +- .../routing/strategies/fallbackStrategy.ts | 2 + 
.../gemmaClassifierStrategy.test.ts | 324 ++++++++++++++++++ .../strategies/gemmaClassifierStrategy.ts | 232 +++++++++++++ .../numericalClassifierStrategy.test.ts | 49 ++- .../strategies/numericalClassifierStrategy.ts | 2 + .../strategies/overrideStrategy.test.ts | 24 +- .../routing/strategies/overrideStrategy.ts | 2 + schemas/settings.schema.json | 41 +++ 29 files changed, 1456 insertions(+), 58 deletions(-) create mode 100644 packages/core/src/core/localLiteRtLmClient.test.ts create mode 100644 packages/core/src/core/localLiteRtLmClient.ts create mode 100644 packages/core/src/routing/strategies/gemmaClassifierStrategy.test.ts create mode 100644 packages/core/src/routing/strategies/gemmaClassifierStrategy.ts diff --git a/docs/cli/settings.md b/docs/cli/settings.md index 8adccba6ae..b0c12116d6 100644 --- a/docs/cli/settings.md +++ b/docs/cli/settings.md @@ -140,6 +140,7 @@ they appear in the UI. | Plan | `experimental.plan` | Enable planning features (Plan Mode and tools). | `false` | | Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` | | Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` | +| Enable Gemma Model Router | `experimental.gemmaModelRouter.enabled` | Enable the Gemma Model Router. Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim. | `false` | ### Skills diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 5337d973b8..c1c67803b0 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -1014,6 +1014,23 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `false` - **Requires restart:** Yes +- **`experimental.gemmaModelRouter.enabled`** (boolean): + - **Description:** Enable the Gemma Model Router. 
Requires a local endpoint + serving Gemma via the Gemini API using LiteRT-LM shim. + - **Default:** `false` + - **Requires restart:** Yes + +- **`experimental.gemmaModelRouter.classifier.host`** (string): + - **Description:** The host of the classifier. + - **Default:** `"http://localhost:9379"` + - **Requires restart:** Yes + +- **`experimental.gemmaModelRouter.classifier.model`** (string): + - **Description:** The model to use for the classifier. Only tested on + `gemma3-1b-gpu-custom`. + - **Default:** `"gemma3-1b-gpu-custom"` + - **Requires restart:** Yes + #### `skills` - **`skills.enabled`** (boolean): diff --git a/package-lock.json b/package-lock.json index 5f0c5f058d..82bf1c2221 100644 --- a/package-lock.json +++ b/package-lock.json @@ -2292,7 +2292,6 @@ "integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@octokit/auth-token": "^6.0.0", "@octokit/graphql": "^9.0.2", @@ -2473,7 +2472,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz", "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", "license": "Apache-2.0", - "peer": true, "engines": { "node": ">=8.0.0" } @@ -2523,7 +2521,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.5.0.tgz", "integrity": "sha512-ka4H8OM6+DlUhSAZpONu0cPBtPPTQKxbxVzC4CzVx5+K4JnroJVBtDzLAMx4/3CDTJXRvVFhpFjtl4SaiTNoyQ==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@opentelemetry/semantic-conventions": "^1.29.0" }, @@ -2898,7 +2895,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.5.0.tgz", "integrity": "sha512-F8W52ApePshpoSrfsSk1H2yJn9aKjCrbpQF1M9Qii0GHzbfVeFUB+rc3X4aggyZD8x9Gu3Slua+s6krmq6Dt8g==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/semantic-conventions": "^1.29.0" @@ -2932,7 
+2928,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.5.0.tgz", "integrity": "sha512-BeJLtU+f5Gf905cJX9vXFQorAr6TAfK3SPvTFqP+scfIpDQEJfRaGJWta7sJgP+m4dNtBf9y3yvBKVAZZtJQVA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/resources": "2.5.0" @@ -2987,7 +2982,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.5.0.tgz", "integrity": "sha512-VzRf8LzotASEyNDUxTdaJ9IRJ1/h692WyArDBInf5puLCjxbICD6XkHgpuudis56EndyS7LYFmtTMny6UABNdQ==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/resources": "2.5.0", @@ -4184,7 +4178,6 @@ "integrity": "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "csstype": "^3.0.2" } @@ -4458,7 +4451,6 @@ "integrity": "sha512-klQbnPAAiGYFyI02+znpBRLyjL4/BrBd0nyWkdC0s/6xFLkXYQ8OoRrSkqacS1ddVxf/LDyODIKbQ5TgKAf/Fg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.56.1", "@typescript-eslint/types": "8.56.1", @@ -5306,7 +5298,6 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -7860,7 +7851,6 @@ "integrity": "sha512-VmQ+sifHUbI/IcSopBCF/HO3YiHQx/AVd3UVyYL6weuwW+HvON9VYn5l6Zl1WZzPWXPNZrSQpxwkkZ/VuvJZzg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -8493,7 +8483,6 @@ "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", "license": "MIT", - "peer": 
true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", @@ -9788,7 +9777,6 @@ "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.2.tgz", "integrity": "sha512-gJnaDHXKDayjt8ue0n8Gs0A007yKXj4Xzb8+cNjZeYsSzzwKc0Lr+OZgYwVfB0pHfUs17EPoLvrOsEaJ9mj+Tg==", "license": "MIT", - "peer": true, "engines": { "node": ">=16.9.0" } @@ -10068,7 +10056,6 @@ "resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.4.11.tgz", "integrity": "sha512-93LQlzT7vvZ1XJcmOMwN4s+6W334QegendeHOMnEJBlhnpIzr8bws6/aOEHG8ZCuVD/vNeeea5m1msHIdAY6ig==", "license": "MIT", - "peer": true, "dependencies": { "@alcalzone/ansi-tokenize": "^0.2.1", "ansi-escapes": "^7.0.0", @@ -13718,7 +13705,6 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", "license": "MIT", - "peer": true, "engines": { "node": ">=0.10.0" } @@ -13729,7 +13715,6 @@ "integrity": "sha512-ePrwPfxAnB+7hgnEr8vpKxL9cmnp7F322t8oqcPshbIQQhDKgFDW4tjhF2wjVbdXF9O/nyuy3sQWd9JGpiLPvA==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "shell-quote": "^1.6.1", "ws": "^7" @@ -15689,7 +15674,6 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -15913,8 +15897,7 @@ "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", "dev": true, - "license": "0BSD", - "peer": true + "license": "0BSD" }, "node_modules/tsx": { "version": "4.20.3", @@ -15922,7 +15905,6 @@ "integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": 
"~0.25.0", "get-tsconfig": "^4.7.5" @@ -16082,7 +16064,6 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "devOptional": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -16291,7 +16272,6 @@ "resolved": "https://registry.npmjs.org/vite/-/vite-7.2.2.tgz", "integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==", "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.5.0", @@ -16405,7 +16385,6 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -16418,7 +16397,6 @@ "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz", "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==", "license": "MIT", - "peer": true, "dependencies": { "@types/chai": "^5.2.2", "@vitest/expect": "3.2.4", @@ -17063,7 +17041,6 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", "license": "MIT", - "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } @@ -17463,7 +17440,6 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts index 75812e4442..919ad86c51 100644 --- a/packages/cli/src/config/config.test.ts +++ b/packages/cli/src/config/config.test.ts @@ -2765,6 +2765,66 @@ 
describe('loadCliConfig approval mode', () => { }); }); +describe('loadCliConfig gemmaModelRouter', () => { + beforeEach(() => { + vi.resetAllMocks(); + vi.mocked(os.homedir).mockReturnValue('/mock/home/user'); + vi.stubEnv('GEMINI_API_KEY', 'test-api-key'); + vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]); + }); + + afterEach(() => { + vi.unstubAllEnvs(); + vi.restoreAllMocks(); + }); + + it('should have gemmaModelRouter disabled by default', async () => { + process.argv = ['node', 'script.js']; + const argv = await parseArguments(createTestMergedSettings()); + const settings = createTestMergedSettings(); + const config = await loadCliConfig(settings, 'test-session', argv); + expect(config.getGemmaModelRouterEnabled()).toBe(false); + }); + + it('should load gemmaModelRouter settings from merged settings', async () => { + process.argv = ['node', 'script.js']; + const argv = await parseArguments(createTestMergedSettings()); + const settings = createTestMergedSettings({ + experimental: { + gemmaModelRouter: { + enabled: true, + classifier: { + host: 'http://custom:1234', + model: 'custom-gemma', + }, + }, + }, + }); + const config = await loadCliConfig(settings, 'test-session', argv); + expect(config.getGemmaModelRouterEnabled()).toBe(true); + const gemmaSettings = config.getGemmaModelRouterSettings(); + expect(gemmaSettings.classifier?.host).toBe('http://custom:1234'); + expect(gemmaSettings.classifier?.model).toBe('custom-gemma'); + }); + + it('should handle partial gemmaModelRouter settings', async () => { + process.argv = ['node', 'script.js']; + const argv = await parseArguments(createTestMergedSettings()); + const settings = createTestMergedSettings({ + experimental: { + gemmaModelRouter: { + enabled: true, + }, + }, + }); + const config = await loadCliConfig(settings, 'test-session', argv); + expect(config.getGemmaModelRouterEnabled()).toBe(true); + const gemmaSettings = config.getGemmaModelRouterSettings(); + 
expect(gemmaSettings.classifier?.host).toBe('http://localhost:9379'); + expect(gemmaSettings.classifier?.model).toBe('gemma3-1b-gpu-custom'); + }); +}); + describe('loadCliConfig fileFiltering', () => { const originalArgv = process.argv; diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 6a4bd09470..f2870a5f57 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -856,6 +856,7 @@ export async function loadCliConfig( // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion format: (argv.outputFormat ?? settings.output?.format) as OutputFormat, }, + gemmaModelRouter: settings.experimental?.gemmaModelRouter, fakeResponses: argv.fakeResponses, recordResponses: argv.recordResponses, retryFetchErrors: settings.general?.retryFetchErrors, diff --git a/packages/cli/src/config/settingsSchema.test.ts b/packages/cli/src/config/settingsSchema.test.ts index ffe1dd2ac5..cf9dfc992f 100644 --- a/packages/cli/src/config/settingsSchema.test.ts +++ b/packages/cli/src/config/settingsSchema.test.ts @@ -444,6 +444,60 @@ describe('SettingsSchema', () => { expect(hookItemProperties.description).toBeDefined(); expect(hookItemProperties.description.type).toBe('string'); }); + + it('should have gemmaModelRouter setting in schema', () => { + const gemmaModelRouter = + getSettingsSchema().experimental.properties.gemmaModelRouter; + expect(gemmaModelRouter).toBeDefined(); + expect(gemmaModelRouter.type).toBe('object'); + expect(gemmaModelRouter.category).toBe('Experimental'); + expect(gemmaModelRouter.default).toEqual({}); + expect(gemmaModelRouter.requiresRestart).toBe(true); + expect(gemmaModelRouter.showInDialog).toBe(true); + expect(gemmaModelRouter.description).toBe( + 'Enable Gemma model router (experimental).', + ); + + const enabled = gemmaModelRouter.properties.enabled; + expect(enabled).toBeDefined(); + expect(enabled.type).toBe('boolean'); + expect(enabled.category).toBe('Experimental'); + 
expect(enabled.default).toBe(false); + expect(enabled.requiresRestart).toBe(true); + expect(enabled.showInDialog).toBe(true); + expect(enabled.description).toBe( + 'Enable the Gemma Model Router. Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.', + ); + + const classifier = gemmaModelRouter.properties.classifier; + expect(classifier).toBeDefined(); + expect(classifier.type).toBe('object'); + expect(classifier.category).toBe('Experimental'); + expect(classifier.default).toEqual({}); + expect(classifier.requiresRestart).toBe(true); + expect(classifier.showInDialog).toBe(false); + expect(classifier.description).toBe('Classifier configuration.'); + + const host = classifier.properties.host; + expect(host).toBeDefined(); + expect(host.type).toBe('string'); + expect(host.category).toBe('Experimental'); + expect(host.default).toBe('http://localhost:9379'); + expect(host.requiresRestart).toBe(true); + expect(host.showInDialog).toBe(false); + expect(host.description).toBe('The host of the classifier.'); + + const model = classifier.properties.model; + expect(model).toBeDefined(); + expect(model.type).toBe('string'); + expect(model.category).toBe('Experimental'); + expect(model.default).toBe('gemma3-1b-gpu-custom'); + expect(model.requiresRestart).toBe(true); + expect(model.showInDialog).toBe(false); + expect(model.description).toBe( + 'The model to use for the classifier. 
Only tested on `gemma3-1b-gpu-custom`.', + ); + }); }); it('has JSON schema definitions for every referenced ref', () => { diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 26faaafda7..48a7641766 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -1787,6 +1787,57 @@ const SETTINGS_SCHEMA = { 'Enable web fetch behavior that bypasses LLM summarization.', showInDialog: true, }, + gemmaModelRouter: { + type: 'object', + label: 'Gemma Model Router', + category: 'Experimental', + requiresRestart: true, + default: {}, + description: 'Enable Gemma model router (experimental).', + showInDialog: true, + properties: { + enabled: { + type: 'boolean', + label: 'Enable Gemma Model Router', + category: 'Experimental', + requiresRestart: true, + default: false, + description: + 'Enable the Gemma Model Router. Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.', + showInDialog: true, + }, + classifier: { + type: 'object', + label: 'Classifier', + category: 'Experimental', + requiresRestart: true, + default: {}, + description: 'Classifier configuration.', + showInDialog: false, + properties: { + host: { + type: 'string', + label: 'Host', + category: 'Experimental', + requiresRestart: true, + default: 'http://localhost:9379', + description: 'The host of the classifier.', + showInDialog: false, + }, + model: { + type: 'string', + label: 'Model', + category: 'Experimental', + requiresRestart: true, + default: 'gemma3-1b-gpu-custom', + description: + 'The model to use for the classifier. Only tested on `gemma3-1b-gpu-custom`.', + showInDialog: false, + }, + }, + }, + }, + }, }, }, @@ -2532,7 +2583,9 @@ type InferSettings = { : T[K]['default'] : T[K]['default'] extends boolean ? boolean - : T[K]['default']; + : T[K]['default'] extends string + ? 
string + : T[K]['default']; }; type InferMergedSettings = { @@ -2544,7 +2597,9 @@ type InferMergedSettings = { : T[K]['default'] : T[K]['default'] extends boolean ? boolean - : T[K]['default']; + : T[K]['default'] extends string + ? string + : T[K]['default']; }; export type Settings = InferSettings; diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index e92f464fa2..1034246e9c 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -225,8 +225,10 @@ import type { } from '../services/modelConfigService.js'; import { ExitPlanModeTool } from '../tools/exit-plan-mode.js'; import { EnterPlanModeTool } from '../tools/enter-plan-mode.js'; +import { LocalLiteRtLmClient } from '../core/localLiteRtLmClient.js'; vi.mock('../core/baseLlmClient.js'); +vi.mock('../core/localLiteRtLmClient.js'); vi.mock('../core/tokenLimits.js', () => ({ tokenLimit: vi.fn(), })); @@ -1418,6 +1420,79 @@ describe('Server Config (config.ts)', () => { }); }); +describe('GemmaModelRouterSettings', () => { + const MODEL = DEFAULT_GEMINI_MODEL; + const SANDBOX: SandboxConfig = { + command: 'docker', + image: 'gemini-cli-sandbox', + }; + const TARGET_DIR = '/path/to/target'; + const DEBUG_MODE = false; + const QUESTION = 'test question'; + const USER_MEMORY = 'Test User Memory'; + const TELEMETRY_SETTINGS = { enabled: false }; + const EMBEDDING_MODEL = 'gemini-embedding'; + const SESSION_ID = 'test-session-id'; + const baseParams: ConfigParameters = { + cwd: '/tmp', + embeddingModel: EMBEDDING_MODEL, + sandbox: SANDBOX, + targetDir: TARGET_DIR, + debugMode: DEBUG_MODE, + question: QUESTION, + userMemory: USER_MEMORY, + telemetry: TELEMETRY_SETTINGS, + sessionId: SESSION_ID, + model: MODEL, + usageStatisticsEnabled: false, + }; + + it('should default gemmaModelRouter.enabled to false', () => { + const config = new Config(baseParams); + expect(config.getGemmaModelRouterEnabled()).toBe(false); + }); + + it('should 
return default gemma model router settings when not provided', () => { + const config = new Config(baseParams); + const settings = config.getGemmaModelRouterSettings(); + expect(settings.enabled).toBe(false); + expect(settings.classifier?.host).toBe('http://localhost:9379'); + expect(settings.classifier?.model).toBe('gemma3-1b-gpu-custom'); + }); + + it('should override default gemma model router settings when provided', () => { + const params: ConfigParameters = { + ...baseParams, + gemmaModelRouter: { + enabled: true, + classifier: { + host: 'http://custom:1234', + model: 'custom-gemma', + }, + }, + }; + const config = new Config(params); + const settings = config.getGemmaModelRouterSettings(); + expect(settings.enabled).toBe(true); + expect(settings.classifier?.host).toBe('http://custom:1234'); + expect(settings.classifier?.model).toBe('custom-gemma'); + }); + + it('should merge partial gemma model router settings with defaults', () => { + const params: ConfigParameters = { + ...baseParams, + gemmaModelRouter: { + enabled: true, + }, + }; + const config = new Config(params); + const settings = config.getGemmaModelRouterSettings(); + expect(settings.enabled).toBe(true); + expect(settings.classifier?.host).toBe('http://localhost:9379'); + expect(settings.classifier?.model).toBe('gemma3-1b-gpu-custom'); + }); +}); + describe('setApprovalMode with folder trust', () => { const baseParams: ConfigParameters = { sessionId: 'test', @@ -2069,6 +2144,71 @@ describe('Config getHooks', () => { }); }); +describe('LocalLiteRtLmClient Lifecycle', () => { + const MODEL = 'gemini-pro'; + const SANDBOX: SandboxConfig = { + command: 'docker', + image: 'gemini-cli-sandbox', + }; + const TARGET_DIR = '/path/to/target'; + const DEBUG_MODE = false; + const QUESTION = 'test question'; + const USER_MEMORY = 'Test User Memory'; + const TELEMETRY_SETTINGS = { enabled: false }; + const EMBEDDING_MODEL = 'gemini-embedding'; + const SESSION_ID = 'test-session-id'; + const baseParams: 
ConfigParameters = { + cwd: '/tmp', + embeddingModel: EMBEDDING_MODEL, + sandbox: SANDBOX, + targetDir: TARGET_DIR, + debugMode: DEBUG_MODE, + question: QUESTION, + userMemory: USER_MEMORY, + telemetry: TELEMETRY_SETTINGS, + sessionId: SESSION_ID, + model: MODEL, + usageStatisticsEnabled: false, + }; + + beforeEach(() => { + vi.clearAllMocks(); + vi.mocked(getExperiments).mockResolvedValue({ + experimentIds: [], + flags: {}, + }); + }); + + it('should successfully initialize LocalLiteRtLmClient on first call and reuse it', () => { + const config = new Config(baseParams); + const client1 = config.getLocalLiteRtLmClient(); + const client2 = config.getLocalLiteRtLmClient(); + + expect(client1).toBeDefined(); + expect(client1).toBe(client2); // Should return the same instance + }); + + it('should configure LocalLiteRtLmClient with settings from getGemmaModelRouterSettings', () => { + const customHost = 'http://my-custom-host:9999'; + const customModel = 'my-custom-gemma-model'; + const params: ConfigParameters = { + ...baseParams, + gemmaModelRouter: { + enabled: true, + classifier: { + host: customHost, + model: customModel, + }, + }, + }; + + const config = new Config(params); + config.getLocalLiteRtLmClient(); + + expect(LocalLiteRtLmClient).toHaveBeenCalledWith(config); + }); +}); + describe('Config getExperiments', () => { const baseParams: ConfigParameters = { cwd: '/tmp', diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 7297693b8e..2f5d452446 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -38,6 +38,7 @@ import { ExitPlanModeTool } from '../tools/exit-plan-mode.js'; import { EnterPlanModeTool } from '../tools/enter-plan-mode.js'; import { GeminiClient } from '../core/client.js'; import { BaseLlmClient } from '../core/baseLlmClient.js'; +import { LocalLiteRtLmClient } from '../core/localLiteRtLmClient.js'; import type { HookDefinition, HookEventName } from '../hooks/types.js'; 
import { FileDiscoveryService } from '../services/fileDiscoveryService.js'; import { GitService } from '../services/gitService.js'; @@ -178,6 +179,14 @@ export interface ToolOutputMaskingConfig { protectLatestTurn: boolean; } +export interface GemmaModelRouterSettings { + enabled?: boolean; + classifier?: { + host?: string; + model?: string; + }; +} + export interface ExtensionSetting { name: string; description: string; @@ -509,6 +518,7 @@ export interface ConfigParameters { directWebFetch?: boolean; policyUpdateConfirmationRequest?: PolicyUpdateConfirmationRequest; output?: OutputSettings; + gemmaModelRouter?: GemmaModelRouterSettings; disableModelRouterForAuth?: AuthType[]; continueOnFailedApiCall?: boolean; retryFetchErrors?: boolean; @@ -599,6 +609,7 @@ export class Config { private readonly usageStatisticsEnabled: boolean; private geminiClient!: GeminiClient; private baseLlmClient!: BaseLlmClient; + private localLiteRtLmClient?: LocalLiteRtLmClient; private modelRouterService: ModelRouterService; private readonly modelAvailabilityService: ModelAvailabilityService; private readonly fileFiltering: { @@ -694,6 +705,9 @@ export class Config { | PolicyUpdateConfirmationRequest | undefined; private readonly outputSettings: OutputSettings; + + private readonly gemmaModelRouter: GemmaModelRouterSettings; + private readonly continueOnFailedApiCall: boolean; private readonly retryFetchErrors: boolean; private readonly maxAttempts: number; @@ -942,6 +956,15 @@ export class Config { this.outputSettings = { format: params.output?.format ?? OutputFormat.TEXT, }; + this.gemmaModelRouter = { + enabled: params.gemmaModelRouter?.enabled ?? false, + classifier: { + host: + params.gemmaModelRouter?.classifier?.host ?? 'http://localhost:9379', + model: + params.gemmaModelRouter?.classifier?.model ?? 'gemma3-1b-gpu-custom', + }, + }; this.retryFetchErrors = params.retryFetchErrors ?? false; this.maxAttempts = Math.min( params.maxAttempts ?? 
DEFAULT_MAX_ATTEMPTS, @@ -1245,6 +1268,13 @@ export class Config { return this.baseLlmClient; } + getLocalLiteRtLmClient(): LocalLiteRtLmClient { + if (!this.localLiteRtLmClient) { + this.localLiteRtLmClient = new LocalLiteRtLmClient(this); + } + return this.localLiteRtLmClient; + } + getSessionId(): string { return this.sessionId; } @@ -2578,6 +2608,14 @@ export class Config { return this.enableHooksUI; } + getGemmaModelRouterEnabled(): boolean { + return this.gemmaModelRouter.enabled ?? false; + } + + getGemmaModelRouterSettings(): GemmaModelRouterSettings { + return this.gemmaModelRouter; + } + /** * Get override settings for a specific agent. * Reads from agents.overrides.. diff --git a/packages/core/src/core/localLiteRtLmClient.test.ts b/packages/core/src/core/localLiteRtLmClient.test.ts new file mode 100644 index 0000000000..c4398b5b9c --- /dev/null +++ b/packages/core/src/core/localLiteRtLmClient.test.ts @@ -0,0 +1,125 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { LocalLiteRtLmClient } from './localLiteRtLmClient.js'; +import type { Config } from '../config/config.js'; +const mockGenerateContent = vi.fn(); + +vi.mock('@google/genai', () => { + const GoogleGenAI = vi.fn().mockImplementation(() => ({ + models: { + generateContent: mockGenerateContent, + }, + })); + return { GoogleGenAI }; +}); + +describe('LocalLiteRtLmClient', () => { + let mockConfig: Config; + + beforeEach(() => { + vi.clearAllMocks(); + mockGenerateContent.mockClear(); + + mockConfig = { + getGemmaModelRouterSettings: vi.fn().mockReturnValue({ + classifier: { + host: 'http://test-host:1234', + model: 'gemma:latest', + }, + }), + } as unknown as Config; + }); + + it('should successfully call generateJson and return parsed JSON', async () => { + mockGenerateContent.mockResolvedValue({ + text: '{"key": "value"}', + }); + + const client = new 
LocalLiteRtLmClient(mockConfig); + const result = await client.generateJson([], 'test-instruction'); + + expect(result).toEqual({ key: 'value' }); + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gemma:latest', + config: expect.objectContaining({ + responseMimeType: 'application/json', + temperature: 0, + }), + }), + ); + }); + + it('should throw an error if the API response has no text', async () => { + mockGenerateContent.mockResolvedValue({ + text: null, + }); + + const client = new LocalLiteRtLmClient(mockConfig); + await expect(client.generateJson([], 'test-instruction')).rejects.toThrow( + 'Invalid response from Local Gemini API: No text found', + ); + }); + + it('should throw if the JSON is malformed', async () => { + mockGenerateContent.mockResolvedValue({ + text: `{ + “key”: ‘value’, +}`, // Smart quotes, trailing comma + }); + + const client = new LocalLiteRtLmClient(mockConfig); + await expect(client.generateJson([], 'test-instruction')).rejects.toThrow( + SyntaxError, + ); + }); + + it('should add reminder to the last user message', async () => { + mockGenerateContent.mockResolvedValue({ + text: '{"key": "value"}', + }); + + const client = new LocalLiteRtLmClient(mockConfig); + await client.generateJson( + [{ role: 'user', parts: [{ text: 'initial prompt' }] }], + 'test-instruction', + 'test-reminder', + ); + + const calledContents = + vi.mocked(mockGenerateContent).mock.calls[0][0].contents; + expect(calledContents.at(-1)?.parts[0].text).toBe( + `initial prompt + +test-reminder`, + ); + }); + + it('should pass abortSignal to generateContent', async () => { + mockGenerateContent.mockResolvedValue({ + text: '{"key": "value"}', + }); + + const client = new LocalLiteRtLmClient(mockConfig); + const controller = new AbortController(); + await client.generateJson( + [], + 'test-instruction', + undefined, + controller.signal, + ); + + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + 
config: expect.objectContaining({ + abortSignal: controller.signal, + }), + }), + ); + }); +}); diff --git a/packages/core/src/core/localLiteRtLmClient.ts b/packages/core/src/core/localLiteRtLmClient.ts new file mode 100644 index 0000000000..8f4a020a50 --- /dev/null +++ b/packages/core/src/core/localLiteRtLmClient.ts @@ -0,0 +1,96 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { GoogleGenAI } from '@google/genai'; +import type { Config } from '../config/config.js'; +import { debugLogger } from '../utils/debugLogger.js'; +import type { Content } from '@google/genai'; + +/** + * A client for making single, non-streaming calls to a local Gemini-compatible API + * and expecting a JSON response. + */ +export class LocalLiteRtLmClient { + private readonly host: string; + private readonly model: string; + private readonly client: GoogleGenAI; + + constructor(config: Config) { + const gemmaModelRouterSettings = config.getGemmaModelRouterSettings(); + this.host = gemmaModelRouterSettings.classifier!.host!; + this.model = gemmaModelRouterSettings.classifier!.model!; + + this.client = new GoogleGenAI({ + // The LiteRT-LM server does not require an API key, but the SDK requires one to be set even for local endpoints. This is a dummy value and is not used for authentication. + apiKey: 'no-api-key-needed', + httpOptions: { + baseUrl: this.host, + // If the LiteRT-LM server is started but the wrong port is set, there will be a lengthy TCP timeout (here fixed to be 10 seconds). + // If the LiteRT-LM server is not started, there will be an immediate connection refusal. + // If the LiteRT-LM server is started and the model is unsupported or not downloaded, the server will return an error immediately. + // If the model's context window is exceeded, the server will return an error immediately. + timeout: 10000, + }, + }); + } + + /** + * Sends a prompt to the local Gemini model and expects a JSON object in response. 
+ * @param contents The history and current prompt. + * @param systemInstruction The system prompt. + * @returns A promise that resolves to the parsed JSON object. + */ + async generateJson( + contents: Content[], + systemInstruction: string, + reminder?: string, + abortSignal?: AbortSignal, + ): Promise { + const geminiContents = contents.map((c) => ({ + role: c.role, + parts: c.parts ? c.parts.map((p) => ({ text: p.text })) : [], + })); + + if (reminder) { + const lastContent = geminiContents.at(-1); + if (lastContent?.role === 'user' && lastContent.parts?.[0]?.text) { + lastContent.parts[0].text += `\n\n${reminder}`; + } + } + + try { + const result = await this.client.models.generateContent({ + model: this.model, + contents: geminiContents, + config: { + responseMimeType: 'application/json', + systemInstruction: systemInstruction + ? { parts: [{ text: systemInstruction }] } + : undefined, + temperature: 0, + maxOutputTokens: 256, + abortSignal, + }, + }); + + const text = result.text; + if (!text) { + throw new Error( + 'Invalid response from Local Gemini API: No text found', + ); + } + + // eslint-disable-next-line @typescript-eslint/no-unsafe-return + return JSON.parse(result.text); + } catch (error) { + debugLogger.error( + `[LocalLiteRtLmClient] Failed to generate content:`, + error, + ); + throw error; + } + } +} diff --git a/packages/core/src/routing/modelRouterService.test.ts b/packages/core/src/routing/modelRouterService.test.ts index 144d8d3232..ad0e3c890e 100644 --- a/packages/core/src/routing/modelRouterService.test.ts +++ b/packages/core/src/routing/modelRouterService.test.ts @@ -9,6 +9,7 @@ import { ModelRouterService } from './modelRouterService.js'; import { Config } from '../config/config.js'; import type { BaseLlmClient } from '../core/baseLlmClient.js'; +import type { LocalLiteRtLmClient } from '../core/localLiteRtLmClient.js'; import type { RoutingContext, RoutingDecision } from './routingStrategy.js'; import { DefaultStrategy } from 
'./strategies/defaultStrategy.js'; import { CompositeStrategy } from './strategies/compositeStrategy.js'; @@ -19,6 +20,7 @@ import { ClassifierStrategy } from './strategies/classifierStrategy.js'; import { NumericalClassifierStrategy } from './strategies/numericalClassifierStrategy.js'; import { logModelRouting } from '../telemetry/loggers.js'; import { ModelRoutingEvent } from '../telemetry/types.js'; +import { GemmaClassifierStrategy } from './strategies/gemmaClassifierStrategy.js'; import { ApprovalMode } from '../policy/types.js'; vi.mock('../config/config.js'); @@ -30,6 +32,7 @@ vi.mock('./strategies/overrideStrategy.js'); vi.mock('./strategies/approvalModeStrategy.js'); vi.mock('./strategies/classifierStrategy.js'); vi.mock('./strategies/numericalClassifierStrategy.js'); +vi.mock('./strategies/gemmaClassifierStrategy.js'); vi.mock('../telemetry/loggers.js'); vi.mock('../telemetry/types.js'); @@ -37,6 +40,7 @@ describe('ModelRouterService', () => { let service: ModelRouterService; let mockConfig: Config; let mockBaseLlmClient: BaseLlmClient; + let mockLocalLiteRtLmClient: LocalLiteRtLmClient; let mockContext: RoutingContext; let mockCompositeStrategy: CompositeStrategy; @@ -45,9 +49,20 @@ describe('ModelRouterService', () => { mockConfig = new Config({} as never); mockBaseLlmClient = {} as BaseLlmClient; + mockLocalLiteRtLmClient = {} as LocalLiteRtLmClient; vi.spyOn(mockConfig, 'getBaseLlmClient').mockReturnValue(mockBaseLlmClient); + vi.spyOn(mockConfig, 'getLocalLiteRtLmClient').mockReturnValue( + mockLocalLiteRtLmClient, + ); vi.spyOn(mockConfig, 'getNumericalRoutingEnabled').mockResolvedValue(false); vi.spyOn(mockConfig, 'getClassifierThreshold').mockResolvedValue(undefined); + vi.spyOn(mockConfig, 'getGemmaModelRouterSettings').mockReturnValue({ + enabled: false, + classifier: { + host: 'http://localhost:1234', + model: 'gemma3-1b-gpu-custom', + }, + }); vi.spyOn(mockConfig, 'getApprovalMode').mockReturnValue( ApprovalMode.DEFAULT, ); @@ -96,6 +111,36 @@ 
describe('ModelRouterService', () => { expect(compositeStrategyArgs[1]).toBe('agent-router'); }); + it('should include GemmaClassifierStrategy when enabled', () => { + // Override the default mock for this specific test + vi.spyOn(mockConfig, 'getGemmaModelRouterSettings').mockReturnValue({ + enabled: true, + classifier: { + host: 'http://localhost:1234', + model: 'gemma3-1b-gpu-custom', + }, + }); + + // Clear previous mock calls from beforeEach + vi.mocked(CompositeStrategy).mockClear(); + + // Re-initialize the service to pick up the new config + service = new ModelRouterService(mockConfig); + + const compositeStrategyArgs = vi.mocked(CompositeStrategy).mock.calls[0]; + const childStrategies = compositeStrategyArgs[0]; + + expect(childStrategies.length).toBe(7); + expect(childStrategies[0]).toBeInstanceOf(FallbackStrategy); + expect(childStrategies[1]).toBeInstanceOf(OverrideStrategy); + expect(childStrategies[2]).toBeInstanceOf(ApprovalModeStrategy); + expect(childStrategies[3]).toBeInstanceOf(GemmaClassifierStrategy); + expect(childStrategies[4]).toBeInstanceOf(ClassifierStrategy); + expect(childStrategies[5]).toBeInstanceOf(NumericalClassifierStrategy); + expect(childStrategies[6]).toBeInstanceOf(DefaultStrategy); + expect(compositeStrategyArgs[1]).toBe('agent-router'); + }); + describe('route()', () => { const strategyDecision: RoutingDecision = { model: 'strategy-chosen-model', @@ -117,6 +162,7 @@ describe('ModelRouterService', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toEqual(strategyDecision); }); diff --git a/packages/core/src/routing/modelRouterService.ts b/packages/core/src/routing/modelRouterService.ts index 54cfa72259..1bd19f3622 100644 --- a/packages/core/src/routing/modelRouterService.ts +++ b/packages/core/src/routing/modelRouterService.ts @@ -4,10 +4,12 @@ * SPDX-License-Identifier: Apache-2.0 */ +import { GemmaClassifierStrategy } from './strategies/gemmaClassifierStrategy.js'; import type 
{ Config } from '../config/config.js'; import type { RoutingContext, RoutingDecision, + RoutingStrategy, TerminalStrategy, } from './routingStrategy.js'; import { DefaultStrategy } from './strategies/defaultStrategy.js'; @@ -35,17 +37,31 @@ export class ModelRouterService { } private initializeDefaultStrategy(): TerminalStrategy { - // Initialize the composite strategy with the desired priority order. - // The strategies are ordered in order of highest priority. + const strategies: RoutingStrategy[] = []; + + // Order matters here. Fallback and override are checked first. + strategies.push(new FallbackStrategy()); + strategies.push(new OverrideStrategy()); + + // Approval mode is next. + strategies.push(new ApprovalModeStrategy()); + + // Then, if enabled, the Gemma classifier is used. + if (this.config.getGemmaModelRouterSettings()?.enabled) { + strategies.push(new GemmaClassifierStrategy()); + } + + // The generic classifier is next. + strategies.push(new ClassifierStrategy()); + + // The numerical classifier is next. + strategies.push(new NumericalClassifierStrategy()); + + // The default strategy is the terminal strategy. 
+ const terminalStrategy = new DefaultStrategy(); + return new CompositeStrategy( - [ - new FallbackStrategy(), - new OverrideStrategy(), - new ApprovalModeStrategy(), - new ClassifierStrategy(), - new NumericalClassifierStrategy(), - new DefaultStrategy(), - ], + [...strategies, terminalStrategy], 'agent-router', ); } @@ -75,6 +91,7 @@ export class ModelRouterService { context, this.config, this.config.getBaseLlmClient(), + this.config.getLocalLiteRtLmClient(), ); debugLogger.debug( diff --git a/packages/core/src/routing/routingStrategy.ts b/packages/core/src/routing/routingStrategy.ts index de8bcf04f1..a2f9448989 100644 --- a/packages/core/src/routing/routingStrategy.ts +++ b/packages/core/src/routing/routingStrategy.ts @@ -7,6 +7,7 @@ import type { Content, PartListUnion } from '@google/genai'; import type { BaseLlmClient } from '../core/baseLlmClient.js'; import type { Config } from '../config/config.js'; +import type { LocalLiteRtLmClient } from '../core/localLiteRtLmClient.js'; /** * The output of a routing decision. It specifies which model to use and why. 
@@ -58,6 +59,7 @@ export interface RoutingStrategy { context: RoutingContext, config: Config, baseLlmClient: BaseLlmClient, + localLiteRtLmClient: LocalLiteRtLmClient, ): Promise; } @@ -74,5 +76,6 @@ export interface TerminalStrategy extends RoutingStrategy { context: RoutingContext, config: Config, baseLlmClient: BaseLlmClient, + localLiteRtLmClient: LocalLiteRtLmClient, ): Promise; } diff --git a/packages/core/src/routing/strategies/classifierStrategy.test.ts b/packages/core/src/routing/strategies/classifierStrategy.test.ts index 7e024b790a..701e7de932 100644 --- a/packages/core/src/routing/strategies/classifierStrategy.test.ts +++ b/packages/core/src/routing/strategies/classifierStrategy.test.ts @@ -9,6 +9,7 @@ import { ClassifierStrategy } from './classifierStrategy.js'; import type { RoutingContext } from '../routingStrategy.js'; import type { Config } from '../../config/config.js'; import type { BaseLlmClient } from '../../core/baseLlmClient.js'; +import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js'; import { isFunctionCall, isFunctionResponse, @@ -34,6 +35,7 @@ describe('ClassifierStrategy', () => { let mockContext: RoutingContext; let mockConfig: Config; let mockBaseLlmClient: BaseLlmClient; + let mockLocalLiteRtLmClient: LocalLiteRtLmClient; let mockResolvedConfig: ResolvedModelConfig; beforeEach(() => { @@ -64,6 +66,7 @@ describe('ClassifierStrategy', () => { mockBaseLlmClient = { generateJson: vi.fn(), } as unknown as BaseLlmClient; + mockLocalLiteRtLmClient = {} as LocalLiteRtLmClient; vi.spyOn(promptIdContext, 'getStore').mockReturnValue('test-prompt-id'); }); @@ -76,6 +79,7 @@ describe('ClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toBeNull(); @@ -94,6 +98,7 @@ describe('ClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).not.toBeNull(); @@ -109,7 +114,12 @@ describe('ClassifierStrategy', 
() => { mockApiResponse, ); - await strategy.route(mockContext, mockConfig, mockBaseLlmClient); + await strategy.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ); expect(mockBaseLlmClient.generateJson).toHaveBeenCalledWith( expect.objectContaining({ @@ -132,6 +142,7 @@ describe('ClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(mockBaseLlmClient.generateJson).toHaveBeenCalledOnce(); @@ -159,6 +170,7 @@ describe('ClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(mockBaseLlmClient.generateJson).toHaveBeenCalledOnce(); @@ -183,6 +195,7 @@ describe('ClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toBeNull(); @@ -206,6 +219,7 @@ describe('ClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toBeNull(); @@ -233,7 +247,12 @@ describe('ClassifierStrategy', () => { mockApiResponse, ); - await strategy.route(mockContext, mockConfig, mockBaseLlmClient); + await strategy.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ); const generateJsonCall = vi.mocked(mockBaseLlmClient.generateJson).mock .calls[0][0]; @@ -269,7 +288,12 @@ describe('ClassifierStrategy', () => { mockApiResponse, ); - await strategy.route(mockContext, mockConfig, mockBaseLlmClient); + await strategy.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ); const generateJsonCall = vi.mocked(mockBaseLlmClient.generateJson).mock .calls[0][0]; @@ -305,7 +329,12 @@ describe('ClassifierStrategy', () => { mockApiResponse, ); - await strategy.route(mockContext, mockConfig, mockBaseLlmClient); + await strategy.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ); const generateJsonCall = 
vi.mocked(mockBaseLlmClient.generateJson).mock .calls[0][0]; @@ -340,6 +369,7 @@ describe('ClassifierStrategy', () => { contextWithRequestedModel, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).not.toBeNull(); @@ -363,6 +393,7 @@ describe('ClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision?.model).toBe(PREVIEW_GEMINI_3_1_MODEL); @@ -386,6 +417,7 @@ describe('ClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision?.model).toBe(PREVIEW_GEMINI_3_1_CUSTOM_TOOLS_MODEL); diff --git a/packages/core/src/routing/strategies/classifierStrategy.ts b/packages/core/src/routing/strategies/classifierStrategy.ts index 7e54d161de..5fd6208b15 100644 --- a/packages/core/src/routing/strategies/classifierStrategy.ts +++ b/packages/core/src/routing/strategies/classifierStrategy.ts @@ -20,6 +20,7 @@ import { isFunctionResponse, } from '../../utils/messageInspectors.js'; import { debugLogger } from '../../utils/debugLogger.js'; +import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js'; import { LlmRole } from '../../telemetry/types.js'; import { AuthType } from '../../core/contentGenerator.js'; @@ -132,6 +133,7 @@ export class ClassifierStrategy implements RoutingStrategy { context: RoutingContext, config: Config, baseLlmClient: BaseLlmClient, + _localLiteRtLmClient: LocalLiteRtLmClient, ): Promise { const startTime = Date.now(); try { diff --git a/packages/core/src/routing/strategies/compositeStrategy.test.ts b/packages/core/src/routing/strategies/compositeStrategy.test.ts index 1be0b8a8e3..5b627a1692 100644 --- a/packages/core/src/routing/strategies/compositeStrategy.test.ts +++ b/packages/core/src/routing/strategies/compositeStrategy.test.ts @@ -16,6 +16,7 @@ import type { Config } from '../../config/config.js'; import type { BaseLlmClient } from '../../core/baseLlmClient.js'; import { debugLogger } from 
'../../utils/debugLogger.js'; import { coreEvents } from '../../utils/events.js'; +import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js'; vi.mock('../../utils/debugLogger.js', () => ({ debugLogger: { @@ -27,6 +28,7 @@ describe('CompositeStrategy', () => { let mockContext: RoutingContext; let mockConfig: Config; let mockBaseLlmClient: BaseLlmClient; + let mockLocalLiteRtLmClient: LocalLiteRtLmClient; let mockStrategy1: RoutingStrategy; let mockStrategy2: RoutingStrategy; let mockTerminalStrategy: TerminalStrategy; @@ -38,6 +40,7 @@ describe('CompositeStrategy', () => { mockContext = {} as RoutingContext; mockConfig = {} as Config; mockBaseLlmClient = {} as BaseLlmClient; + mockLocalLiteRtLmClient = {} as LocalLiteRtLmClient; emitFeedbackSpy = vi.spyOn(coreEvents, 'emitFeedback'); @@ -84,17 +87,20 @@ describe('CompositeStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(mockStrategy1.route).toHaveBeenCalledWith( mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(mockStrategy2.route).toHaveBeenCalledWith( mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(mockTerminalStrategy.route).not.toHaveBeenCalled(); @@ -112,6 +118,7 @@ describe('CompositeStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(mockStrategy1.route).toHaveBeenCalledTimes(1); @@ -136,6 +143,7 @@ describe('CompositeStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(debugLogger.warn).toHaveBeenCalledWith( @@ -152,7 +160,12 @@ describe('CompositeStrategy', () => { const composite = new CompositeStrategy([mockTerminalStrategy]); await expect( - composite.route(mockContext, mockConfig, mockBaseLlmClient), + composite.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ), ).rejects.toThrow(terminalError); expect(emitFeedbackSpy).toHaveBeenCalledWith( 
@@ -182,6 +195,7 @@ describe('CompositeStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(result.model).toBe('some-model'); @@ -212,6 +226,7 @@ describe('CompositeStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(result.metadata.latencyMs).toBeGreaterThanOrEqual(0); diff --git a/packages/core/src/routing/strategies/compositeStrategy.ts b/packages/core/src/routing/strategies/compositeStrategy.ts index 29e6b96355..1706282864 100644 --- a/packages/core/src/routing/strategies/compositeStrategy.ts +++ b/packages/core/src/routing/strategies/compositeStrategy.ts @@ -14,6 +14,7 @@ import type { RoutingStrategy, TerminalStrategy, } from '../routingStrategy.js'; +import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js'; /** * A strategy that attempts a list of child strategies in order (Chain of Responsibility). @@ -40,6 +41,7 @@ export class CompositeStrategy implements TerminalStrategy { context: RoutingContext, config: Config, baseLlmClient: BaseLlmClient, + localLiteRtLmClient: LocalLiteRtLmClient, ): Promise { const startTime = performance.now(); @@ -57,7 +59,12 @@ export class CompositeStrategy implements TerminalStrategy { // Try non-terminal strategies, allowing them to fail gracefully. 
for (const strategy of nonTerminalStrategies) { try { - const decision = await strategy.route(context, config, baseLlmClient); + const decision = await strategy.route( + context, + config, + baseLlmClient, + localLiteRtLmClient, + ); if (decision) { return this.finalizeDecision(decision, startTime); } @@ -75,6 +82,7 @@ export class CompositeStrategy implements TerminalStrategy { context, config, baseLlmClient, + localLiteRtLmClient, ); return this.finalizeDecision(decision, startTime); diff --git a/packages/core/src/routing/strategies/defaultStrategy.test.ts b/packages/core/src/routing/strategies/defaultStrategy.test.ts index ceec72d171..de27a84e19 100644 --- a/packages/core/src/routing/strategies/defaultStrategy.test.ts +++ b/packages/core/src/routing/strategies/defaultStrategy.test.ts @@ -8,6 +8,7 @@ import { describe, it, expect, vi } from 'vitest'; import { DefaultStrategy } from './defaultStrategy.js'; import type { RoutingContext } from '../routingStrategy.js'; import type { BaseLlmClient } from '../../core/baseLlmClient.js'; +import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js'; import { DEFAULT_GEMINI_MODEL, PREVIEW_GEMINI_MODEL, @@ -26,8 +27,14 @@ describe('DefaultStrategy', () => { getModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL_AUTO), } as unknown as Config; const mockClient = {} as BaseLlmClient; + const mockLocalLiteRtLmClient = {} as LocalLiteRtLmClient; - const decision = await strategy.route(mockContext, mockConfig, mockClient); + const decision = await strategy.route( + mockContext, + mockConfig, + mockClient, + mockLocalLiteRtLmClient, + ); expect(decision).toEqual({ model: DEFAULT_GEMINI_MODEL, @@ -46,8 +53,14 @@ describe('DefaultStrategy', () => { getModel: vi.fn().mockReturnValue(PREVIEW_GEMINI_MODEL_AUTO), } as unknown as Config; const mockClient = {} as BaseLlmClient; + const mockLocalLiteRtLmClient = {} as LocalLiteRtLmClient; - const decision = await strategy.route(mockContext, mockConfig, mockClient); + const 
decision = await strategy.route( + mockContext, + mockConfig, + mockClient, + mockLocalLiteRtLmClient, + ); expect(decision).toEqual({ model: PREVIEW_GEMINI_MODEL, @@ -66,8 +79,14 @@ describe('DefaultStrategy', () => { getModel: vi.fn().mockReturnValue(GEMINI_MODEL_ALIAS_AUTO), } as unknown as Config; const mockClient = {} as BaseLlmClient; + const mockLocalLiteRtLmClient = {} as LocalLiteRtLmClient; - const decision = await strategy.route(mockContext, mockConfig, mockClient); + const decision = await strategy.route( + mockContext, + mockConfig, + mockClient, + mockLocalLiteRtLmClient, + ); expect(decision).toEqual({ model: PREVIEW_GEMINI_MODEL, @@ -87,8 +106,14 @@ describe('DefaultStrategy', () => { getModel: vi.fn().mockReturnValue(PREVIEW_GEMINI_FLASH_MODEL), } as unknown as Config; const mockClient = {} as BaseLlmClient; + const mockLocalLiteRtLmClient = {} as LocalLiteRtLmClient; - const decision = await strategy.route(mockContext, mockConfig, mockClient); + const decision = await strategy.route( + mockContext, + mockConfig, + mockClient, + mockLocalLiteRtLmClient, + ); expect(decision).toEqual({ model: PREVIEW_GEMINI_FLASH_MODEL, diff --git a/packages/core/src/routing/strategies/defaultStrategy.ts b/packages/core/src/routing/strategies/defaultStrategy.ts index 1f5b7e54c2..d380ba7ad2 100644 --- a/packages/core/src/routing/strategies/defaultStrategy.ts +++ b/packages/core/src/routing/strategies/defaultStrategy.ts @@ -12,6 +12,7 @@ import type { TerminalStrategy, } from '../routingStrategy.js'; import { resolveModel } from '../../config/models.js'; +import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js'; export class DefaultStrategy implements TerminalStrategy { readonly name = 'default'; @@ -20,6 +21,7 @@ export class DefaultStrategy implements TerminalStrategy { _context: RoutingContext, config: Config, _baseLlmClient: BaseLlmClient, + _localLiteRtLmClient: LocalLiteRtLmClient, ): Promise { const defaultModel = resolveModel( 
config.getModel(), diff --git a/packages/core/src/routing/strategies/fallbackStrategy.test.ts b/packages/core/src/routing/strategies/fallbackStrategy.test.ts index d0be7938c4..ffe2ed6446 100644 --- a/packages/core/src/routing/strategies/fallbackStrategy.test.ts +++ b/packages/core/src/routing/strategies/fallbackStrategy.test.ts @@ -10,6 +10,7 @@ import type { RoutingContext } from '../routingStrategy.js'; import type { BaseLlmClient } from '../../core/baseLlmClient.js'; import type { Config } from '../../config/config.js'; import type { ModelAvailabilityService } from '../../availability/modelAvailabilityService.js'; +import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js'; import { DEFAULT_GEMINI_MODEL, DEFAULT_GEMINI_FLASH_MODEL, @@ -32,6 +33,7 @@ describe('FallbackStrategy', () => { const strategy = new FallbackStrategy(); const mockContext = {} as RoutingContext; const mockClient = {} as BaseLlmClient; + const mockLocalLiteRtLmClient = {} as LocalLiteRtLmClient; let mockService: ModelAvailabilityService; let mockConfig: Config; @@ -51,7 +53,12 @@ describe('FallbackStrategy', () => { // Mock snapshot to return available vi.mocked(mockService.snapshot).mockReturnValue({ available: true }); - const decision = await strategy.route(mockContext, mockConfig, mockClient); + const decision = await strategy.route( + mockContext, + mockConfig, + mockClient, + mockLocalLiteRtLmClient, + ); expect(decision).toBeNull(); // Should check availability of the resolved model (DEFAULT_GEMINI_MODEL) expect(mockService.snapshot).toHaveBeenCalledWith(DEFAULT_GEMINI_MODEL); @@ -69,7 +76,12 @@ describe('FallbackStrategy', () => { skipped: [], }); - const decision = await strategy.route(mockContext, mockConfig, mockClient); + const decision = await strategy.route( + mockContext, + mockConfig, + mockClient, + mockLocalLiteRtLmClient, + ); expect(decision).toBeNull(); }); @@ -86,7 +98,12 @@ describe('FallbackStrategy', () => { skipped: [{ model: DEFAULT_GEMINI_MODEL, 
reason: 'quota' }], }); - const decision = await strategy.route(mockContext, mockConfig, mockClient); + const decision = await strategy.route( + mockContext, + mockConfig, + mockClient, + mockLocalLiteRtLmClient, + ); expect(decision).not.toBeNull(); expect(decision?.model).toBe(DEFAULT_GEMINI_FLASH_MODEL); @@ -101,7 +118,12 @@ describe('FallbackStrategy', () => { vi.mocked(mockService.snapshot).mockReturnValue({ available: true }); vi.mocked(mockConfig.getModel).mockReturnValue(DEFAULT_GEMINI_MODEL_AUTO); - const decision = await strategy.route(mockContext, mockConfig, mockClient); + const decision = await strategy.route( + mockContext, + mockConfig, + mockClient, + mockLocalLiteRtLmClient, + ); expect(decision).toBeNull(); // Important: check that it queried snapshot with the RESOLVED model, not 'auto' @@ -122,6 +144,7 @@ describe('FallbackStrategy', () => { contextWithRequestedModel, mockConfig, mockClient, + mockLocalLiteRtLmClient, ); expect(decision).toBeNull(); diff --git a/packages/core/src/routing/strategies/fallbackStrategy.ts b/packages/core/src/routing/strategies/fallbackStrategy.ts index a18e4fc4dd..21a080e9da 100644 --- a/packages/core/src/routing/strategies/fallbackStrategy.ts +++ b/packages/core/src/routing/strategies/fallbackStrategy.ts @@ -13,6 +13,7 @@ import type { RoutingDecision, RoutingStrategy, } from '../routingStrategy.js'; +import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js'; export class FallbackStrategy implements RoutingStrategy { readonly name = 'fallback'; @@ -21,6 +22,7 @@ export class FallbackStrategy implements RoutingStrategy { context: RoutingContext, config: Config, _baseLlmClient: BaseLlmClient, + _localLiteRtLmClient: LocalLiteRtLmClient, ): Promise { const requestedModel = context.requestedModel ?? 
config.getModel(); const resolvedModel = resolveModel( diff --git a/packages/core/src/routing/strategies/gemmaClassifierStrategy.test.ts b/packages/core/src/routing/strategies/gemmaClassifierStrategy.test.ts new file mode 100644 index 0000000000..9425208fd7 --- /dev/null +++ b/packages/core/src/routing/strategies/gemmaClassifierStrategy.test.ts @@ -0,0 +1,324 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Mock } from 'vitest'; +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { GemmaClassifierStrategy } from './gemmaClassifierStrategy.js'; +import type { RoutingContext } from '../routingStrategy.js'; +import type { Config } from '../../config/config.js'; +import type { BaseLlmClient } from '../../core/baseLlmClient.js'; +import { + DEFAULT_GEMINI_FLASH_MODEL, + DEFAULT_GEMINI_MODEL, +} from '../../config/models.js'; +import type { Content } from '@google/genai'; +import { debugLogger } from '../../utils/debugLogger.js'; +import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js'; + +vi.mock('../../core/localLiteRtLmClient.js'); + +describe('GemmaClassifierStrategy', () => { + let strategy: GemmaClassifierStrategy; + let mockContext: RoutingContext; + let mockConfig: Config; + let mockBaseLlmClient: BaseLlmClient; + let mockLocalLiteRtLmClient: LocalLiteRtLmClient; + let mockGenerateJson: Mock; + + beforeEach(() => { + vi.clearAllMocks(); + mockGenerateJson = vi.fn(); + + mockConfig = { + getGemmaModelRouterSettings: vi.fn().mockReturnValue({ + enabled: true, + classifier: { model: 'gemma3-1b-gpu-custom' }, + }), + getModel: () => DEFAULT_GEMINI_MODEL, + getPreviewFeatures: () => false, + } as unknown as Config; + + strategy = new GemmaClassifierStrategy(); + mockContext = { + history: [], + request: 'simple task', + signal: new AbortController().signal, + }; + + mockBaseLlmClient = {} as BaseLlmClient; + mockLocalLiteRtLmClient = { + generateJson: 
mockGenerateJson, + } as unknown as LocalLiteRtLmClient; + }); + + it('should return null if gemma model router is disabled', async () => { + vi.mocked(mockConfig.getGemmaModelRouterSettings).mockReturnValue({ + enabled: false, + }); + + const decision = await strategy.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ); + expect(decision).toBeNull(); + }); + + it('should throw an error if the model is not gemma3-1b-gpu-custom', async () => { + vi.mocked(mockConfig.getGemmaModelRouterSettings).mockReturnValue({ + enabled: true, + classifier: { model: 'other-model' }, + }); + + await expect( + strategy.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ), + ).rejects.toThrow('Only gemma3-1b-gpu-custom has been tested'); + }); + + it('should call generateJson with the correct parameters', async () => { + const mockApiResponse = { + reasoning: 'Simple task', + model_choice: 'flash', + }; + mockGenerateJson.mockResolvedValue(mockApiResponse); + + await strategy.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ); + + expect(mockGenerateJson).toHaveBeenCalledWith( + expect.any(Array), + expect.any(String), + expect.any(String), + expect.any(AbortSignal), + ); + }); + + it('should route to FLASH model for a simple task', async () => { + const mockApiResponse = { + reasoning: 'This is a simple task.', + model_choice: 'flash', + }; + mockGenerateJson.mockResolvedValue(mockApiResponse); + + const decision = await strategy.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ); + + expect(mockGenerateJson).toHaveBeenCalledOnce(); + expect(decision).toEqual({ + model: DEFAULT_GEMINI_FLASH_MODEL, + metadata: { + source: 'GemmaClassifier', + latencyMs: expect.any(Number), + reasoning: mockApiResponse.reasoning, + }, + }); + }); + + it('should route to PRO model for a complex task', async () => { + const mockApiResponse = { + reasoning: 
'This is a complex task.', + model_choice: 'pro', + }; + mockGenerateJson.mockResolvedValue(mockApiResponse); + mockContext.request = 'how do I build a spaceship?'; + + const decision = await strategy.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ); + + expect(mockGenerateJson).toHaveBeenCalledOnce(); + expect(decision).toEqual({ + model: DEFAULT_GEMINI_MODEL, + metadata: { + source: 'GemmaClassifier', + latencyMs: expect.any(Number), + reasoning: mockApiResponse.reasoning, + }, + }); + }); + + it('should return null if the classifier API call fails', async () => { + const consoleWarnSpy = vi + .spyOn(debugLogger, 'warn') + .mockImplementation(() => {}); + const testError = new Error('API Failure'); + mockGenerateJson.mockRejectedValue(testError); + + const decision = await strategy.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ); + + expect(decision).toBeNull(); + expect(consoleWarnSpy).toHaveBeenCalled(); + consoleWarnSpy.mockRestore(); + }); + + it('should return null if the classifier returns a malformed JSON object', async () => { + const consoleWarnSpy = vi + .spyOn(debugLogger, 'warn') + .mockImplementation(() => {}); + const malformedApiResponse = { + reasoning: 'This is a simple task.', + // model_choice is missing, which will cause a Zod parsing error. 
+ }; + mockGenerateJson.mockResolvedValue(malformedApiResponse); + + const decision = await strategy.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ); + + expect(decision).toBeNull(); + expect(consoleWarnSpy).toHaveBeenCalled(); + consoleWarnSpy.mockRestore(); + }); + + it('should filter out tool-related history before sending to classifier', async () => { + mockContext.history = [ + { role: 'user', parts: [{ text: 'call a tool' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'test_tool', args: {} } }], + }, + { + role: 'user', + parts: [ + { functionResponse: { name: 'test_tool', response: { ok: true } } }, + ], + }, + { role: 'user', parts: [{ text: 'another user turn' }] }, + ]; + const mockApiResponse = { + reasoning: 'Simple.', + model_choice: 'flash', + }; + mockGenerateJson.mockResolvedValue(mockApiResponse); + + await strategy.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ); + + // Define a type for the arguments passed to the mock `generateJson` + type GenerateJsonCall = [Content[], string, string | undefined]; + const calls = mockGenerateJson.mock.calls as GenerateJsonCall[]; + const contents = calls[0][0]; + const lastTurn = contents.at(-1); + expect(lastTurn).toBeDefined(); + if (!lastTurn?.parts) { + // Fail test if parts is not defined. + expect(lastTurn?.parts).toBeDefined(); + return; + } + const expectedLastTurn = `You are provided with a **Chat History** and the user's **Current Request** below. 
+ +#### Chat History: +call a tool + +another user turn + +#### Current Request: +"simple task" +`; + expect(lastTurn.parts.at(0)?.text).toEqual(expectedLastTurn); + }); + + it('should respect HISTORY_SEARCH_WINDOW and HISTORY_TURNS_FOR_CONTEXT', async () => { + const longHistory: Content[] = []; + for (let i = 0; i < 30; i++) { + longHistory.push({ role: 'user', parts: [{ text: `Message ${i}` }] }); + // Add noise that should be filtered + if (i % 2 === 0) { + longHistory.push({ + role: 'model', + parts: [{ functionCall: { name: 'noise', args: {} } }], + }); + } + } + mockContext.history = longHistory; + const mockApiResponse = { + reasoning: 'Simple.', + model_choice: 'flash', + }; + mockGenerateJson.mockResolvedValue(mockApiResponse); + + await strategy.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ); + + const generateJsonCall = mockGenerateJson.mock.calls[0][0]; + + // There should be 1 item which is the flattened history. + expect(generateJsonCall).toHaveLength(1); + }); + + it('should filter out non-text parts from history', async () => { + mockContext.history = [ + { role: 'user', parts: [{ text: 'first message' }] }, + // This part has no `text` property and should be filtered out. + { role: 'user', parts: [{}] } as Content, + { role: 'user', parts: [{ text: 'second message' }] }, + ]; + const mockApiResponse = { + reasoning: 'Simple.', + model_choice: 'flash', + }; + mockGenerateJson.mockResolvedValue(mockApiResponse); + + await strategy.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ); + + type GenerateJsonCall = [Content[], string, string | undefined]; + const calls = mockGenerateJson.mock.calls as GenerateJsonCall[]; + const contents = calls[0][0]; + const lastTurn = contents.at(-1); + expect(lastTurn).toBeDefined(); + + const expectedLastTurn = `You are provided with a **Chat History** and the user's **Current Request** below. 
+ +#### Chat History: +first message + +second message + +#### Current Request: +"simple task" +`; + + expect(lastTurn!.parts!.at(0)!.text).toEqual(expectedLastTurn); + }); +}); diff --git a/packages/core/src/routing/strategies/gemmaClassifierStrategy.ts b/packages/core/src/routing/strategies/gemmaClassifierStrategy.ts new file mode 100644 index 0000000000..f1175cc101 --- /dev/null +++ b/packages/core/src/routing/strategies/gemmaClassifierStrategy.ts @@ -0,0 +1,232 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { z } from 'zod'; + +import type { BaseLlmClient } from '../../core/baseLlmClient.js'; +import type { + RoutingContext, + RoutingDecision, + RoutingStrategy, +} from '../routingStrategy.js'; +import { resolveClassifierModel } from '../../config/models.js'; +import { createUserContent, type Content, type Part } from '@google/genai'; +import type { Config } from '../../config/config.js'; +import { + isFunctionCall, + isFunctionResponse, +} from '../../utils/messageInspectors.js'; +import { debugLogger } from '../../utils/debugLogger.js'; +import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js'; + +// The number of recent history turns to provide to the router for context. +const HISTORY_TURNS_FOR_CONTEXT = 4; +const HISTORY_SEARCH_WINDOW = 20; + +const FLASH_MODEL = 'flash'; +const PRO_MODEL = 'pro'; + +const COMPLEXITY_RUBRIC = `### Complexity Rubric +A task is COMPLEX (Choose \`${PRO_MODEL}\`) if it meets ONE OR MORE of the following criteria: +1. **High Operational Complexity (Est. 4+ Steps/Tool Calls):** Requires dependent actions, significant planning, or multiple coordinated changes. +2. **Strategic Planning & Conceptual Design:** Asking "how" or "why." Requires advice, architecture, or high-level strategy. +3. **High Ambiguity or Large Scope (Extensive Investigation):** Broadly defined requests requiring extensive investigation. +4. 
**Deep Debugging & Root Cause Analysis:** Diagnosing unknown or complex problems from symptoms. +A task is SIMPLE (Choose \`${FLASH_MODEL}\`) if it is highly specific, bounded, and has Low Operational Complexity (Est. 1-3 tool calls). Operational simplicity overrides strategic phrasing.`; + +const OUTPUT_FORMAT = `### Output Format +Respond *only* in JSON format like this: +{ + "reasoning": Your reasoning... + "model_choice": Either ${FLASH_MODEL} or ${PRO_MODEL} +} +And you must follow the following JSON schema: +{ + "type": "object", + "properties": { + "reasoning": { + "type": "string", + "description": "A brief summary of the user objective, followed by a step-by-step explanation for the model choice, referencing the rubric." + }, + "model_choice": { + "type": "string", + "enum": ["${FLASH_MODEL}", "${PRO_MODEL}"] + } + }, + "required": ["reasoning", "model_choice"] +} +You must ensure that your reasoning is no more than 2 sentences long and directly references the rubric criteria. +When making your decision, the user's request should be weighted much more heavily than the surrounding context when making your determination.`; + +const LITERT_GEMMA_CLASSIFIER_SYSTEM_PROMPT = `### Role +You are the **Lead Orchestrator** for an AI system. You do not talk to users. Your sole responsibility is to analyze the **Chat History** and delegate the **Current Request** to the most appropriate **Model** based on the request's complexity. + +### Models +Choose between \`${FLASH_MODEL}\` (SIMPLE) or \`${PRO_MODEL}\` (COMPLEX). +1. \`${FLASH_MODEL}\`: A fast, efficient model for simple, well-defined tasks. +2. \`${PRO_MODEL}\`: A powerful, advanced model for complex, open-ended, or multi-step tasks. + +${COMPLEXITY_RUBRIC} + +${OUTPUT_FORMAT} + +### Examples +**Example 1 (Strategic Planning):** +*User Prompt:* "How should I architect the data pipeline for this new analytics service?" 
+*Your JSON Output:* +{ + "reasoning": "The user is asking for high-level architectural design and strategy. This falls under 'Strategic Planning & Conceptual Design'.", + "model_choice": "${PRO_MODEL}" +} +**Example 2 (Simple Tool Use):** +*User Prompt:* "list the files in the current directory" +*Your JSON Output:* +{ + "reasoning": "This is a direct command requiring a single tool call (ls). It has Low Operational Complexity (1 step).", + "model_choice": "${FLASH_MODEL}" +} +**Example 3 (High Operational Complexity):** +*User Prompt:* "I need to add a new 'email' field to the User schema in 'src/models/user.ts', migrate the database, and update the registration endpoint." +*Your JSON Output:* +{ + "reasoning": "This request involves multiple coordinated steps across different files and systems. This meets the criteria for High Operational Complexity (4+ steps).", + "model_choice": "${PRO_MODEL}" +} +**Example 4 (Simple Read):** +*User Prompt:* "Read the contents of 'package.json'." +*Your JSON Output:* +{ + "reasoning": "This is a direct command requiring a single read. It has Low Operational Complexity (1 step).", + "model_choice": "${FLASH_MODEL}" +} +**Example 5 (Deep Debugging):** +*User Prompt:* "I'm getting an error 'Cannot read property 'map' of undefined' when I click the save button. Can you fix it?" +*Your JSON Output:* +{ + "reasoning": "The user is reporting an error symptom without a known cause. This requires investigation and falls under 'Deep Debugging'.", + "model_choice": "${PRO_MODEL}" +} +**Example 6 (Simple Edit despite Phrasing):** +*User Prompt:* "What is the best way to rename the variable 'data' to 'userData' in 'src/utils.js'?" +*Your JSON Output:* +{ + "reasoning": "Although the user uses strategic language ('best way'), the underlying task is a localized edit. The operational complexity is low (1-2 steps).", + "model_choice": "${FLASH_MODEL}" +} +`; + +const LITERT_GEMMA_CLASSIFIER_REMINDER = `### Reminder +You are a Task Routing AI. 
Your sole task is to analyze the preceding **Chat History** and **Current Request** and classify its complexity. + +${COMPLEXITY_RUBRIC} + +${OUTPUT_FORMAT} +`; + +const ClassifierResponseSchema = z.object({ + reasoning: z.string(), + model_choice: z.enum([FLASH_MODEL, PRO_MODEL]), +}); + +export class GemmaClassifierStrategy implements RoutingStrategy { + readonly name = 'gemma-classifier'; + + private flattenChatHistory(turns: Content[]): Content[] { + const formattedHistory = turns + .slice(0, -1) + .map((turn) => + turn.parts + ? turn.parts + .map((part) => part.text) + .filter(Boolean) + .join('\n') + : '', + ) + .filter(Boolean) + .join('\n\n'); + + const lastTurn = turns.at(-1); + const userRequest = + lastTurn?.parts + ?.map((part: Part) => part.text) + .filter(Boolean) + .join('\n\n') ?? ''; + + const finalPrompt = `You are provided with a **Chat History** and the user's **Current Request** below. + +#### Chat History: +${formattedHistory} + +#### Current Request: +"${userRequest}" +`; + return [createUserContent(finalPrompt)]; + } + + async route( + context: RoutingContext, + config: Config, + _baseLlmClient: BaseLlmClient, + client: LocalLiteRtLmClient, + ): Promise { + const startTime = Date.now(); + const gemmaRouterSettings = config.getGemmaModelRouterSettings(); + if (!gemmaRouterSettings?.enabled) { + return null; + } + + // Only the gemma3-1b-gpu-custom model has been tested and verified. + if (gemmaRouterSettings.classifier?.model !== 'gemma3-1b-gpu-custom') { + throw new Error('Only gemma3-1b-gpu-custom has been tested'); + } + + try { + const historySlice = context.history.slice(-HISTORY_SEARCH_WINDOW); + + // Filter out tool-related turns. + // TODO - Consider using function req/res if they help accuracy. + const cleanHistory = historySlice.filter( + (content) => !isFunctionCall(content) && !isFunctionResponse(content), + ); + + // Take the last N turns from the *cleaned* history. 
+ const finalHistory = cleanHistory.slice(-HISTORY_TURNS_FOR_CONTEXT); + + const history = [...finalHistory, createUserContent(context.request)]; + const singleMessageHistory = this.flattenChatHistory(history); + + const jsonResponse = await client.generateJson( + singleMessageHistory, + LITERT_GEMMA_CLASSIFIER_SYSTEM_PROMPT, + LITERT_GEMMA_CLASSIFIER_REMINDER, + context.signal, + ); + + const routerResponse = ClassifierResponseSchema.parse(jsonResponse); + + const reasoning = routerResponse.reasoning; + const latencyMs = Date.now() - startTime; + const selectedModel = resolveClassifierModel( + context.requestedModel ?? config.getModel(), + routerResponse.model_choice, + ); + + return { + model: selectedModel, + metadata: { + source: 'GemmaClassifier', + latencyMs, + reasoning, + }, + }; + } catch (error) { + // If the classifier fails for any reason (API error, parsing error, etc.), + // we log it and return null to allow the composite strategy to proceed. + debugLogger.warn(`[Routing] GemmaClassifierStrategy failed:`, error); + return null; + } + } +} diff --git a/packages/core/src/routing/strategies/numericalClassifierStrategy.test.ts b/packages/core/src/routing/strategies/numericalClassifierStrategy.test.ts index b8f6c50282..77fc69a218 100644 --- a/packages/core/src/routing/strategies/numericalClassifierStrategy.test.ts +++ b/packages/core/src/routing/strategies/numericalClassifierStrategy.test.ts @@ -22,6 +22,7 @@ import { promptIdContext } from '../../utils/promptIdContext.js'; import type { Content } from '@google/genai'; import type { ResolvedModelConfig } from '../../services/modelConfigService.js'; import { debugLogger } from '../../utils/debugLogger.js'; +import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js'; import { AuthType } from '../../core/contentGenerator.js'; vi.mock('../../core/baseLlmClient.js'); @@ -31,6 +32,7 @@ describe('NumericalClassifierStrategy', () => { let mockContext: RoutingContext; let mockConfig: Config; let 
mockBaseLlmClient: BaseLlmClient; + let mockLocalLiteRtLmClient: LocalLiteRtLmClient; let mockResolvedConfig: ResolvedModelConfig; beforeEach(() => { @@ -63,6 +65,7 @@ describe('NumericalClassifierStrategy', () => { mockBaseLlmClient = { generateJson: vi.fn(), } as unknown as BaseLlmClient; + mockLocalLiteRtLmClient = {} as LocalLiteRtLmClient; vi.spyOn(promptIdContext, 'getStore').mockReturnValue('test-prompt-id'); }); @@ -78,6 +81,7 @@ describe('NumericalClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toBeNull(); @@ -91,6 +95,7 @@ describe('NumericalClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toBeNull(); @@ -104,6 +109,7 @@ describe('NumericalClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toBeNull(); @@ -119,7 +125,12 @@ describe('NumericalClassifierStrategy', () => { mockApiResponse, ); - await strategy.route(mockContext, mockConfig, mockBaseLlmClient); + await strategy.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ); const generateJsonCall = vi.mocked(mockBaseLlmClient.generateJson).mock .calls[0][0]; @@ -151,6 +162,7 @@ describe('NumericalClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toEqual({ @@ -177,6 +189,7 @@ describe('NumericalClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toEqual({ @@ -203,6 +216,7 @@ describe('NumericalClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toEqual({ @@ -229,6 +243,7 @@ describe('NumericalClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toEqual({ @@ -257,6 +272,7 @@ 
describe('NumericalClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toEqual({ @@ -283,6 +299,7 @@ describe('NumericalClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toEqual({ @@ -309,6 +326,7 @@ describe('NumericalClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toEqual({ @@ -337,6 +355,7 @@ describe('NumericalClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toEqual({ @@ -364,6 +383,7 @@ describe('NumericalClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toEqual({ @@ -391,6 +411,7 @@ describe('NumericalClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toEqual({ @@ -415,6 +436,7 @@ describe('NumericalClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toBeNull(); @@ -437,6 +459,7 @@ describe('NumericalClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision).toBeNull(); @@ -463,7 +486,12 @@ describe('NumericalClassifierStrategy', () => { mockApiResponse, ); - await strategy.route(mockContext, mockConfig, mockBaseLlmClient); + await strategy.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ); const generateJsonCall = vi.mocked(mockBaseLlmClient.generateJson).mock .calls[0][0]; @@ -495,7 +523,12 @@ describe('NumericalClassifierStrategy', () => { mockApiResponse, ); - await strategy.route(mockContext, mockConfig, mockBaseLlmClient); + await strategy.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ); const generateJsonCall = 
vi.mocked(mockBaseLlmClient.generateJson).mock .calls[0][0]; @@ -528,7 +561,12 @@ describe('NumericalClassifierStrategy', () => { mockApiResponse, ); - await strategy.route(mockContext, mockConfig, mockBaseLlmClient); + await strategy.route( + mockContext, + mockConfig, + mockBaseLlmClient, + mockLocalLiteRtLmClient, + ); const generateJsonCall = vi.mocked(mockBaseLlmClient.generateJson).mock .calls[0][0]; @@ -558,6 +596,7 @@ describe('NumericalClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision?.model).toBe(PREVIEW_GEMINI_3_1_MODEL); @@ -579,6 +618,7 @@ describe('NumericalClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision?.model).toBe(PREVIEW_GEMINI_3_1_CUSTOM_TOOLS_MODEL); @@ -601,6 +641,7 @@ describe('NumericalClassifierStrategy', () => { mockContext, mockConfig, mockBaseLlmClient, + mockLocalLiteRtLmClient, ); expect(decision?.model).toBe(PREVIEW_GEMINI_3_1_MODEL); diff --git a/packages/core/src/routing/strategies/numericalClassifierStrategy.ts b/packages/core/src/routing/strategies/numericalClassifierStrategy.ts index 32cc6ccbb7..39805fb43c 100644 --- a/packages/core/src/routing/strategies/numericalClassifierStrategy.ts +++ b/packages/core/src/routing/strategies/numericalClassifierStrategy.ts @@ -16,6 +16,7 @@ import { resolveClassifierModel, isGemini3Model } from '../../config/models.js'; import { createUserContent, Type } from '@google/genai'; import type { Config } from '../../config/config.js'; import { debugLogger } from '../../utils/debugLogger.js'; +import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js'; import { LlmRole } from '../../telemetry/types.js'; import { AuthType } from '../../core/contentGenerator.js'; @@ -133,6 +134,7 @@ export class NumericalClassifierStrategy implements RoutingStrategy { context: RoutingContext, config: Config, baseLlmClient: BaseLlmClient, + _localLiteRtLmClient: 
LocalLiteRtLmClient, ): Promise { const startTime = Date.now(); try { diff --git a/packages/core/src/routing/strategies/overrideStrategy.test.ts b/packages/core/src/routing/strategies/overrideStrategy.test.ts index 73c1aeec62..804ee8f962 100644 --- a/packages/core/src/routing/strategies/overrideStrategy.test.ts +++ b/packages/core/src/routing/strategies/overrideStrategy.test.ts @@ -10,18 +10,25 @@ import type { RoutingContext } from '../routingStrategy.js'; import type { BaseLlmClient } from '../../core/baseLlmClient.js'; import type { Config } from '../../config/config.js'; import { DEFAULT_GEMINI_MODEL_AUTO } from '../../config/models.js'; +import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js'; describe('OverrideStrategy', () => { const strategy = new OverrideStrategy(); const mockContext = {} as RoutingContext; const mockClient = {} as BaseLlmClient; + const mockLocalLiteRtLmClient = {} as LocalLiteRtLmClient; it('should return null when the override model is auto', async () => { const mockConfig = { getModel: () => DEFAULT_GEMINI_MODEL_AUTO, } as Config; - const decision = await strategy.route(mockContext, mockConfig, mockClient); + const decision = await strategy.route( + mockContext, + mockConfig, + mockClient, + mockLocalLiteRtLmClient, + ); expect(decision).toBeNull(); }); @@ -31,7 +38,12 @@ describe('OverrideStrategy', () => { getModel: () => overrideModel, } as Config; - const decision = await strategy.route(mockContext, mockConfig, mockClient); + const decision = await strategy.route( + mockContext, + mockConfig, + mockClient, + mockLocalLiteRtLmClient, + ); expect(decision).not.toBeNull(); expect(decision?.model).toBe(overrideModel); @@ -48,7 +60,12 @@ describe('OverrideStrategy', () => { getModel: () => overrideModel, } as Config; - const decision = await strategy.route(mockContext, mockConfig, mockClient); + const decision = await strategy.route( + mockContext, + mockConfig, + mockClient, + mockLocalLiteRtLmClient, + ); 
expect(decision).not.toBeNull(); expect(decision?.model).toBe(overrideModel); @@ -68,6 +85,7 @@ describe('OverrideStrategy', () => { contextWithRequestedModel, mockConfig, mockClient, + mockLocalLiteRtLmClient, ); expect(decision).not.toBeNull(); diff --git a/packages/core/src/routing/strategies/overrideStrategy.ts b/packages/core/src/routing/strategies/overrideStrategy.ts index 5101ba9fe7..9a89d2af70 100644 --- a/packages/core/src/routing/strategies/overrideStrategy.ts +++ b/packages/core/src/routing/strategies/overrideStrategy.ts @@ -12,6 +12,7 @@ import type { RoutingDecision, RoutingStrategy, } from '../routingStrategy.js'; +import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js'; /** * Handles cases where the user explicitly specifies a model (override). @@ -23,6 +24,7 @@ export class OverrideStrategy implements RoutingStrategy { context: RoutingContext, config: Config, _baseLlmClient: BaseLlmClient, + _localLiteRtLmClient: LocalLiteRtLmClient, ): Promise { const overrideModel = context.requestedModel ?? config.getModel(); diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index 059584a73f..51bf9c84e2 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -1694,6 +1694,47 @@ "markdownDescription": "Enable web fetch behavior that bypasses LLM summarization.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`", "default": false, "type": "boolean" + }, + "gemmaModelRouter": { + "title": "Gemma Model Router", + "description": "Enable Gemma model router (experimental).", + "markdownDescription": "Enable Gemma model router (experimental).\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `{}`", + "default": {}, + "type": "object", + "properties": { + "enabled": { + "title": "Enable Gemma Model Router", + "description": "Enable the Gemma Model Router. 
Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.", + "markdownDescription": "Enable the Gemma Model Router. Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`", + "default": false, + "type": "boolean" + }, + "classifier": { + "title": "Classifier", + "description": "Classifier configuration.", + "markdownDescription": "Classifier configuration.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `{}`", + "default": {}, + "type": "object", + "properties": { + "host": { + "title": "Host", + "description": "The host of the classifier.", + "markdownDescription": "The host of the classifier.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `http://localhost:9379`", + "default": "http://localhost:9379", + "type": "string" + }, + "model": { + "title": "Model", + "description": "The model to use for the classifier. Only tested on `gemma3-1b-gpu-custom`.", + "markdownDescription": "The model to use for the classifier. 
Only tested on `gemma3-1b-gpu-custom`.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `gemma3-1b-gpu-custom`", + "default": "gemma3-1b-gpu-custom", + "type": "string" + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false } }, "additionalProperties": false From bc622a443e22e93ae3c3dec0d4ff3c3fcf9dcd7f Mon Sep 17 00:00:00 2001 From: Adib234 <30782825+Adib234@users.noreply.github.com> Date: Thu, 26 Feb 2026 19:30:23 -0500 Subject: [PATCH 13/41] docs(plan): update documentation regarding supporting editing of plan files during plan approval (#20452) Co-authored-by: Jerop Kipruto --- docs/cli/plan-mode.md | 5 +++++ docs/reference/keyboard-shortcuts.md | 12 ++++++------ packages/cli/src/config/keyBindings.ts | 2 +- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/docs/cli/plan-mode.md b/docs/cli/plan-mode.md index fc09dd06d5..03dd92967f 100644 --- a/docs/cli/plan-mode.md +++ b/docs/cli/plan-mode.md @@ -107,6 +107,10 @@ structure, and consultation level are proportional to the task's complexity: and formally request approval. - **Approve:** Exit Plan Mode and start implementation. - **Iterate:** Provide feedback to refine the plan. + - **Refine manually:** Press **Ctrl + X** to open the plan file in your + [preferred external editor]. This allows you to manually refine the plan + steps before approval. The CLI will automatically refresh and show the + updated plan after you save and close the editor. For more complex or specialized planning tasks, you can [customize the planning workflow with skills](#customizing-planning-with-skills). @@ -305,3 +309,4 @@ performance. 
You can disable this automatic switching in your settings: https://github.com/google-gemini/gemini-cli/blob/main/packages/core/src/policy/policies/plan.toml [auto model]: /docs/reference/configuration.md#model-settings [model routing]: /docs/cli/telemetry.md#model-routing +[preferred external editor]: /docs/reference/configuration.md#general diff --git a/docs/reference/keyboard-shortcuts.md b/docs/reference/keyboard-shortcuts.md index 1402422c6b..4fc28804f7 100644 --- a/docs/reference/keyboard-shortcuts.md +++ b/docs/reference/keyboard-shortcuts.md @@ -87,12 +87,12 @@ available combinations. #### Text Input -| Action | Keys | -| ---------------------------------------------- | ----------------------------------------------------------------------------------------- | -| Submit the current prompt. | `Enter (no Shift, Alt, Ctrl, Cmd)` | -| Insert a newline without submitting. | `Ctrl + Enter`
`Cmd + Enter`
`Alt + Enter`
`Shift + Enter`
`Ctrl + J` | -| Open the current prompt in an external editor. | `Ctrl + X` | -| Paste from the clipboard. | `Ctrl + V`
`Cmd + V`
`Alt + V` | +| Action | Keys | +| ---------------------------------------------------------- | ----------------------------------------------------------------------------------------- | +| Submit the current prompt. | `Enter (no Shift, Alt, Ctrl, Cmd)` | +| Insert a newline without submitting. | `Ctrl + Enter`
`Cmd + Enter`
`Alt + Enter`
`Shift + Enter`
`Ctrl + J` | +| Open the current prompt or the plan in an external editor. | `Ctrl + X` | +| Paste from the clipboard. | `Ctrl + V`
`Cmd + V`
`Alt + V` | #### App Controls diff --git a/packages/cli/src/config/keyBindings.ts b/packages/cli/src/config/keyBindings.ts index 4813abd368..3122acef1d 100644 --- a/packages/cli/src/config/keyBindings.ts +++ b/packages/cli/src/config/keyBindings.ts @@ -489,7 +489,7 @@ export const commandDescriptions: Readonly> = { [Command.SUBMIT]: 'Submit the current prompt.', [Command.NEWLINE]: 'Insert a newline without submitting.', [Command.OPEN_EXTERNAL_EDITOR]: - 'Open the current prompt in an external editor.', + 'Open the current prompt or the plan in an external editor.', [Command.PASTE_CLIPBOARD]: 'Paste from the clipboard.', // App Controls From 049d48ef6068ca9862a4a09dcb7f6f112830f2d1 Mon Sep 17 00:00:00 2001 From: Jarrod Whelan <150866123+jwhelangoog@users.noreply.github.com> Date: Thu, 26 Feb 2026 16:30:43 -0800 Subject: [PATCH 14/41] test(cli): fix flaky ToolResultDisplay overflow test (#20518) --- .../ToolResultDisplayOverflow.test.tsx | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/packages/cli/src/ui/components/messages/ToolResultDisplayOverflow.test.tsx b/packages/cli/src/ui/components/messages/ToolResultDisplayOverflow.test.tsx index a196b8d989..a1d4106cea 100644 --- a/packages/cli/src/ui/components/messages/ToolResultDisplayOverflow.test.tsx +++ b/packages/cli/src/ui/components/messages/ToolResultDisplayOverflow.test.tsx @@ -8,7 +8,6 @@ import { describe, it, expect } from 'vitest'; import { ToolGroupMessage } from './ToolGroupMessage.js'; import { renderWithProviders } from '../../../test-utils/render.js'; import { StreamingState, type IndividualToolCallDisplay } from '../../types.js'; -import { OverflowProvider } from '../../contexts/OverflowContext.js'; import { waitFor } from '../../../test-utils/async.js'; import { CoreToolCallStatus } from '@google/gemini-cli-core'; @@ -32,16 +31,14 @@ describe('ToolResultDisplay Overflow', () => { }, ]; - const { lastFrame } = renderWithProviders( - - - , + const { lastFrame, 
waitUntilReady } = renderWithProviders( + , { uiState: { streamingState: StreamingState.Idle, @@ -51,12 +48,13 @@ describe('ToolResultDisplay Overflow', () => { }, ); - // ResizeObserver might take a tick - await waitFor(() => - expect(lastFrame()?.toLowerCase()).toContain( - 'press ctrl+o to show more lines', - ), - ); + await waitUntilReady(); + + // ResizeObserver might take a tick, though ToolGroupMessage calculates overflow synchronously + await waitFor(() => { + const frame = lastFrame(); + expect(frame.toLowerCase()).toContain('press ctrl+o to show more lines'); + }); const frame = lastFrame(); expect(frame).toBeDefined(); From 085441352bed3b8a5f00a2484b1cb9b75cacb426 Mon Sep 17 00:00:00 2001 From: Jarrod Whelan <150866123+jwhelangoog@users.noreply.github.com> Date: Thu, 26 Feb 2026 16:57:38 -0800 Subject: [PATCH 15/41] ui(cli): reduce length of Ctrl+O hint (#20490) --- packages/cli/src/ui/components/ToastDisplay.test.tsx | 8 ++++++-- packages/cli/src/ui/components/ToastDisplay.tsx | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/packages/cli/src/ui/components/ToastDisplay.test.tsx b/packages/cli/src/ui/components/ToastDisplay.test.tsx index f2ef9a287b..668f91c8d9 100644 --- a/packages/cli/src/ui/components/ToastDisplay.test.tsx +++ b/packages/cli/src/ui/components/ToastDisplay.test.tsx @@ -187,7 +187,9 @@ describe('ToastDisplay', () => { constrainHeight: true, }); await waitUntilReady(); - expect(lastFrame()).toContain('Press Ctrl+O to show more lines'); + expect(lastFrame()).toContain( + 'Ctrl+O to show more lines of the last response', + ); }); it('renders collapse hint when showIsExpandableHint is true and constrainHeight is false', async () => { @@ -196,6 +198,8 @@ describe('ToastDisplay', () => { constrainHeight: false, }); await waitUntilReady(); - expect(lastFrame()).toContain('Press Ctrl+O to collapse lines'); + expect(lastFrame()).toContain( + 'Ctrl+O to collapse lines of the last response', + ); }); }); diff --git 
a/packages/cli/src/ui/components/ToastDisplay.tsx b/packages/cli/src/ui/components/ToastDisplay.tsx index e383201219..6fcef1667c 100644 --- a/packages/cli/src/ui/components/ToastDisplay.tsx +++ b/packages/cli/src/ui/components/ToastDisplay.tsx @@ -78,7 +78,7 @@ export const ToastDisplay: React.FC = () => { const action = uiState.constrainHeight ? 'show more' : 'collapse'; return ( - Press Ctrl+O to {action} lines for the most recent response + Ctrl+O to {action} lines of the last response ); } From ecfa4e0437dc1049fc6460b688f88d3af4c1c08f Mon Sep 17 00:00:00 2001 From: Dev Randalpura Date: Thu, 26 Feb 2026 17:31:21 -0800 Subject: [PATCH 16/41] fix(ui): correct styled table width calculations (#20042) --- .../src/ui/utils/InlineMarkdownRenderer.tsx | 194 ++++-- .../cli/src/ui/utils/TableRenderer.test.tsx | 131 +++- packages/cli/src/ui/utils/TableRenderer.tsx | 64 +- ...lates-column-widths-based-on-ren-.snap.svg | 39 ++ ...lates-width-correctly-for-conten-.snap.svg | 45 ++ ...not-parse-markdown-inside-code-s-.snap.svg | 40 ++ ...es-nested-markdown-styles-recurs-.snap.svg | 39 ++ ...dles-non-ASCII-characters-emojis-.snap.svg | 24 +- ...d-headers-without-showing-markers.snap.svg | 32 +- ...rer-renders-a-3x3-table-correctly.snap.svg | 34 +- ...h-mixed-content-lengths-correctly.snap.svg | 610 +++++++++--------- ...g-headers-and-4-columns-correctly.snap.svg | 60 +- ...ers-a-table-with-mixed-emojis-As-.snap.svg | 24 +- ...rs-a-table-with-only-Asian-chara-.snap.svg | 24 +- ...ers-a-table-with-only-emojis-and-.snap.svg | 24 +- ...ers-complex-markdown-in-rows-and-.snap.svg | 53 ++ ...rs-correctly-when-headers-are-em-.snap.svg | 8 +- ...rs-correctly-when-there-are-more-.snap.svg | 12 +- ...eaders-and-renders-them-correctly.snap.svg | 14 +- ...-wraps-all-long-columns-correctly.snap.svg | 52 +- ...olumns-with-punctuation-correctly.snap.svg | 50 +- ...wraps-long-cell-content-correctly.snap.svg | 26 +- ...-long-and-short-columns-correctly.snap.svg | 28 +- 
.../__snapshots__/TableRenderer.test.tsx.snap | 64 ++ .../src/ui/utils/markdownParsingUtils.test.ts | 223 +++++++ 25 files changed, 1312 insertions(+), 602 deletions(-) create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-calculates-column-widths-based-on-ren-.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-calculates-width-correctly-for-conten-.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-does-not-parse-markdown-inside-code-s-.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-nested-markdown-styles-recurs-.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-complex-markdown-in-rows-and-.snap.svg create mode 100644 packages/cli/src/ui/utils/markdownParsingUtils.test.ts diff --git a/packages/cli/src/ui/utils/InlineMarkdownRenderer.tsx b/packages/cli/src/ui/utils/InlineMarkdownRenderer.tsx index 430b27eeb3..02a34842f4 100644 --- a/packages/cli/src/ui/utils/InlineMarkdownRenderer.tsx +++ b/packages/cli/src/ui/utils/InlineMarkdownRenderer.tsx @@ -6,6 +6,12 @@ import React from 'react'; import { Text } from 'ink'; +import chalk from 'chalk'; +import { + resolveColor, + INK_SUPPORTED_NAMES, + INK_NAME_TO_HEX_MAP, +} from '../themes/color-utils.js'; import { theme } from '../semantic-colors.js'; import { debugLogger } from '@google/gemini-cli-core'; import { stripUnsafeCharacters } from './textUtils.js'; @@ -23,46 +29,108 @@ interface RenderInlineProps { defaultColor?: string; } -const RenderInlineInternal: React.FC = ({ - text: rawText, - defaultColor, -}) => { - const text = stripUnsafeCharacters(rawText); +/** + * Helper to apply color to a string using ANSI escape codes, + * consistent with how Ink's colorize works. 
+ */ +const ansiColorize = (str: string, color: string | undefined): string => { + if (!color) return str; + const resolved = resolveColor(color); + if (!resolved) return str; + + if (resolved.startsWith('#')) { + return chalk.hex(resolved)(str); + } + + const mappedHex = INK_NAME_TO_HEX_MAP[resolved]; + if (mappedHex) { + return chalk.hex(mappedHex)(str); + } + + if (INK_SUPPORTED_NAMES.has(resolved)) { + switch (resolved) { + case 'black': + return chalk.black(str); + case 'red': + return chalk.red(str); + case 'green': + return chalk.green(str); + case 'yellow': + return chalk.yellow(str); + case 'blue': + return chalk.blue(str); + case 'magenta': + return chalk.magenta(str); + case 'cyan': + return chalk.cyan(str); + case 'white': + return chalk.white(str); + case 'gray': + case 'grey': + return chalk.gray(str); + default: + return str; + } + } + + return str; +}; + +/** + * Converts markdown text into a string with ANSI escape codes. + * This mirrors the parsing logic in InlineMarkdownRenderer.tsx + */ +export const parseMarkdownToANSI = ( + text: string, + defaultColor?: string, +): string => { const baseColor = defaultColor ?? 
theme.text.primary; // Early return for plain text without markdown or URLs if (!/[*_~`<[https?:]/.test(text)) { - return {text}; + return ansiColorize(text, baseColor); } - const nodes: React.ReactNode[] = []; - let lastIndex = 0; + let result = ''; const inlineRegex = - /(\*\*.*?\*\*|\*.*?\*|_.*?_|~~.*?~~|\[.*?\]\(.*?\)|`+.+?`+|.*?<\/u>|https?:\/\/\S+)/g; + /(\*\*\*.*?\*\*\*|\*\*.*?\*\*|\*.*?\*|_.*?_|~~.*?~~|\[.*?\]\(.*?\)|`+.+?`+|.*?<\/u>|https?:\/\/\S+)/g; + let lastIndex = 0; let match; while ((match = inlineRegex.exec(text)) !== null) { if (match.index > lastIndex) { - nodes.push( - - {text.slice(lastIndex, match.index)} - , - ); + result += ansiColorize(text.slice(lastIndex, match.index), baseColor); } const fullMatch = match[0]; - let renderedNode: React.ReactNode = null; - const key = `m-${match.index}`; + let styledPart = ''; try { if ( - fullMatch.startsWith('**') && + fullMatch.endsWith('***') && + fullMatch.startsWith('***') && + fullMatch.length > (BOLD_MARKER_LENGTH + ITALIC_MARKER_LENGTH) * 2 + ) { + styledPart = chalk.bold( + chalk.italic( + parseMarkdownToANSI( + fullMatch.slice( + BOLD_MARKER_LENGTH + ITALIC_MARKER_LENGTH, + -BOLD_MARKER_LENGTH - ITALIC_MARKER_LENGTH, + ), + baseColor, + ), + ), + ); + } else if ( fullMatch.endsWith('**') && + fullMatch.startsWith('**') && fullMatch.length > BOLD_MARKER_LENGTH * 2 ) { - renderedNode = ( - - {fullMatch.slice(BOLD_MARKER_LENGTH, -BOLD_MARKER_LENGTH)} - + styledPart = chalk.bold( + parseMarkdownToANSI( + fullMatch.slice(BOLD_MARKER_LENGTH, -BOLD_MARKER_LENGTH), + baseColor, + ), ); } else if ( fullMatch.length > ITALIC_MARKER_LENGTH * 2 && @@ -77,23 +145,25 @@ const RenderInlineInternal: React.FC = ({ text.substring(inlineRegex.lastIndex, inlineRegex.lastIndex + 2), ) ) { - renderedNode = ( - - {fullMatch.slice(ITALIC_MARKER_LENGTH, -ITALIC_MARKER_LENGTH)} - + styledPart = chalk.italic( + parseMarkdownToANSI( + fullMatch.slice(ITALIC_MARKER_LENGTH, -ITALIC_MARKER_LENGTH), + baseColor, + ), ); } else 
if ( fullMatch.startsWith('~~') && fullMatch.endsWith('~~') && fullMatch.length > STRIKETHROUGH_MARKER_LENGTH * 2 ) { - renderedNode = ( - - {fullMatch.slice( + styledPart = chalk.strikethrough( + parseMarkdownToANSI( + fullMatch.slice( STRIKETHROUGH_MARKER_LENGTH, -STRIKETHROUGH_MARKER_LENGTH, - )} - + ), + baseColor, + ), ); } else if ( fullMatch.startsWith('`') && @@ -102,11 +172,7 @@ const RenderInlineInternal: React.FC = ({ ) { const codeMatch = fullMatch.match(/^(`+)(.+?)\1$/s); if (codeMatch && codeMatch[2]) { - renderedNode = ( - - {codeMatch[2]} - - ); + styledPart = ansiColorize(codeMatch[2], theme.text.accent); } } else if ( fullMatch.startsWith('[') && @@ -117,58 +183,54 @@ const RenderInlineInternal: React.FC = ({ if (linkMatch) { const linkText = linkMatch[1]; const url = linkMatch[2]; - renderedNode = ( - - {linkText} - ({url}) - - ); + styledPart = + parseMarkdownToANSI(linkText, baseColor) + + ansiColorize(' (', baseColor) + + ansiColorize(url, theme.text.link) + + ansiColorize(')', baseColor); } } else if ( fullMatch.startsWith('') && fullMatch.endsWith('') && fullMatch.length > - UNDERLINE_TAG_START_LENGTH + UNDERLINE_TAG_END_LENGTH - 1 // -1 because length is compared to combined length of start and end tags + UNDERLINE_TAG_START_LENGTH + UNDERLINE_TAG_END_LENGTH - 1 ) { - renderedNode = ( - - {fullMatch.slice( + styledPart = chalk.underline( + parseMarkdownToANSI( + fullMatch.slice( UNDERLINE_TAG_START_LENGTH, -UNDERLINE_TAG_END_LENGTH, - )} - + ), + baseColor, + ), ); } else if (fullMatch.match(/^https?:\/\//)) { - renderedNode = ( - - {fullMatch} - - ); + styledPart = ansiColorize(fullMatch, theme.text.link); } } catch (e) { debugLogger.warn('Error parsing inline markdown part:', fullMatch, e); - renderedNode = null; + styledPart = ''; } - nodes.push( - renderedNode ?? 
( - - {fullMatch} - - ), - ); + result += styledPart || ansiColorize(fullMatch, baseColor); lastIndex = inlineRegex.lastIndex; } if (lastIndex < text.length) { - nodes.push( - - {text.slice(lastIndex)} - , - ); + result += ansiColorize(text.slice(lastIndex), baseColor); } - return <>{nodes.filter((node) => node !== null)}; + return result; +}; + +const RenderInlineInternal: React.FC = ({ + text: rawText, + defaultColor, +}) => { + const text = stripUnsafeCharacters(rawText); + const ansiText = parseMarkdownToANSI(text, defaultColor); + + return {ansiText}; }; export const RenderInline = React.memo(RenderInlineInternal); diff --git a/packages/cli/src/ui/utils/TableRenderer.test.tsx b/packages/cli/src/ui/utils/TableRenderer.test.tsx index e9d84e6649..3960e8befe 100644 --- a/packages/cli/src/ui/utils/TableRenderer.test.tsx +++ b/packages/cli/src/ui/utils/TableRenderer.test.tsx @@ -267,7 +267,6 @@ describe('TableRenderer', () => { await waitUntilReady(); const output = lastFrame(); - expect(output).toContain('Comprehensive Architectural'); expect(output).toContain('protocol buffers'); expect(output).toContain('exponential backoff'); @@ -378,4 +377,134 @@ describe('TableRenderer', () => { await expect(renderResult).toMatchSvgSnapshot(); unmount(); }); + + it.each([ + { + name: 'renders complex markdown in rows and calculates widths correctly', + headers: ['Feature', 'Markdown'], + rows: [ + ['Bold', '**Bold Text**'], + ['Italic', '_Italic Text_'], + ['Combined', '***Bold and Italic***'], + ['Link', '[Google](https://google.com)'], + ['Code', '`const x = 1`'], + ['Strikethrough', '~~Strike~~'], + ['Underline', 'Underline'], + ], + terminalWidth: 80, + waitForText: 'Bold Text', + assertions: (output: string) => { + expect(output).not.toContain('**Bold Text**'); + expect(output).toContain('Bold Text'); + expect(output).not.toContain('_Italic Text_'); + expect(output).toContain('Italic Text'); + expect(output).toContain('Bold and Italic'); + 
expect(output).toContain('Google'); + expect(output).toContain('https://google.com'); + expect(output).toContain('(https://google.com)'); + expect(output).toContain('const x = 1'); + expect(output).not.toContain('`const x = 1`'); + expect(output).toContain('Strike'); + expect(output).toContain('Underline'); + }, + }, + { + name: 'calculates column widths based on rendered text, not raw markdown', + headers: ['Col 1', 'Col 2', 'Col 3'], + rows: [ + ['**123456**', 'Normal', 'Short'], + ['Short', '**123456**', 'Normal'], + ['Normal', 'Short', '**123456**'], + ], + terminalWidth: 40, + waitForText: '123456', + assertions: (output: string) => { + expect(output).toContain('123456'); + const dataLines = output.split('\n').filter((l) => /123456/.test(l)); + expect(dataLines.length).toBe(3); + }, + }, + { + name: 'handles nested markdown styles recursively', + headers: ['Header 1', 'Header 2', 'Header 3'], + rows: [ + ['**Bold with _Italic_ and ~~Strike~~**', 'Normal', 'Short'], + ['Short', '**Bold with _Italic_ and ~~Strike~~**', 'Normal'], + ['Normal', 'Short', '**Bold with _Italic_ and ~~Strike~~**'], + ], + terminalWidth: 100, + waitForText: 'Bold with Italic and Strike', + assertions: (output: string) => { + expect(output).not.toContain('**'); + expect(output).not.toContain('_'); + expect(output).not.toContain('~~'); + expect(output).toContain('Bold with Italic and Strike'); + }, + }, + { + name: 'calculates width correctly for content with URLs and styles', + headers: ['Col 1', 'Col 2', 'Col 3'], + rows: [ + ['Visit [Google](https://google.com)', 'Plain Text', 'More Info'], + ['Info Here', 'Visit [Bing](https://bing.com)', 'Links'], + ['Check This', 'Search', 'Visit [Yahoo](https://yahoo.com)'], + ], + terminalWidth: 120, + waitForText: 'Visit Google', + assertions: (output: string) => { + expect(output).toContain('Visit Google'); + expect(output).toContain('Visit Bing'); + expect(output).toContain('Visit Yahoo'); + expect(output).toContain('https://google.com'); + 
expect(output).toContain('https://bing.com'); + expect(output).toContain('https://yahoo.com'); + expect(output).toContain('(https://google.com)'); + const dataLine = output + .split('\n') + .find((l) => l.includes('Visit Google')); + expect(dataLine).toContain('Visit Google'); + }, + }, + { + name: 'does not parse markdown inside code snippets', + headers: ['Col 1', 'Col 2', 'Col 3'], + rows: [ + ['`**not bold**`', '`_not italic_`', '`~~not strike~~`'], + ['`[not link](url)`', '`not underline`', '`https://not.link`'], + ['Normal Text', 'More Code: `*test*`', '`***nested***`'], + ], + terminalWidth: 100, + waitForText: '**not bold**', + assertions: (output: string) => { + expect(output).toContain('**not bold**'); + expect(output).toContain('_not italic_'); + expect(output).toContain('~~not strike~~'); + expect(output).toContain('[not link](url)'); + expect(output).toContain('not underline'); + expect(output).toContain('https://not.link'); + expect(output).toContain('***nested***'); + }, + }, + ])( + '$name', + async ({ headers, rows, terminalWidth, waitForText, assertions }) => { + const renderResult = renderWithProviders( + , + { width: terminalWidth }, + ); + const { lastFrame, waitUntilReady, unmount } = renderResult; + await waitUntilReady(); + + const output = lastFrame(); + expect(output).toBeDefined(); + expect(output).toContain(waitForText); + assertions(output); + await expect(renderResult).toMatchSvgSnapshot(); + unmount(); + }, + ); }); diff --git a/packages/cli/src/ui/utils/TableRenderer.tsx b/packages/cli/src/ui/utils/TableRenderer.tsx index ab1981762c..143b1fe015 100644 --- a/packages/cli/src/ui/utils/TableRenderer.tsx +++ b/packages/cli/src/ui/utils/TableRenderer.tsx @@ -5,18 +5,19 @@ */ import React, { useMemo } from 'react'; -import { Text, Box } from 'ink'; +import { styledCharsToString } from '@alcalzone/ansi-tokenize'; import { + Text, + Box, type StyledChar, toStyledCharacters, - styledCharsToString, styledCharsWidth, wordBreakStyledChars, 
wrapStyledChars, widestLineFromStyledChars, } from 'ink'; import { theme } from '../semantic-colors.js'; -import { RenderInline } from './InlineMarkdownRenderer.js'; +import { parseMarkdownToANSI } from './InlineMarkdownRenderer.js'; import { stripUnsafeCharacters } from './textUtils.js'; interface TableRendererProps { @@ -29,6 +30,19 @@ const MIN_COLUMN_WIDTH = 5; const COLUMN_PADDING = 2; const TABLE_MARGIN = 2; +/** + * Parses markdown to StyledChar array by first converting to ANSI. + * This ensures character counts are accurate (markdown markers are removed + * and styles are applied to the character's internal style object). + */ +const parseMarkdownToStyledChars = ( + text: string, + defaultColor?: string, +): StyledChar[] => { + const ansi = parseMarkdownToANSI(text, defaultColor); + return toStyledCharacters(ansi); +}; + const calculateWidths = (styledChars: StyledChar[]) => { const contentWidth = styledCharsWidth(styledChars); @@ -53,25 +67,26 @@ export const TableRenderer: React.FC = ({ rows, terminalWidth, }) => { - // Clean headers: remove bold markers since we already render headers as bold - // and having them can break wrapping when the markers are split across lines. 
- const cleanedHeaders = useMemo( - () => headers.map((header) => header.replace(/\*\*(.*?)\*\*/g, '$1')), - [headers], - ); - const styledHeaders = useMemo( () => - cleanedHeaders.map((header) => - toStyledCharacters(stripUnsafeCharacters(header)), + headers.map((header) => + parseMarkdownToStyledChars( + stripUnsafeCharacters(header), + theme.text.link, + ), ), - [cleanedHeaders], + [headers], ); const styledRows = useMemo( () => rows.map((row) => - row.map((cell) => toStyledCharacters(stripUnsafeCharacters(cell))), + row.map((cell) => + parseMarkdownToStyledChars( + stripUnsafeCharacters(cell), + theme.text.primary, + ), + ), ), [rows], ); @@ -132,7 +147,7 @@ export const TableRenderer: React.FC = ({ const scale = (availableWidth - finalTotalShortColumnWidth) / - (totalMinWidth - finalTotalShortColumnWidth); + (totalMinWidth - finalTotalShortColumnWidth) || 0; finalContentWidths = constraints.map((c) => { if (c.maxWidth <= MIN_COLUMN_WIDTH && finalTotalShortColumnWidth > 0) { return c.minWidth; @@ -201,6 +216,7 @@ export const TableRenderer: React.FC = ({ return { wrappedHeaders, wrappedRows, adjustedWidths }; }, [styledHeaders, styledRows, terminalWidth]); + // Helper function to render a cell with proper width const renderCell = ( content: ProcessedLine, @@ -216,10 +232,10 @@ export const TableRenderer: React.FC = ({ {isHeader ? ( - + {content.text} ) : ( - + {content.text} )} {' '.repeat(paddingNeeded)} @@ -253,18 +269,18 @@ export const TableRenderer: React.FC = ({ }); return ( - - {' '} + + {renderedCells.map((cell, index) => ( - {cell} + {cell} {index < renderedCells.length - 1 && ( - {' │ '} + )} - ))}{' '} + ))} - + ); }; @@ -274,7 +290,7 @@ export const TableRenderer: React.FC = ({ rowIndex?: number, isHeader = false, ): React.ReactNode => { - const key = isHeader ? 'header' : `${rowIndex}`; + const key = rowIndex === -1 ? 
'header' : `${rowIndex}`; const maxHeight = Math.max(...wrappedCells.map((lines) => lines.length), 1); const visualRows: React.ReactNode[] = []; diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-calculates-column-widths-based-on-ren-.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-calculates-column-widths-based-on-ren-.snap.svg new file mode 100644 index 0000000000..e01d29e15d --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-calculates-column-widths-based-on-ren-.snap.svg @@ -0,0 +1,39 @@ + + + + + ┌────────┬────────┬────────┐ + + Col 1 + + Col 2 + + Col 3 + + ├────────┼────────┼────────┤ + + 123456 + + Normal + + Short + + + Short + + 123456 + + Normal + + + Normal + + Short + + 123456 + + └────────┴────────┴────────┘ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-calculates-width-correctly-for-conten-.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-calculates-width-correctly-for-conten-.snap.svg new file mode 100644 index 0000000000..f6f83c0cb0 --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-calculates-width-correctly-for-conten-.snap.svg @@ -0,0 +1,45 @@ + + + + + ┌───────────────────────────────────┬───────────────────────────────┬─────────────────────────────────┐ + + Col 1 + + Col 2 + + Col 3 + + ├───────────────────────────────────┼───────────────────────────────┼─────────────────────────────────┤ + + Visit Google ( + https://google.com + ) + + Plain Text + + More Info + + + Info Here + + Visit Bing ( + https://bing.com + ) + + Links + + + Check This + + Search + + Visit Yahoo ( + https://yahoo.com + ) + + └───────────────────────────────────┴───────────────────────────────┴─────────────────────────────────┘ + + \ No newline at end of file diff --git 
a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-does-not-parse-markdown-inside-code-s-.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-does-not-parse-markdown-inside-code-s-.snap.svg new file mode 100644 index 0000000000..68069bd0ab --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-does-not-parse-markdown-inside-code-s-.snap.svg @@ -0,0 +1,40 @@ + + + + + ┌─────────────────┬──────────────────────┬──────────────────┐ + + Col 1 + + Col 2 + + Col 3 + + ├─────────────────┼──────────────────────┼──────────────────┤ + + **not bold** + + _not italic_ + + ~~not strike~~ + + + [not link](url) + + <u>not underline</u> + + https://not.link + + + Normal Text + + More Code: + *test* + + ***nested*** + + └─────────────────┴──────────────────────┴──────────────────┘ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-nested-markdown-styles-recurs-.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-nested-markdown-styles-recurs-.snap.svg new file mode 100644 index 0000000000..3269e29f19 --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-nested-markdown-styles-recurs-.snap.svg @@ -0,0 +1,39 @@ + + + + + ┌─────────────────────────────┬─────────────────────────────┬─────────────────────────────┐ + + Header 1 + + Header 2 + + Header 3 + + ├─────────────────────────────┼─────────────────────────────┼─────────────────────────────┤ + + Bold with Italic and Strike + + Normal + + Short + + + Short + + Bold with Italic and Strike + + Normal + + + Normal + + Short + + Bold with Italic and Strike + + └─────────────────────────────┴─────────────────────────────┴─────────────────────────────┘ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-non-ASCII-characters-emojis-.snap.svg 
b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-non-ASCII-characters-emojis-.snap.svg index d9612cce33..13898e8641 100644 --- a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-non-ASCII-characters-emojis-.snap.svg +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-non-ASCII-characters-emojis-.snap.svg @@ -7,25 +7,25 @@ ┌──────────────┬────────────┬───────────────┐ Emoji 😃 - + Asian 汉字 - + Mixed 🚀 Text ├──────────────┼────────────┼───────────────┤ - Start 🌟 End - - 你好世界 - - Rocket 🚀 Man + Start 🌟 End + + 你好世界 + + Rocket 🚀 Man - Thumbs 👍 Up - - こんにちは - - Fire 🔥 + Thumbs 👍 Up + + こんにちは + + Fire 🔥 └──────────────┴────────────┴───────────────┘ diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-wrapped-bold-headers-without-showing-markers.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-wrapped-bold-headers-without-showing-markers.snap.svg index 0118d133cf..30d847e86c 100644 --- a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-wrapped-bold-headers-without-showing-markers.snap.svg +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-handles-wrapped-bold-headers-without-showing-markers.snap.svg @@ -7,40 +7,40 @@ ┌─────────────┬───────┬─────────┐ Very Long - + Short - + Another Bold Header - - + + Long That Will - - + + Header Wrap - - + + ├─────────────┼───────┼─────────┤ - Data 1 - - Data - - Data 3 + Data 1 + + Data + + Data 3 - - 2 - + + 2 + └─────────────┴───────┴─────────┘ diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-3x3-table-correctly.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-3x3-table-correctly.snap.svg index 84e4d856f6..dea907221c 100644 --- a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-3x3-table-correctly.snap.svg +++ 
b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-3x3-table-correctly.snap.svg @@ -7,32 +7,32 @@ ┌──────────────┬──────────────┬──────────────┐ Header 1 - + Header 2 - + Header 3 ├──────────────┼──────────────┼──────────────┤ - Row 1, Col 1 - - Row 1, Col 2 - - Row 1, Col 3 + Row 1, Col 1 + + Row 1, Col 2 + + Row 1, Col 3 - Row 2, Col 1 - - Row 2, Col 2 - - Row 2, Col 3 + Row 2, Col 1 + + Row 2, Col 2 + + Row 2, Col 3 - Row 3, Col 1 - - Row 3, Col 2 - - Row 3, Col 3 + Row 3, Col 1 + + Row 3, Col 2 + + Row 3, Col 3 └──────────────┴──────────────┴──────────────┘ diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-complex-table-with-mixed-content-lengths-correctly.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-complex-table-with-mixed-content-lengths-correctly.snap.svg index 95654cb4d8..f5a00dbe7c 100644 --- a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-complex-table-with-mixed-content-lengths-correctly.snap.svg +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-complex-table-with-mixed-content-lengths-correctly.snap.svg @@ -7,394 +7,394 @@ ┌─────────────────────────────┬──────────────────────────────┬─────────────────────────────┬──────────────────────────────┬─────┬────────┬─────────┬───────┐ Comprehensive Architectural - + Implementation Details for - + Longitudinal Performance - + Strategic Security Framework - + Key - + Status - + Version - + Owner Specification for the - + the High-Throughput - + Analysis Across - + for Mitigating Sophisticated - - - - + + + + Distributed Infrastructure - + Asynchronous Message - + Multi-Regional Cloud - + Cross-Site Scripting - - - - + + + + Layer - + Processing Pipeline with - + Deployment Clusters - + Vulnerabilities - - - - + + + + - + Extended Scalability - - - - - - + + + + + + - + Features and Redundancy - - - - - - + + + + + + - + Protocols - - - 
- - - + + + + + + ├─────────────────────────────┼──────────────────────────────┼─────────────────────────────┼──────────────────────────────┼─────┼────────┼─────────┼───────┤ - The primary architecture - - Each message is processed - - Historical data indicates a - - A multi-layered defense - - INF - - Active - - v2.4 - - J. + The primary architecture + + Each message is processed + + Historical data indicates a + + A multi-layered defense + + INF + + Active + + v2.4 + + J. - utilizes a decoupled - - through a series of - - significant reduction in - - strategy incorporates - - - - - Doe + utilizes a decoupled + + through a series of + + significant reduction in + + strategy incorporates + + + + + Doe - microservices approach, - - specialized workers that - - tail latency when utilizing - - content security policies, - - - - + microservices approach, + + specialized workers that + + tail latency when utilizing + + content security policies, + + + + - leveraging container - - handle data transformation, - - edge computing nodes closer - - input sanitization - - - - + leveraging container + + handle data transformation, + + edge computing nodes closer + + input sanitization + + + + - orchestration for - - validation, and persistent - - to the geographic location - - libraries, and regular - - - - + orchestration for + + validation, and persistent + + to the geographic location + + libraries, and regular + + + + - scalability and fault - - storage using a persistent - - of the end-user base. - - automated penetration - - - - + scalability and fault + + storage using a persistent + + of the end-user base. + + automated penetration + + + + - tolerance in high-load - - queue. - - - testing routines. - - - - + tolerance in high-load + + queue. + + + testing routines. + + + + - scenarios. - - - Monitoring tools have - - - - - + scenarios. 
+ + + Monitoring tools have + + + + + - - The pipeline features - - captured a steady increase - - Developers are required to - - - - + + The pipeline features + + captured a steady increase + + Developers are required to + + + + - This layer provides the - - built-in retry mechanisms - - in throughput efficiency - - undergo mandatory security - - - - + This layer provides the + + built-in retry mechanisms + + in throughput efficiency + + undergo mandatory security + + + + - fundamental building blocks - - with exponential backoff to - - since the introduction of - - training focusing on the - - - - + fundamental building blocks + + with exponential backoff to + + since the introduction of + + training focusing on the + + + + - for service discovery, load - - ensure message delivery - - the vectorized query engine - - OWASP Top Ten to ensure that - - - - + for service discovery, load + + ensure message delivery + + the vectorized query engine + + OWASP Top Ten to ensure that + + + + - balancing, and - - integrity even during - - in the primary data - - security is integrated into - - - - + balancing, and + + integrity even during + + in the primary data + + security is integrated into + + + + - inter-service communication - - transient network or service - - warehouse. - - the initial design phase. - - - - + inter-service communication + + transient network or service + + warehouse. + + the initial design phase. + + + + - via highly efficient - - failures. - - - - - - + via highly efficient + + failures. + + + + + + - protocol buffers. - - - Resource utilization - - The implementation of a - - - - + protocol buffers. 
+ + + Resource utilization + + The implementation of a + + + + - - Horizontal autoscaling is - - metrics demonstrate that - - robust Identity and Access - - - - + + Horizontal autoscaling is + + metrics demonstrate that + + robust Identity and Access + + + + - Advanced telemetry and - - triggered automatically - - the transition to - - Management system ensures - - - - + Advanced telemetry and + + triggered automatically + + the transition to + + Management system ensures + + + + - logging integrations allow - - based on the depth of the - - serverless compute for - - that the principle of least - - - - + logging integrations allow + + based on the depth of the + + serverless compute for + + that the principle of least + + + + - for real-time monitoring of - - processing queue, ensuring - - intermittent tasks has - - privilege is strictly - - - - + for real-time monitoring of + + processing queue, ensuring + + intermittent tasks has + + privilege is strictly + + + + - system health and rapid - - consistent performance - - resulted in a thirty - - enforced across all - - - - + system health and rapid + + consistent performance + + resulted in a thirty + + enforced across all + + + + - identification of - - during unexpected traffic - - percent cost optimization. - - environments. - - - - + identification of + + during unexpected traffic + + percent cost optimization. + + environments. + + + + - bottlenecks within the - - spikes. - - - - - - + bottlenecks within the + + spikes. + + + + + + - service mesh. - - - - - - - + service mesh. 
+ + + + + + + └─────────────────────────────┴──────────────────────────────┴─────────────────────────────┴──────────────────────────────┴─────┴────────┴─────────┴───────┘ diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-long-headers-and-4-columns-correctly.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-long-headers-and-4-columns-correctly.snap.svg index b4d6353c3c..8da55efa8b 100644 --- a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-long-headers-and-4-columns-correctly.snap.svg +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-long-headers-and-4-columns-correctly.snap.svg @@ -7,56 +7,56 @@ ┌───────────────┬───────────────┬──────────────────┬──────────────────┐ Very Long - + Very Long - + Very Long Column - + Very Long Column Column Header - + Column Header - + Header Three - + Header Four One - + Two - - + + ├───────────────┼───────────────┼──────────────────┼──────────────────┤ - Data 1.1 - - Data 1.2 - - Data 1.3 - - Data 1.4 + Data 1.1 + + Data 1.2 + + Data 1.3 + + Data 1.4 - Data 2.1 - - Data 2.2 - - Data 2.3 - - Data 2.4 + Data 2.1 + + Data 2.2 + + Data 2.3 + + Data 2.4 - Data 3.1 - - Data 3.2 - - Data 3.3 - - Data 3.4 + Data 3.1 + + Data 3.2 + + Data 3.3 + + Data 3.4 └───────────────┴───────────────┴──────────────────┴──────────────────┘ diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-mixed-emojis-As-.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-mixed-emojis-As-.snap.svg index 707bf53f43..0db46485e0 100644 --- a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-mixed-emojis-As-.snap.svg +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-mixed-emojis-As-.snap.svg @@ -7,25 +7,25 @@ 
┌───────────────┬───────────────────┬────────────────┐ Mixed 😃 中文 - + Complex 🚀 日本語 - + Text 📝 한국어 ├───────────────┼───────────────────┼────────────────┤ - 你好 😃 - - こんにちは 🚀 - - 안녕하세요 📝 + 你好 😃 + + こんにちは 🚀 + + 안녕하세요 📝 - World 🌍 - - Code 💻 - - Pizza 🍕 + World 🌍 + + Code 💻 + + Pizza 🍕 └───────────────┴───────────────────┴────────────────┘ diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-only-Asian-chara-.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-only-Asian-chara-.snap.svg index 0f51eba244..b808d1e335 100644 --- a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-only-Asian-chara-.snap.svg +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-only-Asian-chara-.snap.svg @@ -7,25 +7,25 @@ ┌──────────────┬─────────────────┬───────────────┐ Chinese 中文 - + Japanese 日本語 - + Korean 한국어 ├──────────────┼─────────────────┼───────────────┤ - 你好 - - こんにちは - - 안녕하세요 + 你好 + + こんにちは + + 안녕하세요 - 世界 - - 世界 - - 세계 + 世界 + + 世界 + + 세계 └──────────────┴─────────────────┴───────────────┘ diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-only-emojis-and-.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-only-emojis-and-.snap.svg index 1a849696dd..9277078253 100644 --- a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-only-emojis-and-.snap.svg +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-a-table-with-only-emojis-and-.snap.svg @@ -7,25 +7,25 @@ ┌──────────┬───────────┬──────────┐ Happy 😀 - + Rocket 🚀 - + Heart ❤️ ├──────────┼───────────┼──────────┤ - Smile 😃 - - Fire 🔥 - - Love 💖 + Smile 😃 + + Fire 🔥 + + Love 💖 - Cool 😎 - - Star ⭐ - - Blue 💙 + Cool 😎 + + Star ⭐ + + Blue 💙 └──────────┴───────────┴──────────┘ diff --git 
a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-complex-markdown-in-rows-and-.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-complex-markdown-in-rows-and-.snap.svg new file mode 100644 index 0000000000..8b251c3ab2 --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-complex-markdown-in-rows-and-.snap.svg @@ -0,0 +1,53 @@ + + + + + ┌───────────────┬─────────────────────────────┐ + + Feature + + Markdown + + ├───────────────┼─────────────────────────────┤ + + Bold + + Bold Text + + + Italic + + Italic Text + + + Combined + + Bold and Italic + + + Link + + Google ( + https://google.com + ) + + + Code + + const x = 1 + + + Strikethrough + + Strike + + + Underline + + Underline + + └───────────────┴─────────────────────────────┘ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-correctly-when-headers-are-em-.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-correctly-when-headers-are-em-.snap.svg index 2cc7b1cadd..b2523badcd 100644 --- a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-correctly-when-headers-are-em-.snap.svg +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-correctly-when-headers-are-em-.snap.svg @@ -6,13 +6,13 @@ ┌────────┬────────┐ - + ├────────┼────────┤ - Data 1 - - Data 2 + Data 1 + + Data 2 └────────┴────────┘ diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-correctly-when-there-are-more-.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-correctly-when-there-are-more-.snap.svg index 452bb1fb12..89ad1cfb4c 100644 --- a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-correctly-when-there-are-more-.snap.svg +++ 
b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-renders-correctly-when-there-are-more-.snap.svg @@ -7,17 +7,17 @@ ┌──────────┬──────────┬──────────┐ Header 1 - + Header 2 - + Header 3 ├──────────┼──────────┼──────────┤ - Data 1 - - Data 2 - + Data 1 + + Data 2 + └──────────┴──────────┴──────────┘ diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-strips-bold-markers-from-headers-and-renders-them-correctly.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-strips-bold-markers-from-headers-and-renders-them-correctly.snap.svg index 6de776060b..717a8803f8 100644 --- a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-strips-bold-markers-from-headers-and-renders-them-correctly.snap.svg +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-strips-bold-markers-from-headers-and-renders-them-correctly.snap.svg @@ -7,18 +7,18 @@ ┌─────────────┬───────────────┬──────────────┐ Bold Header - + Normal Header - + Another Bold ├─────────────┼───────────────┼──────────────┤ - Data 1 - - Data 2 - - Data 3 + Data 1 + + Data 2 + + Data 3 └─────────────┴───────────────┴──────────────┘ diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-all-long-columns-correctly.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-all-long-columns-correctly.snap.svg index 4b459cfea0..e59cefbc72 100644 --- a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-all-long-columns-correctly.snap.svg +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-all-long-columns-correctly.snap.svg @@ -7,45 +7,45 @@ ┌────────────────┬────────────────┬─────────────────┐ Col 1 - + Col 2 - + Col 3 ├────────────────┼────────────────┼─────────────────┤ - This is a very - - This is also a - - And this is the + This is a very + + This is also a + + And this is the - long text that - - very long 
text - - third long text + long text that + + very long text + + third long text - needs wrapping - - that needs - - that needs + needs wrapping + + that needs + + that needs - in column 1 - - wrapping in - - wrapping in + in column 1 + + wrapping in + + wrapping in - - column 2 - - column 3 + + column 2 + + column 3 └────────────────┴────────────────┴─────────────────┘ diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-columns-with-punctuation-correctly.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-columns-with-punctuation-correctly.snap.svg index 7173ce475f..42f7b188f8 100644 --- a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-columns-with-punctuation-correctly.snap.svg +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-columns-with-punctuation-correctly.snap.svg @@ -7,44 +7,44 @@ ┌───────────────────┬───────────────┬─────────────────┐ Punctuation 1 - + Punctuation 2 - + Punctuation 3 ├───────────────────┼───────────────┼─────────────────┤ - Start. Stop. - - Semi; colon: - - At@ Hash# + Start. Stop. + + Semi; colon: + + At@ Hash# - Comma, separated. - - Pipe| Slash/ - - Dollar$ + Comma, separated. + + Pipe| Slash/ + + Dollar$ - Exclamation! - - Backslash\ - - Percent% Caret^ + Exclamation! + + Backslash\ + + Percent% Caret^ - Question? - - - Ampersand& + Question? 
+ + + Ampersand& - hyphen-ated - - - Asterisk* + hyphen-ated + + + Asterisk* └───────────────────┴───────────────┴─────────────────┘ diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-long-cell-content-correctly.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-long-cell-content-correctly.snap.svg index 7f7b67a7dd..2cfd46bc54 100644 --- a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-long-cell-content-correctly.snap.svg +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-long-cell-content-correctly.snap.svg @@ -7,28 +7,28 @@ ┌───────┬─────────────────────────────┬───────┐ Col 1 - + Col 2 - + Col 3 ├───────┼─────────────────────────────┼───────┤ - Short - - This is a very long cell - - Short + Short + + This is a very long cell + + Short - - content that should wrap to - + + content that should wrap to + - - multiple lines - + + multiple lines + └───────┴─────────────────────────────┴───────┘ diff --git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-mixed-long-and-short-columns-correctly.snap.svg b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-mixed-long-and-short-columns-correctly.snap.svg index 3ff0542a26..0e5dbcbb30 100644 --- a/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-mixed-long-and-short-columns-correctly.snap.svg +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer-TableRenderer-wraps-mixed-long-and-short-columns-correctly.snap.svg @@ -7,29 +7,29 @@ ┌───────┬──────────────────────────┬────────┐ Short - + Long - + Medium ├───────┼──────────────────────────┼────────┤ - Tiny - - This is a very long text - - Not so + Tiny + + This is a very long text + + Not so - - that definitely needs to - - long + + that definitely needs to + + long - - wrap to the next line - + + wrap to the next line + └───────┴──────────────────────────┴────────┘ diff 
--git a/packages/cli/src/ui/utils/__snapshots__/TableRenderer.test.tsx.snap b/packages/cli/src/ui/utils/__snapshots__/TableRenderer.test.tsx.snap index 48bc00993a..9b5c1e875a 100644 --- a/packages/cli/src/ui/utils/__snapshots__/TableRenderer.test.tsx.snap +++ b/packages/cli/src/ui/utils/__snapshots__/TableRenderer.test.tsx.snap @@ -1,5 +1,53 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html +exports[`TableRenderer > 'calculates column widths based on ren…' 1`] = ` +" +┌────────┬────────┬────────┐ +│ Col 1 │ Col 2 │ Col 3 │ +├────────┼────────┼────────┤ +│ 123456 │ Normal │ Short │ +│ Short │ 123456 │ Normal │ +│ Normal │ Short │ 123456 │ +└────────┴────────┴────────┘ +" +`; + +exports[`TableRenderer > 'calculates width correctly for conten…' 1`] = ` +" +┌───────────────────────────────────┬───────────────────────────────┬─────────────────────────────────┐ +│ Col 1 │ Col 2 │ Col 3 │ +├───────────────────────────────────┼───────────────────────────────┼─────────────────────────────────┤ +│ Visit Google (https://google.com) │ Plain Text │ More Info │ +│ Info Here │ Visit Bing (https://bing.com) │ Links │ +│ Check This │ Search │ Visit Yahoo (https://yahoo.com) │ +└───────────────────────────────────┴───────────────────────────────┴─────────────────────────────────┘ +" +`; + +exports[`TableRenderer > 'does not parse markdown inside code s…' 1`] = ` +" +┌─────────────────┬──────────────────────┬──────────────────┐ +│ Col 1 │ Col 2 │ Col 3 │ +├─────────────────┼──────────────────────┼──────────────────┤ +│ **not bold** │ _not italic_ │ ~~not strike~~ │ +│ [not link](url) │ not underline │ https://not.link │ +│ Normal Text │ More Code: *test* │ ***nested*** │ +└─────────────────┴──────────────────────┴──────────────────┘ +" +`; + +exports[`TableRenderer > 'handles nested markdown styles recurs…' 1`] = ` +" +┌─────────────────────────────┬─────────────────────────────┬─────────────────────────────┐ +│ Header 1 │ Header 2 │ Header 3 │ 
+├─────────────────────────────┼─────────────────────────────┼─────────────────────────────┤ +│ Bold with Italic and Strike │ Normal │ Short │ +│ Short │ Bold with Italic and Strike │ Normal │ +│ Normal │ Short │ Bold with Italic and Strike │ +└─────────────────────────────┴─────────────────────────────┴─────────────────────────────┘ +" +`; + exports[`TableRenderer > 'handles non-ASCII characters (emojis …' 1`] = ` " ┌──────────────┬────────────┬───────────────┐ @@ -44,6 +92,22 @@ exports[`TableRenderer > 'renders a table with only emojis and …' 1`] = ` " `; +exports[`TableRenderer > 'renders complex markdown in rows and …' 1`] = ` +" +┌───────────────┬─────────────────────────────┐ +│ Feature │ Markdown │ +├───────────────┼─────────────────────────────┤ +│ Bold │ Bold Text │ +│ Italic │ Italic Text │ +│ Combined │ Bold and Italic │ +│ Link │ Google (https://google.com) │ +│ Code │ const x = 1 │ +│ Strikethrough │ Strike │ +│ Underline │ Underline │ +└───────────────┴─────────────────────────────┘ +" +`; + exports[`TableRenderer > 'renders correctly when headers are em…' 1`] = ` " ┌────────┬────────┐ diff --git a/packages/cli/src/ui/utils/markdownParsingUtils.test.ts b/packages/cli/src/ui/utils/markdownParsingUtils.test.ts new file mode 100644 index 0000000000..05f19f09f7 --- /dev/null +++ b/packages/cli/src/ui/utils/markdownParsingUtils.test.ts @@ -0,0 +1,223 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeAll, vi } from 'vitest'; +import chalk from 'chalk'; +import { parseMarkdownToANSI } from './InlineMarkdownRenderer.js'; + +// Mock the theme to use explicit colors instead of empty strings from the default theme. +// This ensures that ansiColorize actually applies ANSI codes that we can verify. 
+vi.mock('../semantic-colors.js', () => ({ + theme: { + text: { + primary: 'white', + accent: 'cyan', + link: 'blue', + }, + }, +})); + +import { theme } from '../semantic-colors.js'; +import { resolveColor, INK_NAME_TO_HEX_MAP } from '../themes/color-utils.js'; +import { themeManager, DEFAULT_THEME } from '../themes/theme-manager.js'; + +describe('parsingUtils', () => { + beforeAll(() => { + themeManager.setActiveTheme(DEFAULT_THEME.name); + themeManager.setTerminalBackground(undefined); + }); + + /** + * Helper to replicate the colorization logic for expected values. + */ + const expectedColorize = (str: string, color: string) => { + const resolved = resolveColor(color); + if (!resolved) return str; + if (resolved.startsWith('#')) return chalk.hex(resolved)(str); + const mappedHex = INK_NAME_TO_HEX_MAP[resolved]; + if (mappedHex) return chalk.hex(mappedHex)(str); + + // Simple mapping for standard colors if they aren't in the hex map + switch (resolved) { + case 'black': + return chalk.black(str); + case 'red': + return chalk.red(str); + case 'green': + return chalk.green(str); + case 'yellow': + return chalk.yellow(str); + case 'blue': + return chalk.blue(str); + case 'magenta': + return chalk.magenta(str); + case 'cyan': + return chalk.cyan(str); + case 'white': + return chalk.white(str); + case 'gray': + case 'grey': + return chalk.gray(str); + default: + return str; + } + }; + + const primary = (str: string) => expectedColorize(str, theme.text.primary); + const accent = (str: string) => expectedColorize(str, theme.text.accent); + const link = (str: string) => expectedColorize(str, theme.text.link); + + describe('parseMarkdownToANSI', () => { + it('should return plain text with default color', () => { + const input = 'Hello world'; + const output = parseMarkdownToANSI(input); + expect(output).toBe(primary(input)); + }); + + it('should handle bold text', () => { + const input = 'This is **bold** text'; + const output = parseMarkdownToANSI(input); + 
expect(output).toBe( + `${primary('This is ')}${chalk.bold(primary('bold'))}${primary(' text')}`, + ); + }); + + it('should handle italic text with *', () => { + const input = 'This is *italic* text'; + const output = parseMarkdownToANSI(input); + expect(output).toBe( + `${primary('This is ')}${chalk.italic(primary('italic'))}${primary(' text')}`, + ); + }); + + it('should handle italic text with _', () => { + const input = 'This is _italic_ text'; + const output = parseMarkdownToANSI(input); + expect(output).toBe( + `${primary('This is ')}${chalk.italic(primary('italic'))}${primary(' text')}`, + ); + }); + + it('should handle bold italic text with ***', () => { + const input = 'This is ***bold italic*** text'; + const output = parseMarkdownToANSI(input); + expect(output).toBe( + `${primary('This is ')}${chalk.bold(chalk.italic(primary('bold italic')))}${primary(' text')}`, + ); + }); + + it('should handle strikethrough text', () => { + const input = 'This is ~~strikethrough~~ text'; + const output = parseMarkdownToANSI(input); + expect(output).toBe( + `${primary('This is ')}${chalk.strikethrough(primary('strikethrough'))}${primary(' text')}`, + ); + }); + + it('should handle inline code', () => { + const input = 'This is `code` text'; + const output = parseMarkdownToANSI(input); + expect(output).toBe( + `${primary('This is ')}${accent('code')}${primary(' text')}`, + ); + }); + + it('should handle links', () => { + const input = 'Check [this link](https://example.com)'; + const output = parseMarkdownToANSI(input); + expect(output).toBe( + `${primary('Check ')}${primary('this link')}${primary(' (')}${link( + 'https://example.com', + )}${primary(')')}`, + ); + }); + + it('should handle bare URLs', () => { + const input = 'Visit https://google.com now'; + const output = parseMarkdownToANSI(input); + expect(output).toBe( + `${primary('Visit ')}${link('https://google.com')}${primary(' now')}`, + ); + }); + + it('should handle underline tags', () => { + const input = 
'This is underlined text'; + const output = parseMarkdownToANSI(input); + expect(output).toBe( + `${primary('This is ')}${chalk.underline(primary('underlined'))}${primary(' text')}`, + ); + }); + + it('should handle complex mixed markdown', () => { + const input = '**Bold** and *italic* and `code` and [link](url)'; + const output = parseMarkdownToANSI(input); + expect(output).toBe( + `${chalk.bold(primary('Bold'))}${primary(' and ')}${chalk.italic( + primary('italic'), + )}${primary(' and ')}${accent('code')}${primary(' and ')}${primary( + 'link', + )}${primary(' (')}${link('url')}${primary(')')}`, + ); + }); + + it('should respect custom default color', () => { + const customColor = 'cyan'; + const input = 'Hello **world**'; + const output = parseMarkdownToANSI(input, customColor); + const cyan = (str: string) => expectedColorize(str, 'cyan'); + expect(output).toBe(`${cyan('Hello ')}${chalk.bold(cyan('world'))}`); + }); + + it('should handle nested formatting in bold/italic', () => { + const input = '**Bold with *italic* inside**'; + const output = parseMarkdownToANSI(input); + expect(output).toBe( + chalk.bold( + `${primary('Bold with ')}${chalk.italic(primary('italic'))}${primary( + ' inside', + )}`, + ), + ); + }); + + it('should handle hex colors as default', () => { + const hexColor = '#ff00ff'; + const input = 'Hello **world**'; + const output = parseMarkdownToANSI(input, hexColor); + const magenta = (str: string) => chalk.hex('#ff00ff')(str); + expect(output).toBe( + `${magenta('Hello ')}${chalk.bold(magenta('world'))}`, + ); + }); + + it('should override default color with link color', () => { + const input = 'Check [link](url)'; + const output = parseMarkdownToANSI(input, 'red'); + const red = (str: string) => chalk.red(str); + expect(output).toBe( + `${red('Check ')}${red('link')}${red(' (')}${link('url')}${red(')')}`, + ); + }); + + it('should override default color with accent color for code', () => { + const input = 'Code: `const x = 1`'; + const 
output = parseMarkdownToANSI(input, 'green'); + const green = (str: string) => chalk.green(str); + const cyan = (str: string) => chalk.cyan(str); + expect(output).toBe(`${green('Code: ')}${cyan('const x = 1')}`); + }); + + it('should handle nested formatting with color overrides', () => { + const input = '**Bold with `code` inside**'; + const output = parseMarkdownToANSI(input); + expect(output).toBe( + chalk.bold( + `${primary('Bold with ')}${accent('code')}${primary(' inside')}`, + ), + ); + }); + }); +}); From 4b7ce1fe67516eba17b63e2db1947b55ecd5cccb Mon Sep 17 00:00:00 2001 From: Tommaso Sciortino Date: Thu, 26 Feb 2026 17:50:21 -0800 Subject: [PATCH 17/41] Avoid overaggressive unescaping (#20520) --- .../src/tools/confirmation-policy.test.ts | 1 + packages/core/src/tools/line-endings.test.ts | 17 +- packages/core/src/tools/write-file.test.ts | 160 ++--- packages/core/src/tools/write-file.ts | 43 +- packages/core/src/utils/editCorrector.test.ts | 631 +----------------- packages/core/src/utils/editCorrector.ts | 627 +---------------- 6 files changed, 110 insertions(+), 1369 deletions(-) diff --git a/packages/core/src/tools/confirmation-policy.test.ts b/packages/core/src/tools/confirmation-policy.test.ts index c6ad1f5e94..a20bb611e3 100644 --- a/packages/core/src/tools/confirmation-policy.test.ts +++ b/packages/core/src/tools/confirmation-policy.test.ts @@ -67,6 +67,7 @@ describe('Tool Confirmation Policy Updates', () => { getBaseLlmClient: () => ({}), getDisableLLMCorrection: () => true, getIdeMode: () => false, + getActiveModel: () => 'test-model', getWorkspaceContext: () => ({ isPathWithinWorkspace: () => true, getDirectories: () => [rootDir], diff --git a/packages/core/src/tools/line-endings.test.ts b/packages/core/src/tools/line-endings.test.ts index f62d684712..981e602b5b 100644 --- a/packages/core/src/tools/line-endings.test.ts +++ b/packages/core/src/tools/line-endings.test.ts @@ -25,10 +25,7 @@ import fs from 'node:fs'; import os from 'node:os'; import { 
GeminiClient } from '../core/client.js'; import type { BaseLlmClient } from '../core/baseLlmClient.js'; -import { - ensureCorrectEdit, - ensureCorrectFileContent, -} from '../utils/editCorrector.js'; +import { ensureCorrectFileContent } from '../utils/editCorrector.js'; import { StandardFileSystemService } from '../services/fileSystemService.js'; import { WorkspaceContext } from '../utils/workspaceContext.js'; import { @@ -52,7 +49,6 @@ vi.mock('../ide/ide-client.js', () => ({ let mockGeminiClientInstance: Mocked; let mockBaseLlmClientInstance: Mocked; -const mockEnsureCorrectEdit = vi.fn(); const mockEnsureCorrectFileContent = vi.fn(); // Mock Config @@ -81,6 +77,7 @@ const mockConfigInternal = { getGeminiMdFileCount: () => 0, setGeminiMdFileCount: vi.fn(), getDisableLLMCorrection: vi.fn(() => false), + getActiveModel: () => 'test-model', validatePathAccess: vi.fn().mockReturnValue(null), getToolRegistry: () => ({ @@ -120,7 +117,6 @@ describe('Line Ending Preservation', () => { generateJson: vi.fn(), } as unknown as Mocked; - vi.mocked(ensureCorrectEdit).mockImplementation(mockEnsureCorrectEdit); vi.mocked(ensureCorrectFileContent).mockImplementation( mockEnsureCorrectFileContent, ); @@ -177,14 +173,7 @@ describe('Line Ending Preservation', () => { const proposedContent = 'line1\nline2\nline3\n'; // Mock corrections to return proposed content as-is (but usually normalized) - mockEnsureCorrectEdit.mockResolvedValue({ - params: { - file_path: filePath, - old_string: originalContent, - new_string: proposedContent, - }, - occurrences: 1, - }); + mockEnsureCorrectFileContent.mockResolvedValue(proposedContent); const params = { file_path: filePath, content: proposedContent }; const invocation = tool.build(params); diff --git a/packages/core/src/tools/write-file.test.ts b/packages/core/src/tools/write-file.test.ts index 84fd4d93d7..0b978f14f9 100644 --- a/packages/core/src/tools/write-file.test.ts +++ b/packages/core/src/tools/write-file.test.ts @@ -23,7 +23,6 @@ import 
type { ToolResult, } from './tools.js'; import { ToolConfirmationOutcome } from './tools.js'; -import { type EditToolParams } from './edit.js'; import type { Config } from '../config/config.js'; import { ApprovalMode } from '../policy/types.js'; import type { ToolRegistry } from './tool-registry.js'; @@ -33,11 +32,7 @@ import fs from 'node:fs'; import os from 'node:os'; import { GeminiClient } from '../core/client.js'; import type { BaseLlmClient } from '../core/baseLlmClient.js'; -import type { CorrectedEditResult } from '../utils/editCorrector.js'; -import { - ensureCorrectEdit, - ensureCorrectFileContent, -} from '../utils/editCorrector.js'; +import { ensureCorrectFileContent } from '../utils/editCorrector.js'; import { StandardFileSystemService } from '../services/fileSystemService.js'; import type { DiffUpdateResult } from '../ide/ide-client.js'; import { IdeClient } from '../ide/ide-client.js'; @@ -61,7 +56,6 @@ vi.mock('../ide/ide-client.js', () => ({ let mockGeminiClientInstance: Mocked; let mockBaseLlmClientInstance: Mocked; let mockConfig: Config; -const mockEnsureCorrectEdit = vi.fn(); const mockEnsureCorrectFileContent = vi.fn(); const mockIdeClient = { openDiff: vi.fn(), @@ -69,7 +63,6 @@ const mockIdeClient = { }; // Wire up the mocked functions to be used by the actual module imports -vi.mocked(ensureCorrectEdit).mockImplementation(mockEnsureCorrectEdit); vi.mocked(ensureCorrectFileContent).mockImplementation( mockEnsureCorrectFileContent, ); @@ -110,6 +103,7 @@ const mockConfigInternal = { }) as unknown as ToolRegistry, isInteractive: () => false, getDisableLLMCorrection: vi.fn(() => true), + getActiveModel: () => 'test-model', storage: { getProjectTempDir: vi.fn().mockReturnValue('/tmp/project'), }, @@ -179,7 +173,6 @@ describe('WriteFileTool', () => { generateJson: vi.fn(), } as unknown as Mocked; - vi.mocked(ensureCorrectEdit).mockImplementation(mockEnsureCorrectEdit); vi.mocked(ensureCorrectFileContent).mockImplementation( 
mockEnsureCorrectFileContent, ); @@ -199,28 +192,9 @@ describe('WriteFileTool', () => { // Reset mocks before each test mockConfigInternal.getApprovalMode.mockReturnValue(ApprovalMode.DEFAULT); mockConfigInternal.setApprovalMode.mockClear(); - mockEnsureCorrectEdit.mockReset(); mockEnsureCorrectFileContent.mockReset(); // Default mock implementations that return valid structures - mockEnsureCorrectEdit.mockImplementation( - async ( - filePath: string, - _currentContent: string, - params: EditToolParams, - _client: GeminiClient, - _baseClient: BaseLlmClient, - signal?: AbortSignal, - ): Promise => { - if (signal?.aborted) { - return Promise.reject(new Error('Aborted')); - } - return Promise.resolve({ - params: { ...params, new_string: params.new_string ?? '' }, - occurrences: 1, - }); - }, - ); mockEnsureCorrectFileContent.mockImplementation( async ( content: string, @@ -369,15 +343,43 @@ describe('WriteFileTool', () => { mockBaseLlmClientInstance, abortSignal, true, + true, // aggressiveUnescape ); - expect(mockEnsureCorrectEdit).not.toHaveBeenCalled(); expect(result.correctedContent).toBe(correctedContent); expect(result.originalContent).toBe(''); expect(result.fileExists).toBe(false); expect(result.error).toBeUndefined(); }); - it('should call ensureCorrectEdit for an existing file', async () => { + it('should set aggressiveUnescape to false for gemini-3 models', async () => { + const filePath = path.join(rootDir, 'gemini3_file.txt'); + const proposedContent = 'Proposed new content.'; + const abortSignal = new AbortController().signal; + + const mockGemini3Config = { + ...mockConfig, + getActiveModel: () => 'gemini-3.0-pro', + } as unknown as Config; + + mockEnsureCorrectFileContent.mockResolvedValue('Corrected new content.'); + + await getCorrectedFileContent( + mockGemini3Config, + filePath, + proposedContent, + abortSignal, + ); + + expect(mockEnsureCorrectFileContent).toHaveBeenCalledWith( + proposedContent, + mockBaseLlmClientInstance, + abortSignal, + true, 
+ false, // aggressiveUnescape + ); + }); + + it('should call ensureCorrectFileContent for an existing file', async () => { const filePath = path.join(rootDir, 'existing_corrected_file.txt'); const originalContent = 'Original existing content.'; const proposedContent = 'Proposed replacement content.'; @@ -386,14 +388,7 @@ describe('WriteFileTool', () => { fs.writeFileSync(filePath, originalContent, 'utf8'); // Ensure this mock is active and returns the correct structure - mockEnsureCorrectEdit.mockResolvedValue({ - params: { - file_path: filePath, - old_string: originalContent, - new_string: correctedProposedContent, - }, - occurrences: 1, - } as CorrectedEditResult); + mockEnsureCorrectFileContent.mockResolvedValue(correctedProposedContent); const result = await getCorrectedFileContent( mockConfig, @@ -402,20 +397,13 @@ describe('WriteFileTool', () => { abortSignal, ); - expect(mockEnsureCorrectEdit).toHaveBeenCalledWith( - filePath, - originalContent, - { - old_string: originalContent, - new_string: proposedContent, - file_path: filePath, - }, - mockGeminiClientInstance, + expect(mockEnsureCorrectFileContent).toHaveBeenCalledWith( + proposedContent, mockBaseLlmClientInstance, abortSignal, true, + true, // aggressiveUnescape ); - expect(mockEnsureCorrectFileContent).not.toHaveBeenCalled(); expect(result.correctedContent).toBe(correctedProposedContent); expect(result.originalContent).toBe(originalContent); expect(result.fileExists).toBe(true); @@ -441,7 +429,6 @@ describe('WriteFileTool', () => { ); expect(fsService.readTextFile).toHaveBeenCalledWith(filePath); - expect(mockEnsureCorrectEdit).not.toHaveBeenCalled(); expect(mockEnsureCorrectFileContent).not.toHaveBeenCalled(); expect(result.correctedContent).toBe(proposedContent); expect(result.originalContent).toBe(''); @@ -492,6 +479,7 @@ describe('WriteFileTool', () => { mockBaseLlmClientInstance, abortSignal, true, + true, // aggressiveUnescape ); expect(confirmation).toEqual( expect.objectContaining({ @@ 
-516,14 +504,7 @@ describe('WriteFileTool', () => { 'Corrected replacement for confirmation.'; fs.writeFileSync(filePath, originalContent, 'utf8'); - mockEnsureCorrectEdit.mockResolvedValue({ - params: { - file_path: filePath, - old_string: originalContent, - new_string: correctedProposedContent, - }, - occurrences: 1, - }); + mockEnsureCorrectFileContent.mockResolvedValue(correctedProposedContent); const params = { file_path: filePath, content: proposedContent }; const invocation = tool.build(params); @@ -531,18 +512,12 @@ describe('WriteFileTool', () => { abortSignal, )) as ToolEditConfirmationDetails; - expect(mockEnsureCorrectEdit).toHaveBeenCalledWith( - filePath, - originalContent, - { - old_string: originalContent, - new_string: proposedContent, - file_path: filePath, - }, - mockGeminiClientInstance, + expect(mockEnsureCorrectFileContent).toHaveBeenCalledWith( + proposedContent, mockBaseLlmClientInstance, abortSignal, true, + true, // aggressiveUnescape ); expect(confirmation).toEqual( expect.objectContaining({ @@ -738,6 +713,7 @@ describe('WriteFileTool', () => { mockBaseLlmClientInstance, abortSignal, true, + true, // aggressiveUnescape ); expect(result.llmContent).toMatch( /Successfully created and wrote to new file/, @@ -768,14 +744,7 @@ describe('WriteFileTool', () => { const correctedProposedContent = 'Corrected overwrite for execute.'; fs.writeFileSync(filePath, initialContent, 'utf8'); - mockEnsureCorrectEdit.mockResolvedValue({ - params: { - file_path: filePath, - old_string: initialContent, - new_string: correctedProposedContent, - }, - occurrences: 1, - }); + mockEnsureCorrectFileContent.mockResolvedValue(correctedProposedContent); const params = { file_path: filePath, content: proposedContent }; const invocation = tool.build(params); @@ -784,18 +753,12 @@ describe('WriteFileTool', () => { const result = await invocation.execute(abortSignal); - expect(mockEnsureCorrectEdit).toHaveBeenCalledWith( - filePath, - initialContent, - { - old_string: 
initialContent, - new_string: proposedContent, - file_path: filePath, - }, - mockGeminiClientInstance, + expect(mockEnsureCorrectFileContent).toHaveBeenCalledWith( + proposedContent, mockBaseLlmClientInstance, abortSignal, true, + true, // aggressiveUnescape ); expect(result.llmContent).toMatch(/Successfully overwrote file/); const writtenContent = await fsService.readTextFile(filePath); @@ -892,14 +855,7 @@ describe('WriteFileTool', () => { newLines[50] = 'Line 51 Modified'; // Modify one line in the middle const newContent = newLines.join('\n'); - mockEnsureCorrectEdit.mockResolvedValue({ - params: { - file_path: filePath, - old_string: originalContent, - new_string: newContent, - }, - occurrences: 1, - }); + mockEnsureCorrectFileContent.mockResolvedValue(newContent); const params = { file_path: filePath, content: newContent }; const invocation = tool.build(params); @@ -1072,13 +1028,13 @@ describe('WriteFileTool', () => { mockBaseLlmClientInstance, abortSignal, true, + true, // aggressiveUnescape ); - expect(mockEnsureCorrectEdit).not.toHaveBeenCalled(); expect(result.correctedContent).toBe(proposedContent); expect(result.fileExists).toBe(false); }); - it('should call ensureCorrectEdit with disableLLMCorrection=true for an existing file when disabled', async () => { + it('should call ensureCorrectFileContent with disableLLMCorrection=true for an existing file when disabled', async () => { const filePath = path.join(rootDir, 'existing_file_no_correction.txt'); const originalContent = 'Original content.'; const proposedContent = 'Proposed content.'; @@ -1086,14 +1042,7 @@ describe('WriteFileTool', () => { mockConfigInternal.getDisableLLMCorrection.mockReturnValue(true); // Ensure the mock returns the content passed to it - mockEnsureCorrectEdit.mockResolvedValue({ - params: { - file_path: filePath, - old_string: originalContent, - new_string: proposedContent, - }, - occurrences: 1, - }); + mockEnsureCorrectFileContent.mockResolvedValue(proposedContent); const 
result = await getCorrectedFileContent( mockConfig, @@ -1102,16 +1051,13 @@ describe('WriteFileTool', () => { abortSignal, ); - expect(mockEnsureCorrectEdit).toHaveBeenCalledWith( - filePath, - originalContent, - expect.anything(), // params object - mockGeminiClientInstance, + expect(mockEnsureCorrectFileContent).toHaveBeenCalledWith( + proposedContent, mockBaseLlmClientInstance, abortSignal, true, + true, // aggressiveUnescape ); - expect(mockEnsureCorrectFileContent).not.toHaveBeenCalled(); expect(result.correctedContent).toBe(proposedContent); expect(result.originalContent).toBe(originalContent); expect(result.fileExists).toBe(true); diff --git a/packages/core/src/tools/write-file.ts b/packages/core/src/tools/write-file.ts index d7708d767a..1c8a230001 100644 --- a/packages/core/src/tools/write-file.ts +++ b/packages/core/src/tools/write-file.ts @@ -26,10 +26,7 @@ import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; import { ToolErrorType } from './tool-error.js'; import { makeRelative, shortenPath } from '../utils/paths.js'; import { getErrorMessage, isNodeError } from '../utils/errors.js'; -import { - ensureCorrectEdit, - ensureCorrectFileContent, -} from '../utils/editCorrector.js'; +import { ensureCorrectFileContent } from '../utils/editCorrector.js'; import { detectLineEnding } from '../utils/textUtils.js'; import { DEFAULT_DIFF_OPTIONS, getDiffStat } from './diffOptions.js'; import { getDiffContextSnippet } from './diff-utils.js'; @@ -48,6 +45,7 @@ import { debugLogger } from '../utils/debugLogger.js'; import { WRITE_FILE_DEFINITION } from './definitions/coreTools.js'; import { resolveToolDeclaration } from './definitions/resolver.js'; import { detectOmissionPlaceholders } from './omissionPlaceholderDetector.js'; +import { isGemini3Model } from '../config/models.js'; /** * Parameters for the WriteFile tool @@ -113,35 +111,16 @@ export async function getCorrectedFileContent( } } - // If readError is set, we have returned. 
- // So, file was either read successfully (fileExists=true, originalContent set) - // or it was ENOENT (fileExists=false, originalContent=''). + const aggressiveUnescape = !isGemini3Model(config.getActiveModel()); + + correctedContent = await ensureCorrectFileContent( + proposedContent, + config.getBaseLlmClient(), + abortSignal, + config.getDisableLLMCorrection(), + aggressiveUnescape, + ); - if (fileExists) { - // This implies originalContent is available - const { params: correctedParams } = await ensureCorrectEdit( - filePath, - originalContent, - { - old_string: originalContent, // Treat entire current content as old_string - new_string: proposedContent, - file_path: filePath, - }, - config.getGeminiClient(), - config.getBaseLlmClient(), - abortSignal, - config.getDisableLLMCorrection(), - ); - correctedContent = correctedParams.new_string; - } else { - // This implies new file (ENOENT) - correctedContent = await ensureCorrectFileContent( - proposedContent, - config.getBaseLlmClient(), - abortSignal, - config.getDisableLLMCorrection(), - ); - } return { originalContent, correctedContent, fileExists }; } diff --git a/packages/core/src/utils/editCorrector.test.ts b/packages/core/src/utils/editCorrector.test.ts index 35b126a5ea..533b49b9e4 100644 --- a/packages/core/src/utils/editCorrector.test.ts +++ b/packages/core/src/utils/editCorrector.test.ts @@ -5,10 +5,8 @@ */ /* eslint-disable @typescript-eslint/no-explicit-any */ -import type { Mock, Mocked } from 'vitest'; +import type { Mocked } from 'vitest'; import { vi, describe, it, expect, beforeEach } from 'vitest'; -import * as fs from 'node:fs'; -import { EDIT_TOOL_NAME } from '../tools/tool-names.js'; import type { BaseLlmClient } from '../core/baseLlmClient.js'; // MOCKS @@ -16,75 +14,16 @@ let callCount = 0; const mockResponses: any[] = []; let mockGenerateJson: any; -let mockStartChat: any; -let mockSendMessageStream: any; -vi.mock('fs', () => ({ - statSync: vi.fn(), - mkdirSync: vi.fn(), - 
createWriteStream: vi.fn(() => ({ - write: vi.fn(), - on: vi.fn(), - })), -})); - -vi.mock('../core/client.js', () => ({ - GeminiClient: vi.fn().mockImplementation(function ( - this: any, - _config: Config, - ) { - this.startChat = (...params: any[]) => mockStartChat(...params); - this.sendMessageStream = (...params: any[]) => - mockSendMessageStream(...params); - return this; - }), -})); // END MOCKS import { - countOccurrences, - ensureCorrectEdit, ensureCorrectFileContent, unescapeStringForGeminiBug, resetEditCorrectorCaches_TEST_ONLY, } from './editCorrector.js'; -import { GeminiClient } from '../core/client.js'; -import type { Config } from '../config/config.js'; -import { ToolRegistry } from '../tools/tool-registry.js'; - -vi.mock('../tools/tool-registry.js'); describe('editCorrector', () => { - describe('countOccurrences', () => { - it('should return 0 for empty string', () => { - expect(countOccurrences('', 'a')).toBe(0); - }); - it('should return 0 for empty substring', () => { - expect(countOccurrences('abc', '')).toBe(0); - }); - it('should return 0 if substring is not found', () => { - expect(countOccurrences('abc', 'd')).toBe(0); - }); - it('should return 1 if substring is found once', () => { - expect(countOccurrences('abc', 'b')).toBe(1); - }); - it('should return correct count for multiple occurrences', () => { - expect(countOccurrences('ababa', 'a')).toBe(3); - expect(countOccurrences('ababab', 'ab')).toBe(3); - }); - it('should count non-overlapping occurrences', () => { - expect(countOccurrences('aaaaa', 'aa')).toBe(2); - expect(countOccurrences('ababab', 'aba')).toBe(1); - }); - it('should correctly count occurrences when substring is longer', () => { - expect(countOccurrences('abc', 'abcdef')).toBe(0); - }); - it('should be case-sensitive', () => { - expect(countOccurrences('abcABC', 'a')).toBe(1); - expect(countOccurrences('abcABC', 'A')).toBe(1); - }); - }); - describe('unescapeStringForGeminiBug', () => { it('should unescape common 
sequences', () => { expect(unescapeStringForGeminiBug('\\n')).toBe('\n'); @@ -156,542 +95,6 @@ describe('editCorrector', () => { }); }); - describe('ensureCorrectEdit', () => { - let mockGeminiClientInstance: Mocked; - let mockBaseLlmClientInstance: Mocked; - let mockToolRegistry: Mocked; - let mockConfigInstance: Config; - const abortSignal = new AbortController().signal; - - beforeEach(() => { - mockToolRegistry = new ToolRegistry( - {} as Config, - {} as any, - ) as Mocked; - const configParams = { - apiKey: 'test-api-key', - model: 'test-model', - sandbox: false as boolean | string, - targetDir: '/test', - debugMode: false, - question: undefined as string | undefined, - - coreTools: undefined as string[] | undefined, - toolDiscoveryCommand: undefined as string | undefined, - toolCallCommand: undefined as string | undefined, - mcpServerCommand: undefined as string | undefined, - mcpServers: undefined as Record | undefined, - userAgent: 'test-agent', - userMemory: '', - geminiMdFileCount: 0, - alwaysSkipModificationConfirmation: false, - }; - mockConfigInstance = { - ...configParams, - getApiKey: vi.fn(() => configParams.apiKey), - getModel: vi.fn(() => configParams.model), - getSandbox: vi.fn(() => configParams.sandbox), - getTargetDir: vi.fn(() => configParams.targetDir), - getToolRegistry: vi.fn(() => mockToolRegistry), - getDebugMode: vi.fn(() => configParams.debugMode), - getQuestion: vi.fn(() => configParams.question), - - getCoreTools: vi.fn(() => configParams.coreTools), - getToolDiscoveryCommand: vi.fn(() => configParams.toolDiscoveryCommand), - getToolCallCommand: vi.fn(() => configParams.toolCallCommand), - getMcpServerCommand: vi.fn(() => configParams.mcpServerCommand), - getMcpServers: vi.fn(() => configParams.mcpServers), - getUserAgent: vi.fn(() => configParams.userAgent), - getUserMemory: vi.fn(() => configParams.userMemory), - setUserMemory: vi.fn((mem: string) => { - configParams.userMemory = mem; - }), - getGeminiMdFileCount: vi.fn(() => 
configParams.geminiMdFileCount), - setGeminiMdFileCount: vi.fn((count: number) => { - configParams.geminiMdFileCount = count; - }), - getAlwaysSkipModificationConfirmation: vi.fn( - () => configParams.alwaysSkipModificationConfirmation, - ), - setAlwaysSkipModificationConfirmation: vi.fn((skip: boolean) => { - configParams.alwaysSkipModificationConfirmation = skip; - }), - getQuotaErrorOccurred: vi.fn().mockReturnValue(false), - setQuotaErrorOccurred: vi.fn(), - } as unknown as Config; - - callCount = 0; - mockResponses.length = 0; - mockGenerateJson = vi - .fn() - .mockImplementation((_contents, _schema, signal) => { - // Check if the signal is aborted. If so, throw an error or return a specific response. - if (signal && signal.aborted) { - return Promise.reject(new Error('Aborted')); // Or some other specific error/response - } - const response = mockResponses[callCount]; - callCount++; - if (response === undefined) return Promise.resolve({}); - return Promise.resolve(response); - }); - mockStartChat = vi.fn(); - mockSendMessageStream = vi.fn(); - - mockGeminiClientInstance = new GeminiClient( - mockConfigInstance, - ) as Mocked; - mockGeminiClientInstance.getHistory = vi.fn().mockReturnValue([]); - mockBaseLlmClientInstance = { - generateJson: mockGenerateJson, - config: { - generationConfigService: { - getResolvedConfig: vi.fn().mockReturnValue({ - model: 'edit-corrector', - generateContentConfig: {}, - }), - }, - }, - } as unknown as Mocked; - resetEditCorrectorCaches_TEST_ONLY(); - }); - - describe('Scenario Group 1: originalParams.old_string matches currentContent directly', () => { - it('Test 1.1: old_string (no literal \\), new_string (escaped by Gemini) -> new_string unescaped', async () => { - const currentContent = 'This is a test string to find me.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find me', - new_string: 'replace with \\"this\\"', - }; - mockResponses.push({ - corrected_new_string_escaping: 'replace with 
"this"', - }); - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(1); - expect(result.params.new_string).toBe('replace with "this"'); - expect(result.params.old_string).toBe('find me'); - expect(result.occurrences).toBe(1); - }); - it('Test 1.2: old_string (no literal \\), new_string (correctly formatted) -> new_string unchanged', async () => { - const currentContent = 'This is a test string to find me.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find me', - new_string: 'replace with this', - }; - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(0); - expect(result.params.new_string).toBe('replace with this'); - expect(result.params.old_string).toBe('find me'); - expect(result.occurrences).toBe(1); - }); - it('Test 1.3: old_string (with literal \\), new_string (escaped by Gemini) -> new_string unchanged (still escaped)', async () => { - const currentContent = 'This is a test string to find\\me.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find\\me', - new_string: 'replace with \\"this\\"', - }; - mockResponses.push({ - corrected_new_string_escaping: 'replace with "this"', - }); - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(1); - expect(result.params.new_string).toBe('replace with "this"'); - expect(result.params.old_string).toBe('find\\me'); - expect(result.occurrences).toBe(1); - }); - it('Test 1.4: old_string (with literal \\), new_string (correctly formatted) -> 
new_string unchanged', async () => { - const currentContent = 'This is a test string to find\\me.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find\\me', - new_string: 'replace with this', - }; - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(0); - expect(result.params.new_string).toBe('replace with this'); - expect(result.params.old_string).toBe('find\\me'); - expect(result.occurrences).toBe(1); - }); - }); - - describe('Scenario Group 2: originalParams.old_string does NOT match, but unescapeStringForGeminiBug(originalParams.old_string) DOES match', () => { - it('Test 2.1: old_string (over-escaped, no intended literal \\), new_string (escaped by Gemini) -> new_string unescaped', async () => { - const currentContent = 'This is a test string to find "me".'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find \\"me\\"', - new_string: 'replace with \\"this\\"', - }; - mockResponses.push({ corrected_new_string: 'replace with "this"' }); - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(1); - expect(result.params.new_string).toBe('replace with "this"'); - expect(result.params.old_string).toBe('find "me"'); - expect(result.occurrences).toBe(1); - }); - it('Test 2.2: old_string (over-escaped, no intended literal \\), new_string (correctly formatted) -> new_string unescaped (harmlessly)', async () => { - const currentContent = 'This is a test string to find "me".'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find \\"me\\"', - new_string: 'replace with this', - }; - const result = await ensureCorrectEdit( - '/test/file.txt', - 
currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(0); - expect(result.params.new_string).toBe('replace with this'); - expect(result.params.old_string).toBe('find "me"'); - expect(result.occurrences).toBe(1); - }); - it('Test 2.3: old_string (over-escaped, with intended literal \\), new_string (simple) -> new_string corrected', async () => { - const currentContent = 'This is a test string to find \\me.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find \\\\me', - new_string: 'replace with foobar', - }; - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(0); - expect(result.params.new_string).toBe('replace with foobar'); - expect(result.params.old_string).toBe('find \\me'); - expect(result.occurrences).toBe(1); - }); - }); - - describe('Scenario Group 3: LLM Correction Path', () => { - it('Test 3.1: old_string (no literal \\), new_string (escaped by Gemini), LLM re-escapes new_string -> final new_string is double unescaped', async () => { - const currentContent = 'This is a test string to corrected find me.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find me', - new_string: 'replace with \\\\"this\\\\"', - }; - const llmNewString = 'LLM says replace with "that"'; - mockResponses.push({ corrected_new_string_escaping: llmNewString }); - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(1); - expect(result.params.new_string).toBe(llmNewString); - expect(result.params.old_string).toBe('find me'); - expect(result.occurrences).toBe(1); - }); 
- it('Test 3.2: old_string (with literal \\), new_string (escaped by Gemini), LLM re-escapes new_string -> final new_string is unescaped once', async () => { - const currentContent = 'This is a test string to corrected find me.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find\\me', - new_string: 'replace with \\\\"this\\\\"', - }; - const llmCorrectedOldString = 'corrected find me'; - const llmNewString = 'LLM says replace with "that"'; - mockResponses.push({ corrected_target_snippet: llmCorrectedOldString }); - mockResponses.push({ corrected_new_string: llmNewString }); - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(2); - expect(result.params.new_string).toBe(llmNewString); - expect(result.params.old_string).toBe(llmCorrectedOldString); - expect(result.occurrences).toBe(1); - }); - it('Test 3.3: old_string needs LLM, new_string is fine -> old_string corrected, new_string original', async () => { - const currentContent = 'This is a test string to be corrected.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'fiiind me', - new_string: 'replace with "this"', - }; - const llmCorrectedOldString = 'to be corrected'; - mockResponses.push({ corrected_target_snippet: llmCorrectedOldString }); - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(1); - expect(result.params.new_string).toBe('replace with "this"'); - expect(result.params.old_string).toBe(llmCorrectedOldString); - expect(result.occurrences).toBe(1); - }); - it('Test 3.4: LLM correction path, correctNewString returns the originalNewString it was passed (which was unescaped) -> final new_string is 
unescaped', async () => { - const currentContent = 'This is a test string to corrected find me.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find me', - new_string: 'replace with \\\\"this\\\\"', - }; - const newStringForLLMAndReturnedByLLM = 'replace with "this"'; - mockResponses.push({ - corrected_new_string_escaping: newStringForLLMAndReturnedByLLM, - }); - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(1); - expect(result.params.new_string).toBe(newStringForLLMAndReturnedByLLM); - expect(result.occurrences).toBe(1); - }); - }); - - describe('Scenario Group 4: No Match Found / Multiple Matches', () => { - it('Test 4.1: No version of old_string (original, unescaped, LLM-corrected) matches -> returns original params, 0 occurrences', async () => { - const currentContent = 'This content has nothing to find.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'nonexistent string', - new_string: 'some new string', - }; - mockResponses.push({ corrected_target_snippet: 'still nonexistent' }); - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(1); - expect(result.params).toEqual(originalParams); - expect(result.occurrences).toBe(0); - }); - it('Test 4.2: unescapedOldStringAttempt results in >1 occurrences -> returns original params, count occurrences', async () => { - const currentContent = - 'This content has find "me" and also find "me" again.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find "me"', - new_string: 'some new string', - }; - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - 
originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(0); - expect(result.params).toEqual(originalParams); - expect(result.occurrences).toBe(2); - }); - }); - - describe('Scenario Group 5: Specific unescapeStringForGeminiBug checks (integrated into ensureCorrectEdit)', () => { - it('Test 5.1: old_string needs LLM to become currentContent, new_string also needs correction', async () => { - const currentContent = 'const x = "a\nbc\\"def\\"'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'const x = \\"a\\nbc\\\\"def\\\\"', - new_string: 'const y = \\"new\\nval\\\\"content\\\\"', - }; - const expectedFinalNewString = 'const y = "new\nval\\"content\\"'; - mockResponses.push({ corrected_target_snippet: currentContent }); - mockResponses.push({ corrected_new_string: expectedFinalNewString }); - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(2); - expect(result.params.old_string).toBe(currentContent); - expect(result.params.new_string).toBe(expectedFinalNewString); - expect(result.occurrences).toBe(1); - }); - }); - - describe('Scenario Group 6: Concurrent Edits', () => { - it('Test 6.1: should return early if file was modified by another process', async () => { - const filePath = '/test/file.txt'; - const currentContent = - 'This content has been modified by someone else.'; - const originalParams = { - file_path: filePath, - old_string: 'nonexistent string', - new_string: 'some new string', - }; - - const now = Date.now(); - const lastEditTime = now - 5000; // 5 seconds ago - - // Mock the file's modification time to be recent - vi.spyOn(fs, 'statSync').mockReturnValue({ - mtimeMs: now, - } as fs.Stats); - - // Mock the last edit timestamp from our history to be in the 
past - const history = [ - { - role: 'model', - parts: [ - { - functionResponse: { - name: EDIT_TOOL_NAME, - id: `${EDIT_TOOL_NAME}-${lastEditTime}-123`, - response: { - output: { - llmContent: `Successfully modified file: ${filePath}`, - }, - }, - }, - }, - ], - }, - ]; - (mockGeminiClientInstance.getHistory as Mock).mockReturnValue(history); - - const result = await ensureCorrectEdit( - filePath, - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - - expect(result.occurrences).toBe(0); - expect(result.params).toEqual(originalParams); - }); - }); - - describe('Scenario Group 7: Trimming with Newline Preservation', () => { - it('Test 7.1: should preserve trailing newlines in new_string when trimming is applied', async () => { - const currentContent = ' find me'; // Matches old_string initially - const originalParams = { - file_path: '/test/file.txt', - old_string: ' find me', // Matches, but has whitespace to trim - new_string: ' replaced\n\n', // Needs trimming but preserve newlines - }; - - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - - // old_string should be trimmed to 'find me' because 'find me' also exists uniquely in ' find me' - expect(result.params.old_string).toBe('find me'); - // new_string should be trimmed of spaces but keep ALL newlines - expect(result.params.new_string).toBe('replaced\n\n'); - expect(result.occurrences).toBe(1); - }); - - it('Test 7.2: should handle trailing newlines separated by spaces (regression fix)', async () => { - const currentContent = 'find me '; // Matches old_string initially - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find me ', // Trailing space - new_string: 'replaced \n \n', // Trailing newlines with spaces - }; - - const result = await ensureCorrectEdit( - '/test/file.txt', - 
currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - - expect(result.params.old_string).toBe('find me'); - // Should capture both newlines and join them, stripping the space between - expect(result.params.new_string).toBe('replaced\n\n'); - expect(result.occurrences).toBe(1); - }); - }); - }); - describe('ensureCorrectFileContent', () => { let mockBaseLlmClientInstance: Mocked; const abortSignal = new AbortController().signal; @@ -811,5 +214,37 @@ describe('editCorrector', () => { expect(result).toBe(correctedContent); }); + + it('should return unescaped content when LLM is disabled and aggressiveUnescape is true', async () => { + const content = 'LaTeX command \\\\title{Example}'; + // unescapeStringForGeminiBug would change \\\\title to \title (literal tab and "itle") + const expected = 'LaTeX command \title{Example}'; + + const result = await ensureCorrectFileContent( + content, + mockBaseLlmClientInstance, + abortSignal, + true, // disableLLMCorrection + true, // aggressiveUnescape + ); + + expect(result).toBe(expected); + expect(mockGenerateJson).not.toHaveBeenCalled(); + }); + + it('should return original content when LLM is disabled and aggressiveUnescape is false', async () => { + const content = 'LaTeX command \\\\title{Example}'; + + const result = await ensureCorrectFileContent( + content, + mockBaseLlmClientInstance, + abortSignal, + true, // disableLLMCorrection + false, // aggressiveUnescape + ); + + expect(result).toBe(content); + expect(mockGenerateJson).not.toHaveBeenCalled(); + }); }); }); diff --git a/packages/core/src/utils/editCorrector.ts b/packages/core/src/utils/editCorrector.ts index 660bff0b17..f8ff81b97e 100644 --- a/packages/core/src/utils/editCorrector.ts +++ b/packages/core/src/utils/editCorrector.ts @@ -5,21 +5,7 @@ */ import type { Content } from '@google/genai'; -import type { GeminiClient } from '../core/client.js'; import type { BaseLlmClient } from 
'../core/baseLlmClient.js'; -import type { EditToolParams } from '../tools/edit.js'; -import { - EDIT_TOOL_NAME, - GREP_TOOL_NAME, - READ_FILE_TOOL_NAME, - READ_MANY_FILES_TOOL_NAME, - WRITE_FILE_TOOL_NAME, -} from '../tools/tool-names.js'; -import { - isFunctionResponse, - isFunctionCall, -} from '../utils/messageInspectors.js'; -import * as fs from 'node:fs'; import { promptIdContext } from './promptIdContext.js'; import { debugLogger } from './debugLogger.js'; import { LRUCache } from 'mnemonist'; @@ -39,336 +25,34 @@ function getPromptId(): string { const MAX_CACHE_SIZE = 50; -// Cache for ensureCorrectEdit results -const editCorrectionCache = new LRUCache( - MAX_CACHE_SIZE, -); - // Cache for ensureCorrectFileContent results const fileContentCorrectionCache = new LRUCache(MAX_CACHE_SIZE); -/** - * Defines the structure of the parameters within CorrectedEditResult - */ -interface CorrectedEditParams { - file_path: string; - old_string: string; - new_string: string; -} - -/** - * Defines the result structure for ensureCorrectEdit. - */ -export interface CorrectedEditResult { - params: CorrectedEditParams; - occurrences: number; -} - -/** - * Extracts the timestamp from the .id value, which is in format - * -- - * @param fcnId the ID value of a functionCall or functionResponse object - * @returns -1 if the timestamp could not be extracted, else the timestamp (as a number) - */ -function getTimestampFromFunctionId(fcnId: string): number { - const idParts = fcnId.split('-'); - if (idParts.length > 2) { - const timestamp = parseInt(idParts[1], 10); - if (!isNaN(timestamp)) { - return timestamp; - } - } - return -1; -} - -/** - * Will look through the gemini client history and determine when the most recent - * edit to a target file occurred. 
If no edit happened, it will return -1 - * @param filePath the path to the file - * @param client the geminiClient, so that we can get the history - * @returns a DateTime (as a number) of when the last edit occurred, or -1 if no edit was found. - */ -async function findLastEditTimestamp( - filePath: string, - client: GeminiClient, -): Promise { - const history = client.getHistory() ?? []; - - // Tools that may reference the file path in their FunctionResponse `output`. - const toolsInResp = new Set([ - WRITE_FILE_TOOL_NAME, - EDIT_TOOL_NAME, - READ_MANY_FILES_TOOL_NAME, - GREP_TOOL_NAME, - ]); - // Tools that may reference the file path in their FunctionCall `args`. - const toolsInCall = new Set([...toolsInResp, READ_FILE_TOOL_NAME]); - - // Iterate backwards to find the most recent relevant action. - for (const entry of history.slice().reverse()) { - if (!entry.parts) continue; - - for (const part of entry.parts) { - let id: string | undefined; - let content: unknown; - - // Check for a relevant FunctionCall with the file path in its arguments. - if ( - isFunctionCall(entry) && - part.functionCall?.name && - toolsInCall.has(part.functionCall.name) - ) { - id = part.functionCall.id; - content = part.functionCall.args; - } - // Check for a relevant FunctionResponse with the file path in its output. - else if ( - isFunctionResponse(entry) && - part.functionResponse?.name && - toolsInResp.has(part.functionResponse.name) - ) { - const { response } = part.functionResponse; - if (response && !('error' in response) && 'output' in response) { - id = part.functionResponse.id; - content = response['output']; - } - } - - if (!id || content === undefined) continue; - - // Use the "blunt hammer" approach to find the file path in the content. - // Note that the tool response data is inconsistent in their formatting - // with successes and errors - so, we just check for the existence - // as the best guess to if error/failed occurred with the response. 
- const stringified = JSON.stringify(content); - if ( - !stringified.includes('Error') && // only applicable for functionResponse - !stringified.includes('Failed') && // only applicable for functionResponse - stringified.includes(filePath) - ) { - return getTimestampFromFunctionId(id); - } - } - } - - return -1; -} - -/** - * Attempts to correct edit parameters if the original old_string is not found. - * It tries unescaping, and then LLM-based correction. - * Results are cached to avoid redundant processing. - * - * @param currentContent The current content of the file. - * @param originalParams The original EditToolParams - * @param client The GeminiClient for LLM calls. - * @returns A promise resolving to an object containing the (potentially corrected) - * EditToolParams (as CorrectedEditParams) and the final occurrences count. - */ -export async function ensureCorrectEdit( - filePath: string, - currentContent: string, - originalParams: EditToolParams, // This is the EditToolParams from edit.ts, without 'corrected' - geminiClient: GeminiClient, - baseLlmClient: BaseLlmClient, - abortSignal: AbortSignal, - disableLLMCorrection: boolean, -): Promise { - const cacheKey = `${currentContent}---${originalParams.old_string}---${originalParams.new_string}`; - const cachedResult = editCorrectionCache.get(cacheKey); - if (cachedResult) { - return cachedResult; - } - - let finalNewString = originalParams.new_string; - const newStringPotentiallyEscaped = - unescapeStringForGeminiBug(originalParams.new_string) !== - originalParams.new_string; - - const allowMultiple = originalParams.allow_multiple ?? false; - - let finalOldString = originalParams.old_string; - let occurrences = countOccurrences(currentContent, finalOldString); - - const isOccurrencesMatch = allowMultiple - ? 
occurrences > 0 - : occurrences === 1; - - if (isOccurrencesMatch) { - if (newStringPotentiallyEscaped && !disableLLMCorrection) { - finalNewString = await correctNewStringEscaping( - baseLlmClient, - finalOldString, - originalParams.new_string, - abortSignal, - ); - } - } else if (occurrences > 1 && !allowMultiple) { - // If user doesn't allow multiple but found multiple, return as-is (will fail validation later) - const result: CorrectedEditResult = { - params: { ...originalParams }, - occurrences, - }; - editCorrectionCache.set(cacheKey, result); - return result; - } else { - // occurrences is 0 or some other unexpected state initially - const unescapedOldStringAttempt = unescapeStringForGeminiBug( - originalParams.old_string, - ); - occurrences = countOccurrences(currentContent, unescapedOldStringAttempt); - - const isUnescapedOccurrencesMatch = allowMultiple - ? occurrences > 0 - : occurrences === 1; - - if (isUnescapedOccurrencesMatch) { - finalOldString = unescapedOldStringAttempt; - if (newStringPotentiallyEscaped && !disableLLMCorrection) { - finalNewString = await correctNewString( - baseLlmClient, - originalParams.old_string, // original old - unescapedOldStringAttempt, // corrected old - originalParams.new_string, // original new (which is potentially escaped) - abortSignal, - ); - } - } else if (occurrences === 0) { - if (filePath) { - // In order to keep from clobbering edits made outside our system, - // let's check if there was a more recent edit to the file than what - // our system has done - const lastEditedByUsTime = await findLastEditTimestamp( - filePath, - geminiClient, - ); - - // Add a 1-second buffer to account for timing inaccuracies. If the file - // was modified more than a second after the last edit tool was run, we - // can assume it was modified by something else. 
- if (lastEditedByUsTime > 0) { - const stats = fs.statSync(filePath); - const diff = stats.mtimeMs - lastEditedByUsTime; - if (diff > 2000) { - // Hard coded for 2 seconds - // This file was edited sooner - const result: CorrectedEditResult = { - params: { ...originalParams }, - occurrences: 0, // Explicitly 0 as LLM failed - }; - editCorrectionCache.set(cacheKey, result); - return result; - } - } - } - - if (disableLLMCorrection) { - const result: CorrectedEditResult = { - params: { ...originalParams }, - occurrences: 0, - }; - editCorrectionCache.set(cacheKey, result); - return result; - } - - const llmCorrectedOldString = await correctOldStringMismatch( - baseLlmClient, - currentContent, - unescapedOldStringAttempt, - abortSignal, - ); - const llmOldOccurrences = countOccurrences( - currentContent, - llmCorrectedOldString, - ); - - const isLlmOccurrencesMatch = allowMultiple - ? llmOldOccurrences > 0 - : llmOldOccurrences === 1; - - if (isLlmOccurrencesMatch) { - finalOldString = llmCorrectedOldString; - occurrences = llmOldOccurrences; - - if (newStringPotentiallyEscaped) { - const baseNewStringForLLMCorrection = unescapeStringForGeminiBug( - originalParams.new_string, - ); - finalNewString = await correctNewString( - baseLlmClient, - originalParams.old_string, // original old - llmCorrectedOldString, // corrected old - baseNewStringForLLMCorrection, // base new for correction - abortSignal, - ); - } - } else { - // LLM correction also failed for old_string - const result: CorrectedEditResult = { - params: { ...originalParams }, - occurrences: 0, // Explicitly 0 as LLM failed - }; - editCorrectionCache.set(cacheKey, result); - return result; - } - } else { - // Unescaping old_string resulted in > 1 occurrence but not allowMultiple - const result: CorrectedEditResult = { - params: { ...originalParams }, - occurrences, // This will be > 1 - }; - editCorrectionCache.set(cacheKey, result); - return result; - } - } - - const { targetString, pair } = 
trimPairIfPossible( - finalOldString, - finalNewString, - currentContent, - allowMultiple, - ); - finalOldString = targetString; - finalNewString = pair; - - // Final result construction - const result: CorrectedEditResult = { - params: { - file_path: originalParams.file_path, - old_string: finalOldString, - new_string: finalNewString, - }, - occurrences: countOccurrences(currentContent, finalOldString), // Recalculate occurrences with the final old_string - }; - editCorrectionCache.set(cacheKey, result); - return result; -} - export async function ensureCorrectFileContent( content: string, baseLlmClient: BaseLlmClient, abortSignal: AbortSignal, disableLLMCorrection: boolean = true, + aggressiveUnescape: boolean = false, ): Promise { const cachedResult = fileContentCorrectionCache.get(content); if (cachedResult) { return cachedResult; } - const contentPotentiallyEscaped = - unescapeStringForGeminiBug(content) !== content; - if (!contentPotentiallyEscaped) { + const unescapedContent = unescapeStringForGeminiBug(content); + if (unescapedContent === content) { fileContentCorrectionCache.set(content, content); return content; } if (disableLLMCorrection) { - // If we can't use LLM, we should at least use the unescaped content - // as it's likely better than the original if it was detected as potentially escaped. - // unescapeStringForGeminiBug is a heuristic, not an LLM call. 
- const unescaped = unescapeStringForGeminiBug(content); - fileContentCorrectionCache.set(content, unescaped); - return unescaped; + if (aggressiveUnescape) { + fileContentCorrectionCache.set(content, unescapedContent); + return unescapedContent; + } + fileContentCorrectionCache.set(content, content); + return content; } const correctedContent = await correctStringEscaping( @@ -380,242 +64,6 @@ export async function ensureCorrectFileContent( return correctedContent; } -// Define the expected JSON schema for the LLM response for old_string correction -const OLD_STRING_CORRECTION_SCHEMA: Record = { - type: 'object', - properties: { - corrected_target_snippet: { - type: 'string', - description: - 'The corrected version of the target snippet that exactly and uniquely matches a segment within the provided file content.', - }, - }, - required: ['corrected_target_snippet'], -}; - -export async function correctOldStringMismatch( - baseLlmClient: BaseLlmClient, - fileContent: string, - problematicSnippet: string, - abortSignal: AbortSignal, -): Promise { - const prompt = ` -Context: A process needs to find an exact literal, unique match for a specific text snippet within a file's content. The provided snippet failed to match exactly. This is most likely because it has been overly escaped. - -Task: Analyze the provided file content and the problematic target snippet. Identify the segment in the file content that the snippet was *most likely* intended to match. Output the *exact*, literal text of that segment from the file content. Focus *only* on removing extra escape characters and correcting formatting, whitespace, or minor differences to achieve a PERFECT literal match. The output must be the exact literal text as it appears in the file. 
- -Problematic target snippet: -\`\`\` -${problematicSnippet} -\`\`\` - -File Content: -\`\`\` -${fileContent} -\`\`\` - -For example, if the problematic target snippet was "\\\\\\nconst greeting = \`Hello \\\\\`\${name}\\\\\`\`;" and the file content had content that looked like "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;", then corrected_target_snippet should likely be "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;" to fix the incorrect escaping to match the original file content. -If the differences are only in whitespace or formatting, apply similar whitespace/formatting changes to the corrected_target_snippet. - -Return ONLY the corrected target snippet in the specified JSON format with the key 'corrected_target_snippet'. If no clear, unique match can be found, return an empty string for 'corrected_target_snippet'. -`.trim(); - - const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }]; - - try { - const result = await baseLlmClient.generateJson({ - modelConfigKey: { model: 'edit-corrector' }, - contents, - schema: OLD_STRING_CORRECTION_SCHEMA, - abortSignal, - systemInstruction: CODE_CORRECTION_SYSTEM_PROMPT, - promptId: getPromptId(), - role: LlmRole.UTILITY_EDIT_CORRECTOR, - }); - - if ( - result && - typeof result['corrected_target_snippet'] === 'string' && - result['corrected_target_snippet'].length > 0 - ) { - return result['corrected_target_snippet']; - } else { - return problematicSnippet; - } - } catch (error) { - if (abortSignal.aborted) { - throw error; - } - - debugLogger.warn( - 'Error during LLM call for old string snippet correction:', - error, - ); - - return problematicSnippet; - } -} - -// Define the expected JSON schema for the new_string correction LLM response -const NEW_STRING_CORRECTION_SCHEMA: Record = { - type: 'object', - properties: { - corrected_new_string: { - type: 'string', - description: - 'The original_new_string adjusted to be a suitable replacement for the corrected_old_string, while 
maintaining the original intent of the change.', - }, - }, - required: ['corrected_new_string'], -}; - -/** - * Adjusts the new_string to align with a corrected old_string, maintaining the original intent. - */ -export async function correctNewString( - baseLlmClient: BaseLlmClient, - originalOldString: string, - correctedOldString: string, - originalNewString: string, - abortSignal: AbortSignal, -): Promise { - if (originalOldString === correctedOldString) { - return originalNewString; - } - - const prompt = ` -Context: A text replacement operation was planned. The original text to be replaced (original_old_string) was slightly different from the actual text in the file (corrected_old_string). The original_old_string has now been corrected to match the file content. -We now need to adjust the replacement text (original_new_string) so that it makes sense as a replacement for the corrected_old_string, while preserving the original intent of the change. - -original_old_string (what was initially intended to be found): -\`\`\` -${originalOldString} -\`\`\` - -corrected_old_string (what was actually found in the file and will be replaced): -\`\`\` -${correctedOldString} -\`\`\` - -original_new_string (what was intended to replace original_old_string): -\`\`\` -${originalNewString} -\`\`\` - -Task: Based on the differences between original_old_string and corrected_old_string, and the content of original_new_string, generate a corrected_new_string. This corrected_new_string should be what original_new_string would have been if it was designed to replace corrected_old_string directly, while maintaining the spirit of the original transformation. 
- -For example, if original_old_string was "\\\\\\nconst greeting = \`Hello \\\\\`\${name}\\\\\`\`;" and corrected_old_string is "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;", and original_new_string was "\\\\\\nconst greeting = \`Hello \\\\\`\${name} \${lastName}\\\\\`\`;", then corrected_new_string should likely be "\nconst greeting = \`Hello ${'\\`'}\${name} \${lastName}${'\\`'}\`;" to fix the incorrect escaping. -If the differences are only in whitespace or formatting, apply similar whitespace/formatting changes to the corrected_new_string. - -Return ONLY the corrected string in the specified JSON format with the key 'corrected_new_string'. If no adjustment is deemed necessary or possible, return the original_new_string. - `.trim(); - - const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }]; - - try { - const result = await baseLlmClient.generateJson({ - modelConfigKey: { model: 'edit-corrector' }, - contents, - schema: NEW_STRING_CORRECTION_SCHEMA, - abortSignal, - systemInstruction: CODE_CORRECTION_SYSTEM_PROMPT, - promptId: getPromptId(), - role: LlmRole.UTILITY_EDIT_CORRECTOR, - }); - - if ( - result && - typeof result['corrected_new_string'] === 'string' && - result['corrected_new_string'].length > 0 - ) { - return result['corrected_new_string']; - } else { - return originalNewString; - } - } catch (error) { - if (abortSignal.aborted) { - throw error; - } - - debugLogger.warn('Error during LLM call for new_string correction:', error); - return originalNewString; - } -} - -const CORRECT_NEW_STRING_ESCAPING_SCHEMA: Record = { - type: 'object', - properties: { - corrected_new_string_escaping: { - type: 'string', - description: - 'The new_string with corrected escaping, ensuring it is a proper replacement for the old_string, especially considering potential over-escaping issues from previous LLM generations.', - }, - }, - required: ['corrected_new_string_escaping'], -}; - -export async function correctNewStringEscaping( - 
baseLlmClient: BaseLlmClient, - oldString: string, - potentiallyProblematicNewString: string, - abortSignal: AbortSignal, -): Promise { - const prompt = ` -Context: A text replacement operation is planned. The text to be replaced (old_string) has been correctly identified in the file. However, the replacement text (new_string) might have been improperly escaped by a previous LLM generation (e.g. too many backslashes for newlines like \\n instead of \n, or unnecessarily quotes like \\"Hello\\" instead of "Hello"). - -old_string (this is the exact text that will be replaced): -\`\`\` -${oldString} -\`\`\` - -potentially_problematic_new_string (this is the text that should replace old_string, but MIGHT have bad escaping, or might be entirely correct): -\`\`\` -${potentiallyProblematicNewString} -\`\`\` - -Task: Analyze the potentially_problematic_new_string. If it's syntactically invalid due to incorrect escaping (e.g., "\n", "\t", "\\", "\\'", "\\""), correct the invalid syntax. The goal is to ensure the new_string, when inserted into the code, will be a valid and correctly interpreted. - -For example, if old_string is "foo" and potentially_problematic_new_string is "bar\\nbaz", the corrected_new_string_escaping should be "bar\nbaz". -If potentially_problematic_new_string is console.log(\\"Hello World\\"), it should be console.log("Hello World"). - -Return ONLY the corrected string in the specified JSON format with the key 'corrected_new_string_escaping'. If no escaping correction is needed, return the original potentially_problematic_new_string. 
- `.trim(); - - const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }]; - - try { - const result = await baseLlmClient.generateJson({ - modelConfigKey: { model: 'edit-corrector' }, - contents, - schema: CORRECT_NEW_STRING_ESCAPING_SCHEMA, - abortSignal, - systemInstruction: CODE_CORRECTION_SYSTEM_PROMPT, - promptId: getPromptId(), - role: LlmRole.UTILITY_EDIT_CORRECTOR, - }); - - if ( - result && - typeof result['corrected_new_string_escaping'] === 'string' && - result['corrected_new_string_escaping'].length > 0 - ) { - return result['corrected_new_string_escaping']; - } else { - return potentiallyProblematicNewString; - } - } catch (error) { - if (abortSignal.aborted) { - throw error; - } - - debugLogger.warn( - 'Error during LLM call for new_string escaping correction:', - error, - ); - return potentiallyProblematicNewString; - } -} - const CORRECT_STRING_ESCAPING_SCHEMA: Record = { type: 'object', properties: { @@ -684,46 +132,6 @@ Return ONLY the corrected string in the specified JSON format with the key 'corr } } -function trimPreservingTrailingNewline(str: string): string { - const trimmedEnd = str.trimEnd(); - const trailingWhitespace = str.slice(trimmedEnd.length); - const trailingNewlines = trailingWhitespace.replace(/[^\r\n]/g, ''); - return str.trim() + trailingNewlines; -} - -function trimPairIfPossible( - target: string, - trimIfTargetTrims: string, - currentContent: string, - allowMultiple: boolean, -) { - const trimmedTargetString = trimPreservingTrailingNewline(target); - if (target.length !== trimmedTargetString.length) { - const trimmedTargetOccurrences = countOccurrences( - currentContent, - trimmedTargetString, - ); - - const isMatch = allowMultiple - ? 
trimmedTargetOccurrences > 0 - : trimmedTargetOccurrences === 1; - - if (isMatch) { - const trimmedReactiveString = - trimPreservingTrailingNewline(trimIfTargetTrims); - return { - targetString: trimmedTargetString, - pair: trimmedReactiveString, - }; - } - } - - return { - targetString: target, - pair: trimIfTargetTrims, - }; -} - /** * Unescapes a string that might have been overly escaped by an LLM. */ @@ -770,23 +178,6 @@ export function unescapeStringForGeminiBug(inputString: string): string { ); } -/** - * Counts occurrences of a substring in a string - */ -export function countOccurrences(str: string, substr: string): number { - if (substr === '') { - return 0; - } - let count = 0; - let pos = str.indexOf(substr); - while (pos !== -1) { - count++; - pos = str.indexOf(substr, pos + substr.length); // Start search after the current match - } - return count; -} - export function resetEditCorrectorCaches_TEST_ONLY() { - editCorrectionCache.clear(); fileContentCorrectionCache.clear(); } From b1befee8fbd29414e143c962d39d1ea2b46aba0b Mon Sep 17 00:00:00 2001 From: heaventourist Date: Thu, 26 Feb 2026 18:26:16 -0800 Subject: [PATCH 18/41] feat(telemetry) Instrument traces with more attributes and make them available to OTEL users (#20237) Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Jerop Kipruto Co-authored-by: MD. 
MOHIBUR RAHMAN <35300157+mrpmohiburrahman@users.noreply.github.com> Co-authored-by: Jeffrey Ying Co-authored-by: Bryan Morgan Co-authored-by: joshualitt Co-authored-by: Dev Randalpura Co-authored-by: Google Admin Co-authored-by: Ben Knutson --- docs/cli/telemetry.md | 50 ++++- docs/local-development.md | 88 ++++---- integration-tests/acp-telemetry.test.ts | 1 - .../cli/src/ui/hooks/useGeminiStream.test.tsx | 65 ++++++ packages/cli/src/ui/hooks/useGeminiStream.ts | 7 +- .../core/src/agents/subagent-tool.test.ts | 39 ++++ packages/core/src/agents/subagent-tool.ts | 22 +- .../core/src/core/coreToolScheduler.test.ts | 45 ++++- packages/core/src/core/coreToolScheduler.ts | 3 +- .../src/core/loggingContentGenerator.test.ts | 170 +++++++++++++++- .../core/src/core/loggingContentGenerator.ts | 68 +++++-- packages/core/src/scheduler/scheduler.test.ts | 30 ++- packages/core/src/scheduler/scheduler.ts | 13 +- .../src/scheduler/scheduler_parallel.test.ts | 30 ++- .../core/src/scheduler/tool-executor.test.ts | 78 +++++++- packages/core/src/scheduler/tool-executor.ts | 52 +++-- packages/core/src/telemetry/constants.ts | 31 +++ packages/core/src/telemetry/index.ts | 1 + packages/core/src/telemetry/trace.test.ts | 188 ++++++++++++++++++ packages/core/src/telemetry/trace.ts | 49 +++-- packages/sdk/src/agent.integration.test.ts | 9 +- 21 files changed, 903 insertions(+), 136 deletions(-) create mode 100644 packages/core/src/telemetry/trace.test.ts diff --git a/docs/cli/telemetry.md b/docs/cli/telemetry.md index b04d2e0173..28eaed8bd5 100644 --- a/docs/cli/telemetry.md +++ b/docs/cli/telemetry.md @@ -176,11 +176,12 @@ Sends telemetry directly to Google Cloud services. No collector needed. } ``` 2. Run Gemini CLI and send prompts. -3. View logs and metrics: +3. 
View logs, metrics, and traces: - Open the Google Cloud Console in your browser after sending prompts: - - Logs: https://console.cloud.google.com/logs/ - - Metrics: https://console.cloud.google.com/monitoring/metrics-explorer - - Traces: https://console.cloud.google.com/traces/list + - Logs (Logs Explorer): https://console.cloud.google.com/logs/ + - Metrics (Metrics Explorer): + https://console.cloud.google.com/monitoring/metrics-explorer + - Traces (Trace Explorer): https://console.cloud.google.com/traces/list ### Collector-based export (advanced) @@ -208,11 +209,12 @@ forward data to Google Cloud. - Save collector logs to `~/.gemini/tmp//otel/collector-gcp.log` - Stop collector on exit (e.g. `Ctrl+C`) 3. Run Gemini CLI and send prompts. -4. View logs and metrics: +4. View logs, metrics, and traces: - Open the Google Cloud Console in your browser after sending prompts: - - Logs: https://console.cloud.google.com/logs/ - - Metrics: https://console.cloud.google.com/monitoring/metrics-explorer - - Traces: https://console.cloud.google.com/traces/list + - Logs (Logs Explorer): https://console.cloud.google.com/logs/ + - Metrics (Metrics Explorer): + https://console.cloud.google.com/monitoring/metrics-explorer + - Traces (Trace Explorer): https://console.cloud.google.com/traces/list - Open `~/.gemini/tmp//otel/collector-gcp.log` to view local collector logs. @@ -270,10 +272,10 @@ For local development and debugging, you can capture telemetry data locally: 3. View traces at http://localhost:16686 and logs/metrics in the collector log file. -## Logs and metrics +## Logs, metrics, and traces -The following section describes the structure of logs and metrics generated for -Gemini CLI. +The following section describes the structure of logs, metrics, and traces +generated for Gemini CLI. 
The `session.id`, `installation.id`, `active_approval_mode`, and `user.email` (available only when authenticated with a Google account) are included as common @@ -824,6 +826,32 @@ Optional performance monitoring for startup, CPU/memory, and phase timing. - `current_value` (number) - `baseline_value` (number) +### Traces + +Traces offer a granular, "under-the-hood" view of every agent and backend +operation. By providing a high-fidelity execution map, they enable precise +debugging of complex tool interactions and deep performance optimization. Each +trace captures rich, consistent metadata via custom span attributes: + +- `gen_ai.operation.name` (string): The high-level operation kind (e.g. + "tool_call", "llm_call"). +- `gen_ai.agent.name` (string): The service agent identifier ("gemini-cli"). +- `gen_ai.agent.description` (string): The service agent description. +- `gen_ai.input.messages` (string): Input messages or metadata specific to the + operation. +- `gen_ai.output.messages` (string): Output messages or metadata generated from + the operation. +- `gen_ai.request.model` (string): The request model name. +- `gen_ai.response.model` (string): The response model name. +- `gen_ai.system_instructions` (json string): The system instructions. +- `gen_ai.prompt.name` (string): The prompt name. +- `gen_ai.tool.name` (string): The executed tool's name. +- `gen_ai.tool.call_id` (string): The unique ID generated for the tool call. +- `gen_ai.tool.description` (string): The executed tool's description. +- `gen_ai.tool.definitions` (json string): The available tool definitions. +- `gen_ai.conversation.id` (string): The current CLI session ID. +- Additional user-defined Custom Attributes passed via the span's configuration. 
+ #### GenAI semantic convention The following metrics comply with [OpenTelemetry GenAI semantic conventions] for diff --git a/docs/local-development.md b/docs/local-development.md index e194307eae..f710e3b00e 100644 --- a/docs/local-development.md +++ b/docs/local-development.md @@ -1,23 +1,21 @@ # Local development guide This guide provides instructions for setting up and using local development -features, such as development tracing. +features, such as tracing. -## Development tracing +## Tracing -Development traces (dev traces) are OpenTelemetry (OTel) traces that help you -debug your code by instrumenting interesting events like model calls, tool -scheduler, tool calls, etc. +Traces are OpenTelemetry (OTel) records that help you debug your code by +instrumenting key events like model calls, tool scheduler operations, and tool +calls. -Dev traces are verbose and are specifically meant for understanding agent -behavior and debugging issues. They are disabled by default. +Traces provide deep visibility into agent behavior and are invaluable for +debugging complex issues. They are captured automatically when telemetry is +enabled. -To enable dev traces, set the `GEMINI_DEV_TRACING=true` environment variable -when running Gemini CLI. +### Viewing traces -### Viewing dev traces - -You can view dev traces using either Jaeger or the Genkit Developer UI. +You can view traces using either Jaeger or the Genkit Developer UI. #### Using Genkit @@ -37,13 +35,12 @@ Genkit provides a web-based UI for viewing traces and other telemetry data. Genkit Developer UI: http://localhost:4000 ``` -2. **Run Gemini CLI with dev tracing:** +2. **Run Gemini CLI:** - In a separate terminal, run your Gemini CLI command with the - `GEMINI_DEV_TRACING` environment variable: + In a separate terminal, run your Gemini CLI command: ```bash - GEMINI_DEV_TRACING=true gemini + gemini ``` 3. 
**View the traces:** @@ -53,7 +50,7 @@ Genkit provides a web-based UI for viewing traces and other telemetry data. #### Using Jaeger -You can view dev traces in the Jaeger UI. To get started, follow these steps: +You can view traces in the Jaeger UI. To get started, follow these steps: 1. **Start the telemetry collector:** @@ -67,13 +64,12 @@ You can view dev traces in the Jaeger UI. To get started, follow these steps: This command also configures your workspace for local telemetry and provides a link to the Jaeger UI (usually `http://localhost:16686`). -2. **Run Gemini CLI with dev tracing:** +2. **Run Gemini CLI:** - In a separate terminal, run your Gemini CLI command with the - `GEMINI_DEV_TRACING` environment variable: + In a separate terminal, run your Gemini CLI command: ```bash - GEMINI_DEV_TRACING=true gemini + gemini ``` 3. **View the traces:** @@ -84,10 +80,10 @@ You can view dev traces in the Jaeger UI. To get started, follow these steps: For more detailed information on telemetry, see the [telemetry documentation](./cli/telemetry.md). -### Instrumenting code with dev traces +### Instrumenting code with traces -You can add dev traces to your own code for more detailed instrumentation. This -is useful for debugging and understanding the flow of execution. +You can add traces to your own code for more detailed instrumentation. This is +useful for debugging and understanding the flow of execution. Use the `runInDevTraceSpan` function to wrap any section of code in a trace span. @@ -96,29 +92,39 @@ Here is a basic example: ```typescript import { runInDevTraceSpan } from '@google/gemini-cli-core'; +import { GeminiCliOperation, GEN_AI_AGENT_NAME } from '@google/gemini-cli-core/lib/telemetry/constants.js'; -await runInDevTraceSpan({ name: 'my-custom-span' }, async ({ metadata }) => { - // The `metadata` object allows you to record the input and output of the - // operation as well as other attributes. - metadata.input = { key: 'value' }; - // Set custom attributes. 
- metadata.attributes['gen_ai.request.model'] = 'gemini-4.0-mega'; +await runInDevTraceSpan( + { + operation: GeminiCliOperation.ToolCall, + attributes: { + [GEN_AI_AGENT_NAME]: 'gemini-cli', + }, + }, + async ({ metadata }) => { + // The `metadata` object allows you to record the input and output of the + // operation as well as other attributes. + metadata.input = { key: 'value' }; + // Set custom attributes. + metadata.attributes['custom.attribute'] = 'custom.value'; - // Your code to be traced goes here - try { - const output = await somethingRisky(); - metadata.output = output; - return output; - } catch (e) { - metadata.error = e; - throw e; - } -}); + // Your code to be traced goes here + try { + const output = await somethingRisky(); + metadata.output = output; + return output; + } catch (e) { + metadata.error = e; + throw e; + } + }, +); ``` In this example: -- `name`: The name of the span, which will be displayed in the trace. +- `operation`: The operation type of the span, represented by the + `GeminiCliOperation` enum. - `metadata.input`: (Optional) An object containing the input data for the traced operation. 
- `metadata.output`: (Optional) An object containing the output data from the diff --git a/integration-tests/acp-telemetry.test.ts b/integration-tests/acp-telemetry.test.ts index 970239de9e..393156df3e 100644 --- a/integration-tests/acp-telemetry.test.ts +++ b/integration-tests/acp-telemetry.test.ts @@ -72,7 +72,6 @@ describe('ACP telemetry', () => { GEMINI_TELEMETRY_ENABLED: 'true', GEMINI_TELEMETRY_TARGET: 'local', GEMINI_TELEMETRY_OUTFILE: telemetryPath, - // GEMINI_DEV_TRACING not set: fake responses aren't instrumented for spans }, }, ); diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx index 82bda12caa..eb7e029b95 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx +++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx @@ -25,6 +25,7 @@ import type { Config, EditorType, AnyToolInvocation, + SpanMetadata, } from '@google/gemini-cli-core'; import { CoreToolCallStatus, @@ -39,6 +40,7 @@ import { coreEvents, CoreEvent, MCPDiscoveryState, + GeminiCliOperation, getPlanModeExitMessage, } from '@google/gemini-cli-core'; import type { Part, PartListUnion } from '@google/genai'; @@ -101,6 +103,19 @@ const MockValidationRequiredError = vi.hoisted( }, ); +const mockRunInDevTraceSpan = vi.hoisted(() => + vi.fn(async (opts, fn) => { + const metadata: SpanMetadata = { + name: opts.operation, + attributes: opts.attributes || {}, + }; + return await fn({ + metadata, + endSpan: vi.fn(), + }); + }), +); + vi.mock('@google/gemini-cli-core', async (importOriginal) => { const actualCoreModule = (await importOriginal()) as any; return { @@ -113,6 +128,7 @@ vi.mock('@google/gemini-cli-core', async (importOriginal) => { tokenLimit: vi.fn().mockReturnValue(100), // Mock tokenLimit recordToolCallInteractions: vi.fn().mockResolvedValue(undefined), getCodeAssistServer: vi.fn().mockReturnValue(undefined), + runInDevTraceSpan: mockRunInDevTraceSpan, }; }); @@ -794,6 +810,23 @@ describe('useGeminiStream', () => 
{ item.text.includes('Got it. Focusing on tests only.'), ), ).toBe(true); + + expect(mockRunInDevTraceSpan).toHaveBeenCalledWith( + expect.objectContaining({ + operation: GeminiCliOperation.SystemPrompt, + }), + expect.any(Function), + ); + + const spanArgs = mockRunInDevTraceSpan.mock.calls[0]; + const fn = spanArgs[1]; + const metadata = { attributes: {} }; + await act(async () => { + await fn({ metadata, endSpan: vi.fn() }); + }); + expect(metadata).toMatchObject({ + input: sentParts, + }); }); it('should handle all tool calls being cancelled', async () => { @@ -2452,6 +2485,11 @@ describe('useGeminiStream', () => { // This is the core fix validation: Rationale comes before tools are even scheduled (awaited) expect(rationaleIndex).toBeLessThan(scheduleIndex); expect(rationaleIndex).toBeLessThan(toolGroupIndex); + + // Ensure all state updates from recursive submitQuery are settled + await waitFor(() => { + expect(result.current.streamingState).toBe(StreamingState.Idle); + }); }); it('should process @include commands, adding user turn after processing to prevent race conditions', async () => { @@ -3554,4 +3592,31 @@ describe('useGeminiStream', () => { expect(result.current.pendingHistoryItems.length).toEqual(0); }); }); + + it('should trace UserPrompt telemetry on submitQuery', async () => { + const { result } = renderTestHook(); + + mockSendMessageStream.mockReturnValue( + (async function* () { + yield { type: ServerGeminiEventType.Content, value: 'Response' }; + })(), + ); + + await act(async () => { + await result.current.submitQuery('telemetry test query'); + }); + + const userPromptCall = mockRunInDevTraceSpan.mock.calls.find( + (call) => + call[0].operation === GeminiCliOperation.UserPrompt || + call[0].operation === 'UserPrompt', + ); + expect(userPromptCall).toBeDefined(); + + const spanMetadata = {} as SpanMetadata; + await act(async () => { + await userPromptCall![1]({ metadata: spanMetadata, endSpan: vi.fn() }); + }); + 
expect(spanMetadata.input).toBe('telemetry test query'); + }); }); diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 34380e78ab..2a63d618e9 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -36,6 +36,7 @@ import { CoreToolCallStatus, buildUserSteeringHintPrompt, generateSteeringAckMessage, + GeminiCliOperation, getPlanModeExitMessage, } from '@google/gemini-cli-core'; import type { @@ -1262,7 +1263,11 @@ export const useGeminiStream = ( prompt_id?: string, ) => runInDevTraceSpan( - { name: 'submitQuery' }, + { + operation: options?.isContinuation + ? GeminiCliOperation.SystemPrompt + : GeminiCliOperation.UserPrompt, + }, async ({ metadata: spanMetadata }) => { spanMetadata.input = query; diff --git a/packages/core/src/agents/subagent-tool.test.ts b/packages/core/src/agents/subagent-tool.test.ts index 40db4822a2..74f0051351 100644 --- a/packages/core/src/agents/subagent-tool.test.ts +++ b/packages/core/src/agents/subagent-tool.test.ts @@ -23,10 +23,30 @@ import type { ToolInvocation, ToolResult, } from '../tools/tools.js'; +import { + GeminiCliOperation, + GEN_AI_AGENT_DESCRIPTION, + GEN_AI_AGENT_NAME, +} from '../telemetry/constants.js'; import type { ToolRegistry } from 'src/tools/tool-registry.js'; vi.mock('./subagent-tool-wrapper.js'); +// Mock runInDevTraceSpan +const runInDevTraceSpan = vi.hoisted(() => + vi.fn(async (opts, fn) => { + const metadata = { attributes: opts.attributes || {} }; + return fn({ + metadata, + endSpan: vi.fn(), + }); + }), +); + +vi.mock('../telemetry/trace.js', () => ({ + runInDevTraceSpan, +})); + const MockSubagentToolWrapper = vi.mocked(SubagentToolWrapper); const testDefinition: LocalAgentDefinition = { @@ -155,6 +175,25 @@ describe('SubAgentInvocation', () => { abortSignal, updateOutput, ); + + expect(runInDevTraceSpan).toHaveBeenCalledWith( + expect.objectContaining({ + operation: 
GeminiCliOperation.AgentCall, + attributes: expect.objectContaining({ + [GEN_AI_AGENT_NAME]: testDefinition.name, + [GEN_AI_AGENT_DESCRIPTION]: testDefinition.description, + }), + }), + expect.any(Function), + ); + + // Verify metadata was set on the span + const spanCallback = vi.mocked(runInDevTraceSpan).mock.calls[0][1]; + const mockMetadata = { input: undefined, output: undefined }; + const mockSpan = { metadata: mockMetadata, endSpan: vi.fn() }; + await spanCallback(mockSpan as Parameters[0]); + expect(mockMetadata.input).toBe(params); + expect(mockMetadata.output).toBe(mockResult); }); describe('withUserHints', () => { diff --git a/packages/core/src/agents/subagent-tool.ts b/packages/core/src/agents/subagent-tool.ts index 8584ae97f1..3ecff4e969 100644 --- a/packages/core/src/agents/subagent-tool.ts +++ b/packages/core/src/agents/subagent-tool.ts @@ -20,6 +20,12 @@ import type { AgentDefinition, AgentInputs } from './types.js'; import { SubagentToolWrapper } from './subagent-tool-wrapper.js'; import { SchemaValidator } from '../utils/schemaValidator.js'; import { formatUserHintsForModel } from '../utils/fastAckHelper.js'; +import { runInDevTraceSpan } from '../telemetry/trace.js'; +import { + GeminiCliOperation, + GEN_AI_AGENT_DESCRIPTION, + GEN_AI_AGENT_NAME, +} from '../telemetry/constants.js'; export class SubagentTool extends BaseDeclarativeTool { constructor( @@ -167,7 +173,21 @@ class SubAgentInvocation extends BaseToolInvocation { this.withUserHints(this.params), ); - return invocation.execute(signal, updateOutput); + return runInDevTraceSpan( + { + operation: GeminiCliOperation.AgentCall, + attributes: { + [GEN_AI_AGENT_NAME]: this.definition.name, + [GEN_AI_AGENT_DESCRIPTION]: this.definition.description, + }, + }, + async ({ metadata }) => { + metadata.input = this.params; + const result = await invocation.execute(signal, updateOutput); + metadata.output = result; + return result; + }, + ); } private withUserHints(agentArgs: AgentInputs): AgentInputs 
{ diff --git a/packages/core/src/core/coreToolScheduler.test.ts b/packages/core/src/core/coreToolScheduler.test.ts index 844d930ea2..6bdad0dddb 100644 --- a/packages/core/src/core/coreToolScheduler.test.ts +++ b/packages/core/src/core/coreToolScheduler.test.ts @@ -14,16 +14,14 @@ import { type ErroredToolCall, CoreToolCallStatus, } from '../scheduler/types.js'; -import type { - ToolCallConfirmationDetails, - ToolConfirmationPayload, - ToolInvocation, - ToolResult, - Config, - ToolRegistry, - MessageBus, -} from '../index.js'; import { + type ToolCallConfirmationDetails, + type ToolConfirmationPayload, + type ToolInvocation, + type ToolResult, + type Config, + type ToolRegistry, + type MessageBus, DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD, BaseDeclarativeTool, BaseToolInvocation, @@ -33,6 +31,8 @@ import { HookSystem, PolicyDecision, ToolErrorType, + DiscoveredMCPTool, + GeminiCliOperation, } from '../index.js'; import { createMockMessageBus } from '../test-utils/mock-message-bus.js'; import { @@ -43,12 +43,22 @@ import { import * as modifiableToolModule from '../tools/modifiable-tool.js'; import { DEFAULT_GEMINI_MODEL } from '../config/models.js'; import type { PolicyEngine } from '../policy/policy-engine.js'; -import { DiscoveredMCPTool } from '../tools/mcp-tool.js'; +import { runInDevTraceSpan, type SpanMetadata } from '../telemetry/trace.js'; vi.mock('fs/promises', () => ({ writeFile: vi.fn(), })); +vi.mock('../telemetry/trace.js', () => ({ + runInDevTraceSpan: vi.fn(async (opts, fn) => { + const metadata = { attributes: opts.attributes || {} }; + return fn({ + metadata, + endSpan: vi.fn(), + }); + }), +})); + class TestApprovalTool extends BaseDeclarativeTool<{ id: string }, ToolResult> { static readonly Name = 'testApprovalTool'; @@ -362,6 +372,21 @@ describe('CoreToolScheduler', () => { const completedCalls = onAllToolCallsComplete.mock .calls[0][0] as ToolCall[]; expect(completedCalls[0].status).toBe(CoreToolCallStatus.Cancelled); + + 
expect(runInDevTraceSpan).toHaveBeenCalledWith( + expect.objectContaining({ + operation: GeminiCliOperation.ScheduleToolCalls, + }), + expect.any(Function), + ); + + const spanArgs = vi.mocked(runInDevTraceSpan).mock.calls[0]; + const fn = spanArgs[1]; + const metadata: SpanMetadata = { name: '', attributes: {} }; + await fn({ metadata, endSpan: vi.fn() }); + expect(metadata).toMatchObject({ + input: [request], + }); }); it('should cancel all tools when cancelAll is called', async () => { diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts index c2381e4b43..f8d1b260fd 100644 --- a/packages/core/src/core/coreToolScheduler.ts +++ b/packages/core/src/core/coreToolScheduler.ts @@ -46,6 +46,7 @@ import { CoreToolCallStatus } from '../scheduler/types.js'; import { ToolExecutor } from '../scheduler/tool-executor.js'; import { DiscoveredMCPTool } from '../tools/mcp-tool.js'; import { getPolicyDenialError } from '../scheduler/policy.js'; +import { GeminiCliOperation } from '../telemetry/constants.js'; export type { ToolCall, @@ -424,7 +425,7 @@ export class CoreToolScheduler { signal: AbortSignal, ): Promise { return runInDevTraceSpan( - { name: 'schedule' }, + { operation: GeminiCliOperation.ScheduleToolCalls }, async ({ metadata: spanMetadata }) => { spanMetadata.input = request; if (this.isRunning() || this.isScheduling) { diff --git a/packages/core/src/core/loggingContentGenerator.test.ts b/packages/core/src/core/loggingContentGenerator.test.ts index 01a7162f1d..fc9103491c 100644 --- a/packages/core/src/core/loggingContentGenerator.test.ts +++ b/packages/core/src/core/loggingContentGenerator.test.ts @@ -15,7 +15,13 @@ vi.mock('../telemetry/loggers.js', () => ({ })); const runInDevTraceSpan = vi.hoisted(() => - vi.fn(async (meta, fn) => fn({ metadata: {}, endSpan: vi.fn() })), + vi.fn(async (opts, fn) => { + const metadata = { attributes: opts.attributes || {} }; + return fn({ + metadata, + endSpan: vi.fn(), + }); + }), 
); vi.mock('../telemetry/trace.js', () => ({ @@ -38,6 +44,16 @@ import type { Config } from '../config/config.js'; import { UserTierId } from '../code_assist/types.js'; import { ApiRequestEvent, LlmRole } from '../telemetry/types.js'; import { FatalAuthenticationError } from '../utils/errors.js'; +import { + GeminiCliOperation, + GEN_AI_PROMPT_NAME, + GEN_AI_REQUEST_MODEL, + GEN_AI_SYSTEM_INSTRUCTIONS, + GEN_AI_TOOL_DEFINITIONS, + GEN_AI_USAGE_INPUT_TOKENS, + GEN_AI_USAGE_OUTPUT_TOKENS, +} from '../telemetry/constants.js'; +import { type SpanMetadata } from '../telemetry/trace.js'; describe('LoggingContentGenerator', () => { let wrapped: ContentGenerator; @@ -73,10 +89,20 @@ describe('LoggingContentGenerator', () => { const req = { contents: [{ role: 'user', parts: [{ text: 'hello' }] }], model: 'gemini-pro', + config: { + systemInstruction: { parts: [{ text: 'system instructions' }] }, + tools: [{ functionDeclarations: [{ name: 'myTool' }] }], + }, }; const userPromptId = 'prompt-123'; const response: GenerateContentResponse = { - candidates: [], + candidates: [ + { + content: { + parts: [{ text: 'hello' }], + }, + }, + ], usageMetadata: { promptTokenCount: 1, candidatesTokenCount: 2, @@ -113,12 +139,47 @@ describe('LoggingContentGenerator', () => { ); const responseEvent = vi.mocked(logApiResponse).mock.calls[0][1]; expect(responseEvent.duration_ms).toBe(1000); + + expect(runInDevTraceSpan).toHaveBeenCalledWith( + expect.objectContaining({ + operation: GeminiCliOperation.LLMCall, + attributes: expect.objectContaining({ + [GEN_AI_REQUEST_MODEL]: 'gemini-pro', + [GEN_AI_PROMPT_NAME]: userPromptId, + [GEN_AI_SYSTEM_INSTRUCTIONS]: JSON.stringify( + req.config.systemInstruction, + ), + [GEN_AI_TOOL_DEFINITIONS]: JSON.stringify(req.config.tools), + }), + }), + expect.any(Function), + ); + + const spanArgs = vi.mocked(runInDevTraceSpan).mock.calls[0]; + const fn = spanArgs[1]; + const metadata: SpanMetadata = { name: '', attributes: {} }; + await fn({ metadata, endSpan: 
vi.fn() }); + + expect(metadata).toMatchObject({ + input: req.contents, + output: response.candidates?.[0]?.content, + attributes: { + [GEN_AI_USAGE_INPUT_TOKENS]: 1, + [GEN_AI_USAGE_OUTPUT_TOKENS]: 2, + }, + }); }); it('should log error on failure', async () => { const req = { contents: [{ role: 'user', parts: [{ text: 'hello' }] }], model: 'gemini-pro', + config: { + systemInstruction: { + parts: [{ text: 'stream system instructions' }], + }, + tools: [{ functionDeclarations: [{ name: 'streamTool' }] }], + }, }; const userPromptId = 'prompt-123'; const error = new Error('test error'); @@ -126,7 +187,7 @@ describe('LoggingContentGenerator', () => { const startTime = new Date('2025-01-01T00:00:00.000Z'); vi.setSystemTime(startTime); - const promise = loggingContentGenerator.generateContent( + let promise = loggingContentGenerator.generateContent( req, userPromptId, LlmRole.MAIN, @@ -142,6 +203,32 @@ describe('LoggingContentGenerator', () => { ); const errorEvent = vi.mocked(logApiError).mock.calls[0][1]; expect(errorEvent.duration_ms).toBe(1000); + + expect(runInDevTraceSpan).toHaveBeenCalledWith( + expect.objectContaining({ + operation: GeminiCliOperation.LLMCall, + attributes: expect.objectContaining({ + [GEN_AI_REQUEST_MODEL]: 'gemini-pro', + [GEN_AI_PROMPT_NAME]: userPromptId, + [GEN_AI_SYSTEM_INSTRUCTIONS]: JSON.stringify( + req.config.systemInstruction, + ), + [GEN_AI_TOOL_DEFINITIONS]: JSON.stringify(req.config.tools), + }), + }), + expect.any(Function), + ); + + const spanArgs = vi.mocked(runInDevTraceSpan).mock.calls[0]; + const fn = spanArgs[1]; + const metadata: SpanMetadata = { name: '', attributes: {} }; + promise = fn({ metadata, endSpan: vi.fn() }); + + await expect(promise).rejects.toThrow(error); + + expect(metadata).toMatchObject({ + error, + }); }); describe('error type extraction', () => { @@ -163,10 +250,22 @@ describe('LoggingContentGenerator', () => { const req = { contents: [{ role: 'user', parts: [{ text: 'hello' }] }], model: 'gemini-pro', 
+ config: { + systemInstruction: { + parts: [{ text: 'stream system instructions' }], + }, + tools: [{ functionDeclarations: [{ name: 'streamTool' }] }], + }, }; const userPromptId = 'prompt-123'; const response = { - candidates: [], + candidates: [ + { + content: { + parts: [{ text: 'hello' }], + }, + }, + ], usageMetadata: { promptTokenCount: 1, candidatesTokenCount: 2, @@ -186,7 +285,7 @@ describe('LoggingContentGenerator', () => { vi.setSystemTime(startTime); - const stream = await loggingContentGenerator.generateContentStream( + let stream = await loggingContentGenerator.generateContentStream( req, userPromptId, @@ -211,6 +310,44 @@ describe('LoggingContentGenerator', () => { ); const responseEvent = vi.mocked(logApiResponse).mock.calls[0][1]; expect(responseEvent.duration_ms).toBe(1000); + + expect(runInDevTraceSpan).toHaveBeenCalledWith( + expect.objectContaining({ + operation: GeminiCliOperation.LLMCall, + noAutoEnd: true, + attributes: expect.objectContaining({ + [GEN_AI_REQUEST_MODEL]: 'gemini-pro', + [GEN_AI_PROMPT_NAME]: userPromptId, + [GEN_AI_SYSTEM_INSTRUCTIONS]: JSON.stringify( + req.config.systemInstruction, + ), + [GEN_AI_TOOL_DEFINITIONS]: JSON.stringify(req.config.tools), + }), + }), + expect.any(Function), + ); + + const spanArgs = vi.mocked(runInDevTraceSpan).mock.calls[0]; + const fn = spanArgs[1]; + const metadata: SpanMetadata = { name: '', attributes: {} }; + + vi.mocked(wrapped.generateContentStream).mockResolvedValue( + createAsyncGenerator(), + ); + stream = await fn({ metadata, endSpan: vi.fn() }); + + for await (const _ of stream) { + // consume stream + } + + expect(metadata).toMatchObject({ + input: req.contents, + output: [response.candidates?.[0]?.content], + attributes: { + [GEN_AI_USAGE_INPUT_TOKENS]: 1, + [GEN_AI_USAGE_OUTPUT_TOKENS]: 2, + }, + }); }); it('should log error on failure', async () => { @@ -328,6 +465,9 @@ describe('LoggingContentGenerator', () => { const req = { contents: [{ role: 'user', parts: [] }], model: 
'gemini-pro', + config: { + mimeType: 'text/plain', + }, }; const response: EmbedContentResponse = { embeddings: [{ values: [] }] }; vi.mocked(wrapped.embedContent).mockResolvedValue(response); @@ -336,6 +476,26 @@ describe('LoggingContentGenerator', () => { expect(wrapped.embedContent).toHaveBeenCalledWith(req); expect(result).toBe(response); + + expect(runInDevTraceSpan).toHaveBeenCalledWith( + expect.objectContaining({ + operation: GeminiCliOperation.LLMCall, + attributes: expect.objectContaining({ + [GEN_AI_REQUEST_MODEL]: req.model, + }), + }), + expect.any(Function), + ); + + const spanArgs = vi.mocked(runInDevTraceSpan).mock.calls[0]; + const fn = spanArgs[1]; + const metadata: SpanMetadata = { name: '', attributes: {} }; + await fn({ metadata, endSpan: vi.fn() }); + + expect(metadata).toMatchObject({ + input: req.contents, + output: response, + }); }); }); diff --git a/packages/core/src/core/loggingContentGenerator.ts b/packages/core/src/core/loggingContentGenerator.ts index 013600a0b9..c1ec554d6d 100644 --- a/packages/core/src/core/loggingContentGenerator.ts +++ b/packages/core/src/core/loggingContentGenerator.ts @@ -37,6 +37,16 @@ import { isStructuredError } from '../utils/quotaErrorDetection.js'; import { runInDevTraceSpan, type SpanMetadata } from '../telemetry/trace.js'; import { debugLogger } from '../utils/debugLogger.js'; import { getErrorType } from '../utils/errors.js'; +import { + GeminiCliOperation, + GEN_AI_PROMPT_NAME, + GEN_AI_REQUEST_MODEL, + GEN_AI_SYSTEM_INSTRUCTIONS, + GEN_AI_TOOL_DEFINITIONS, + GEN_AI_USAGE_INPUT_TOKENS, + GEN_AI_USAGE_OUTPUT_TOKENS, +} from '../telemetry/constants.js'; +import { safeJsonStringify } from '../utils/safeJsonStringify.js'; import { isMcpToolName } from '../tools/mcp-tool.js'; import { estimateTokenCountSync } from '../utils/tokenCalculation.js'; @@ -303,10 +313,18 @@ export class LoggingContentGenerator implements ContentGenerator { ): Promise { return runInDevTraceSpan( { - name: 'generateContent', + 
operation: GeminiCliOperation.LLMCall, + attributes: { + [GEN_AI_REQUEST_MODEL]: req.model, + [GEN_AI_PROMPT_NAME]: userPromptId, + [GEN_AI_SYSTEM_INSTRUCTIONS]: safeJsonStringify( + req.config?.systemInstruction ?? [], + ), + [GEN_AI_TOOL_DEFINITIONS]: safeJsonStringify(req.config?.tools ?? []), + }, }, async ({ metadata: spanMetadata }) => { - spanMetadata.input = { request: req, userPromptId, model: req.model }; + spanMetadata.input = req.contents; const startTime = Date.now(); const contents: Content[] = toContents(req.contents); @@ -326,10 +344,11 @@ export class LoggingContentGenerator implements ContentGenerator { userPromptId, role, ); - spanMetadata.output = { - response, - usageMetadata: response.usageMetadata, - }; + spanMetadata.output = response.candidates?.[0]?.content ?? null; + spanMetadata.attributes[GEN_AI_USAGE_INPUT_TOKENS] = + response.usageMetadata?.promptTokenCount ?? 0; + spanMetadata.attributes[GEN_AI_USAGE_OUTPUT_TOKENS] = + response.usageMetadata?.candidatesTokenCount ?? 0; const durationMs = Date.now() - startTime; this._logApiResponse( contents, @@ -355,6 +374,7 @@ export class LoggingContentGenerator implements ContentGenerator { .catch((e) => debugLogger.debug('quota refresh failed', e)); return response; } catch (error) { + spanMetadata.error = error; const durationMs = Date.now() - startTime; this._logApiError( durationMs, @@ -379,11 +399,20 @@ export class LoggingContentGenerator implements ContentGenerator { ): Promise> { return runInDevTraceSpan( { - name: 'generateContentStream', + operation: GeminiCliOperation.LLMCall, noAutoEnd: true, + attributes: { + [GEN_AI_REQUEST_MODEL]: req.model, + [GEN_AI_PROMPT_NAME]: userPromptId, + [GEN_AI_SYSTEM_INSTRUCTIONS]: safeJsonStringify( + req.config?.systemInstruction ?? [], + ), + [GEN_AI_TOOL_DEFINITIONS]: safeJsonStringify(req.config?.tools ?? 
[]), + }, }, async ({ metadata: spanMetadata, endSpan }) => { - spanMetadata.input = { request: req, userPromptId, model: req.model }; + spanMetadata.input = req.contents; + const startTime = Date.now(); const serverDetails = this._getEndpointUrl( req, @@ -488,13 +517,15 @@ export class LoggingContentGenerator implements ContentGenerator { this.config .refreshUserQuotaIfStale() .catch((e) => debugLogger.debug('quota refresh failed', e)); - spanMetadata.output = { - streamChunks: responses.map((r) => ({ - content: r.candidates?.[0]?.content ?? null, - })), - usageMetadata: lastUsageMetadata, - durationMs, - }; + spanMetadata.output = responses.map( + (response) => response.candidates?.[0]?.content ?? null, + ); + if (lastUsageMetadata) { + spanMetadata.attributes[GEN_AI_USAGE_INPUT_TOKENS] = + lastUsageMetadata.promptTokenCount ?? 0; + spanMetadata.attributes[GEN_AI_USAGE_OUTPUT_TOKENS] = + lastUsageMetadata.candidatesTokenCount ?? 0; + } } catch (error) { spanMetadata.error = error; const durationMs = Date.now() - startTime; @@ -523,10 +554,13 @@ export class LoggingContentGenerator implements ContentGenerator { ): Promise { return runInDevTraceSpan( { - name: 'embedContent', + operation: GeminiCliOperation.LLMCall, + attributes: { + [GEN_AI_REQUEST_MODEL]: req.model, + }, }, async ({ metadata: spanMetadata }) => { - spanMetadata.input = { request: req }; + spanMetadata.input = req.contents; const output = await this.wrapped.embedContent(req); spanMetadata.output = output; return output; diff --git a/packages/core/src/scheduler/scheduler.test.ts b/packages/core/src/scheduler/scheduler.test.ts index fd5c56221b..b2c1adade0 100644 --- a/packages/core/src/scheduler/scheduler.test.ts +++ b/packages/core/src/scheduler/scheduler.test.ts @@ -20,10 +20,18 @@ vi.mock('node:crypto', () => ({ randomUUID: vi.fn(), })); +const runInDevTraceSpan = vi.hoisted(() => + vi.fn(async (opts, fn) => { + const metadata = { attributes: opts.attributes || {} }; + return fn({ + metadata, + 
endSpan: vi.fn(), + }); + }), +); + vi.mock('../telemetry/trace.js', () => ({ - runInDevTraceSpan: vi.fn(async (_opts, fn) => - fn({ metadata: { input: {}, output: {} } }), - ), + runInDevTraceSpan, })); import { logToolCall } from '../telemetry/loggers.js'; @@ -81,6 +89,7 @@ import type { } from './types.js'; import { CoreToolCallStatus, ROOT_SCHEDULER_ID } from './types.js'; import { ToolErrorType } from '../tools/tool-error.js'; +import { GeminiCliOperation } from '../telemetry/constants.js'; import * as ToolUtils from '../utils/tool-utils.js'; import type { EditorType } from '../utils/editor.js'; import { @@ -366,6 +375,21 @@ describe('Scheduler (Orchestrator)', () => { }), ]), ); + + expect(runInDevTraceSpan).toHaveBeenCalledWith( + expect.objectContaining({ + operation: GeminiCliOperation.ScheduleToolCalls, + }), + expect.any(Function), + ); + + const spanArgs = vi.mocked(runInDevTraceSpan).mock.calls[0]; + const fn = spanArgs[1]; + const metadata = { attributes: {} }; + await fn({ metadata, endSpan: vi.fn() }); + expect(metadata).toMatchObject({ + input: [req1], + }); }); it('should set approvalMode to PLAN when config returns PLAN', async () => { diff --git a/packages/core/src/scheduler/scheduler.ts b/packages/core/src/scheduler/scheduler.ts index 44a16b7988..fa3b4526ad 100644 --- a/packages/core/src/scheduler/scheduler.ts +++ b/packages/core/src/scheduler/scheduler.ts @@ -46,6 +46,7 @@ import { CoreEvent, type McpProgressPayload, } from '../utils/events.js'; +import { GeminiCliOperation } from '../telemetry/constants.js'; interface SchedulerQueueItem { requests: ToolCallRequestInfo[]; @@ -186,16 +187,22 @@ export class Scheduler { signal: AbortSignal, ): Promise { return runInDevTraceSpan( - { name: 'schedule' }, + { operation: GeminiCliOperation.ScheduleToolCalls }, async ({ metadata: spanMetadata }) => { const requests = Array.isArray(request) ? 
request : [request]; + spanMetadata.input = requests; + let toolCallResponse: CompletedToolCall[] = []; + if (this.isProcessing || this.state.isActive) { - return this._enqueueRequest(requests, signal); + toolCallResponse = await this._enqueueRequest(requests, signal); + } else { + toolCallResponse = await this._startBatch(requests, signal); } - return this._startBatch(requests, signal); + spanMetadata.output = toolCallResponse; + return toolCallResponse; }, ); } diff --git a/packages/core/src/scheduler/scheduler_parallel.test.ts b/packages/core/src/scheduler/scheduler_parallel.test.ts index 824cdc4a16..9febf494c0 100644 --- a/packages/core/src/scheduler/scheduler_parallel.test.ts +++ b/packages/core/src/scheduler/scheduler_parallel.test.ts @@ -20,10 +20,18 @@ vi.mock('node:crypto', () => ({ randomUUID: vi.fn(), })); +const runInDevTraceSpan = vi.hoisted(() => + vi.fn(async (opts, fn) => { + const metadata = { name: '', attributes: opts.attributes || {} }; + return fn({ + metadata, + endSpan: vi.fn(), + }); + }), +); + vi.mock('../telemetry/trace.js', () => ({ - runInDevTraceSpan: vi.fn(async (_opts, fn) => - fn({ metadata: { input: {}, output: {} } }), - ), + runInDevTraceSpan, })); vi.mock('../telemetry/loggers.js', () => ({ logToolCall: vi.fn(), @@ -71,6 +79,7 @@ import type { ToolCall, } from './types.js'; import { ROOT_SCHEDULER_ID } from './types.js'; +import { GeminiCliOperation } from '../telemetry/constants.js'; import type { EditorType } from '../utils/editor.js'; describe('Scheduler Parallel Execution', () => { @@ -306,6 +315,21 @@ describe('Scheduler Parallel Execution', () => { ); expect(executionLog).toContain('end-call-3'); + + expect(runInDevTraceSpan).toHaveBeenCalledWith( + expect.objectContaining({ + operation: GeminiCliOperation.ScheduleToolCalls, + }), + expect.any(Function), + ); + + const spanArgs = vi.mocked(runInDevTraceSpan).mock.calls[0]; + const fn = spanArgs[1]; + const metadata = { name: '', attributes: {} }; + await fn({ metadata, 
endSpan: vi.fn() }); + expect(metadata).toMatchObject({ + input: [req1, req2, req3], + }); }); it('should execute non-read-only tools sequentially', async () => { diff --git a/packages/core/src/scheduler/tool-executor.test.ts b/packages/core/src/scheduler/tool-executor.test.ts index 29db841aac..0d77204f4e 100644 --- a/packages/core/src/scheduler/tool-executor.test.ts +++ b/packages/core/src/scheduler/tool-executor.test.ts @@ -6,8 +6,11 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import { ToolExecutor } from './tool-executor.js'; -import type { Config, AnyToolInvocation } from '../index.js'; -import type { ToolResult } from '../tools/tools.js'; +import { + type Config, + type ToolResult, + type AnyToolInvocation, +} from '../index.js'; import { makeFakeConfig } from '../test-utils/config.js'; import { MockTool } from '../test-utils/mock-tool.js'; import type { ScheduledToolCall } from './types.js'; @@ -17,6 +20,12 @@ import * as fileUtils from '../utils/fileUtils.js'; import * as coreToolHookTriggers from '../core/coreToolHookTriggers.js'; import { ShellToolInvocation } from '../tools/shell.js'; import { createMockMessageBus } from '../test-utils/mock-message-bus.js'; +import { + GeminiCliOperation, + GEN_AI_TOOL_CALL_ID, + GEN_AI_TOOL_DESCRIPTION, + GEN_AI_TOOL_NAME, +} from '../telemetry/constants.js'; // Mock file utils vi.mock('../utils/fileUtils.js', () => ({ @@ -28,6 +37,24 @@ vi.mock('../utils/fileUtils.js', () => ({ vi.mock('../core/coreToolHookTriggers.js', () => ({ executeToolWithHooks: vi.fn(), })); +// Mock runInDevTraceSpan +const runInDevTraceSpan = vi.hoisted(() => + vi.fn(async (opts, fn) => { + const metadata = { attributes: opts.attributes || {} }; + return fn({ + metadata, + endSpan: vi.fn(), + }); + }), +); + +vi.mock('../index.js', async (importOriginal) => { + const actual = await importOriginal>(); + return { + ...actual, + runInDevTraceSpan, + }; +}); describe('ToolExecutor', () => { let config: Config; @@ 
-57,6 +84,7 @@ describe('ToolExecutor', () => { it('should execute a tool successfully', async () => { const mockTool = new MockTool({ name: 'testTool', + description: 'Mock description', execute: async () => ({ llmContent: 'Tool output', returnDisplay: 'Tool output', @@ -97,11 +125,37 @@ describe('ToolExecutor', () => { ?.response as Record; expect(response).toEqual({ output: 'Tool output' }); } + + expect(runInDevTraceSpan).toHaveBeenCalledWith( + expect.objectContaining({ + operation: GeminiCliOperation.ToolCall, + attributes: expect.objectContaining({ + [GEN_AI_TOOL_NAME]: 'testTool', + [GEN_AI_TOOL_CALL_ID]: 'call-1', + [GEN_AI_TOOL_DESCRIPTION]: 'Mock description', + }), + }), + expect.any(Function), + ); + + const spanArgs = vi.mocked(runInDevTraceSpan).mock.calls[0]; + const fn = spanArgs[1]; + const metadata = { attributes: {} }; + await fn({ metadata, endSpan: vi.fn() }); + expect(metadata).toMatchObject({ + input: scheduledCall.request, + output: { + ...result, + durationMs: expect.any(Number), + endTime: expect.any(Number), + }, + }); }); it('should handle execution errors', async () => { const mockTool = new MockTool({ name: 'failTool', + description: 'Mock description', }); const invocation = mockTool.build({}); @@ -134,6 +188,26 @@ describe('ToolExecutor', () => { if (result.status === CoreToolCallStatus.Error) { expect(result.response.error?.message).toBe('Tool Failed'); } + + expect(runInDevTraceSpan).toHaveBeenCalledWith( + expect.objectContaining({ + operation: GeminiCliOperation.ToolCall, + attributes: expect.objectContaining({ + [GEN_AI_TOOL_NAME]: 'failTool', + [GEN_AI_TOOL_CALL_ID]: 'call-2', + [GEN_AI_TOOL_DESCRIPTION]: 'Mock description', + }), + }), + expect.any(Function), + ); + + const spanArgs = vi.mocked(runInDevTraceSpan).mock.calls[0]; + const fn = spanArgs[1]; + const metadata = { attributes: {} }; + await fn({ metadata, endSpan: vi.fn() }); + expect(metadata).toMatchObject({ + error: new Error('Tool Failed'), + }); }); it('should 
return cancelled result when signal is aborted', async () => { diff --git a/packages/core/src/scheduler/tool-executor.ts b/packages/core/src/scheduler/tool-executor.ts index 9ae00b24a7..7903266fe1 100644 --- a/packages/core/src/scheduler/tool-executor.ts +++ b/packages/core/src/scheduler/tool-executor.ts @@ -34,6 +34,12 @@ import type { CancelledToolCall, } from './types.js'; import { CoreToolCallStatus } from './types.js'; +import { + GeminiCliOperation, + GEN_AI_TOOL_CALL_ID, + GEN_AI_TOOL_DESCRIPTION, + GEN_AI_TOOL_NAME, +} from '../telemetry/constants.js'; export interface ToolExecutionContext { call: ToolCall; @@ -70,11 +76,17 @@ export class ToolExecutor { return runInDevTraceSpan( { - name: tool.name, - attributes: { type: 'tool-call' }, + operation: GeminiCliOperation.ToolCall, + attributes: { + [GEN_AI_TOOL_NAME]: toolName, + [GEN_AI_TOOL_CALL_ID]: callId, + [GEN_AI_TOOL_DESCRIPTION]: tool.description, + }, }, async ({ metadata: spanMetadata }) => { - spanMetadata.input = { request }; + spanMetadata.input = request; + + let completedToolCall: CompletedToolCall; try { let promise: Promise; @@ -116,21 +128,23 @@ export class ToolExecutor { } const toolResult: ToolResult = await promise; - spanMetadata.output = toolResult; if (signal.aborted) { - return this.createCancelledResult( + completedToolCall = this.createCancelledResult( call, 'User cancelled tool execution.', ); } else if (toolResult.error === undefined) { - return await this.createSuccessResult(call, toolResult); + completedToolCall = await this.createSuccessResult( + call, + toolResult, + ); } else { const displayText = typeof toolResult.returnDisplay === 'string' ? 
toolResult.returnDisplay : undefined; - return this.createErrorResult( + completedToolCall = this.createErrorResult( call, new Error(toolResult.error.message), toolResult.error.type, @@ -141,21 +155,25 @@ export class ToolExecutor { } catch (executionError: unknown) { spanMetadata.error = executionError; if (signal.aborted) { - return this.createCancelledResult( + completedToolCall = this.createCancelledResult( call, 'User cancelled tool execution.', ); + } else { + const error = + executionError instanceof Error + ? executionError + : new Error(String(executionError)); + completedToolCall = this.createErrorResult( + call, + error, + ToolErrorType.UNHANDLED_EXCEPTION, + ); } - const error = - executionError instanceof Error - ? executionError - : new Error(String(executionError)); - return this.createErrorResult( - call, - error, - ToolErrorType.UNHANDLED_EXCEPTION, - ); } + + spanMetadata.output = completedToolCall; + return completedToolCall; }, ); } diff --git a/packages/core/src/telemetry/constants.ts b/packages/core/src/telemetry/constants.ts index 1d2b7751a1..40eb8d1a41 100644 --- a/packages/core/src/telemetry/constants.ts +++ b/packages/core/src/telemetry/constants.ts @@ -5,3 +5,34 @@ */ export const SERVICE_NAME = 'gemini-cli'; +export const SERVICE_DESCRIPTION = + 'Gemini CLI is an open-source AI agent that brings the power of Gemini directly into your terminal. 
It is designed to be a terminal-first, extensible, and powerful tool for developers, engineers, SREs, and beyond.'; + +// Gemini CLI specific semantic conventions +// https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/#genai-attributes +export const GEN_AI_OPERATION_NAME = 'gen_ai.operation.name'; +export const GEN_AI_AGENT_NAME = 'gen_ai.agent.name'; +export const GEN_AI_AGENT_DESCRIPTION = 'gen_ai.agent.description'; +export const GEN_AI_INPUT_MESSAGES = 'gen_ai.input.messages'; +export const GEN_AI_OUTPUT_MESSAGES = 'gen_ai.output.messages'; +export const GEN_AI_REQUEST_MODEL = 'gen_ai.request.model'; +export const GEN_AI_RESPONSE_MODEL = 'gen_ai.response.model'; +export const GEN_AI_PROMPT_NAME = 'gen_ai.prompt.name'; +export const GEN_AI_TOOL_NAME = 'gen_ai.tool.name'; +export const GEN_AI_TOOL_CALL_ID = 'gen_ai.tool.call_id'; +export const GEN_AI_TOOL_DESCRIPTION = 'gen_ai.tool.description'; +export const GEN_AI_USAGE_INPUT_TOKENS = 'gen_ai.usage.input_tokens'; +export const GEN_AI_USAGE_OUTPUT_TOKENS = 'gen_ai.usage.output_tokens'; +export const GEN_AI_SYSTEM_INSTRUCTIONS = 'gen_ai.system_instructions'; +export const GEN_AI_TOOL_DEFINITIONS = 'gen_ai.tool.definitions'; +export const GEN_AI_CONVERSATION_ID = 'gen_ai.conversation.id'; + +// Gemini CLI specific operations +export enum GeminiCliOperation { + ToolCall = 'tool_call', + LLMCall = 'llm_call', + UserPrompt = 'user_prompt', + SystemPrompt = 'system_prompt', + AgentCall = 'agent_call', + ScheduleToolCalls = 'schedule_tool_calls', +} diff --git a/packages/core/src/telemetry/index.ts b/packages/core/src/telemetry/index.ts index 0523ae709d..84064ee7e6 100644 --- a/packages/core/src/telemetry/index.ts +++ b/packages/core/src/telemetry/index.ts @@ -148,3 +148,4 @@ export { } from './metrics.js'; export { runInDevTraceSpan, type SpanMetadata } from './trace.js'; export { startupProfiler, StartupProfiler } from './startupProfiler.js'; +export * from './constants.js'; diff --git 
a/packages/core/src/telemetry/trace.test.ts b/packages/core/src/telemetry/trace.test.ts new file mode 100644 index 0000000000..4d9aa0baa8 --- /dev/null +++ b/packages/core/src/telemetry/trace.test.ts @@ -0,0 +1,188 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { trace, SpanStatusCode, diag, type Tracer } from '@opentelemetry/api'; +import { runInDevTraceSpan } from './trace.js'; +import { + GeminiCliOperation, + GEN_AI_CONVERSATION_ID, + GEN_AI_AGENT_DESCRIPTION, + GEN_AI_AGENT_NAME, + GEN_AI_INPUT_MESSAGES, + GEN_AI_OPERATION_NAME, + GEN_AI_OUTPUT_MESSAGES, + SERVICE_DESCRIPTION, + SERVICE_NAME, +} from './constants.js'; + +vi.mock('@opentelemetry/api', async (importOriginal) => { + const original = await importOriginal(); + return { + ...original, + trace: { + getTracer: vi.fn(), + }, + diag: { + error: vi.fn(), + }, + }; +}); + +vi.mock('../utils/session.js', () => ({ + sessionId: 'test-session-id', +})); + +describe('runInDevTraceSpan', () => { + const mockSpan = { + setAttribute: vi.fn(), + setStatus: vi.fn(), + recordException: vi.fn(), + end: vi.fn(), + }; + + const mockTracer = { + startActiveSpan: vi.fn((name, options, callback) => callback(mockSpan)), + } as unknown as Tracer; + + beforeEach(() => { + vi.clearAllMocks(); + vi.mocked(trace.getTracer).mockReturnValue(mockTracer); + }); + + afterEach(() => { + vi.unstubAllEnvs(); + }); + + it('should start an active span', async () => { + const fn = vi.fn(async () => 'result'); + + const result = await runInDevTraceSpan( + { operation: GeminiCliOperation.LLMCall }, + fn, + ); + + expect(result).toBe('result'); + expect(trace.getTracer).toHaveBeenCalled(); + expect(mockTracer.startActiveSpan).toHaveBeenCalledWith( + GeminiCliOperation.LLMCall, + {}, + expect.any(Function), + ); + }); + + it('should set default attributes on the span metadata', async () => { + await 
runInDevTraceSpan( + { operation: GeminiCliOperation.LLMCall }, + async ({ metadata }) => { + expect(metadata.attributes[GEN_AI_OPERATION_NAME]).toBe( + GeminiCliOperation.LLMCall, + ); + expect(metadata.attributes[GEN_AI_AGENT_NAME]).toBe(SERVICE_NAME); + expect(metadata.attributes[GEN_AI_AGENT_DESCRIPTION]).toBe( + SERVICE_DESCRIPTION, + ); + expect(metadata.attributes[GEN_AI_CONVERSATION_ID]).toBe( + 'test-session-id', + ); + }, + ); + }); + + it('should set span attributes from metadata on completion', async () => { + await runInDevTraceSpan( + { operation: GeminiCliOperation.LLMCall }, + async ({ metadata }) => { + metadata.input = { query: 'hello' }; + metadata.output = { response: 'world' }; + metadata.attributes['custom.attr'] = 'value'; + }, + ); + + expect(mockSpan.setAttribute).toHaveBeenCalledWith( + GEN_AI_INPUT_MESSAGES, + JSON.stringify({ query: 'hello' }), + ); + expect(mockSpan.setAttribute).toHaveBeenCalledWith( + GEN_AI_OUTPUT_MESSAGES, + JSON.stringify({ response: 'world' }), + ); + expect(mockSpan.setAttribute).toHaveBeenCalledWith('custom.attr', 'value'); + expect(mockSpan.setStatus).toHaveBeenCalledWith({ + code: SpanStatusCode.OK, + }); + expect(mockSpan.end).toHaveBeenCalled(); + }); + + it('should handle errors in the wrapped function', async () => { + const error = new Error('test error'); + await expect( + runInDevTraceSpan({ operation: GeminiCliOperation.LLMCall }, async () => { + throw error; + }), + ).rejects.toThrow(error); + + expect(mockSpan.setStatus).toHaveBeenCalledWith({ + code: SpanStatusCode.ERROR, + message: 'test error', + }); + expect(mockSpan.recordException).toHaveBeenCalledWith(error); + expect(mockSpan.end).toHaveBeenCalled(); + }); + + it('should respect noAutoEnd option', async () => { + let capturedEndSpan: () => void = () => {}; + const result = await runInDevTraceSpan( + { operation: GeminiCliOperation.LLMCall, noAutoEnd: true }, + async ({ endSpan }) => { + capturedEndSpan = endSpan; + return 'streaming'; + }, + 
); + + expect(result).toBe('streaming'); + expect(mockSpan.end).not.toHaveBeenCalled(); + + capturedEndSpan(); + expect(mockSpan.end).toHaveBeenCalled(); + }); + + it('should automatically end span on error even if noAutoEnd is true', async () => { + const error = new Error('streaming error'); + await expect( + runInDevTraceSpan( + { operation: GeminiCliOperation.LLMCall, noAutoEnd: true }, + async () => { + throw error; + }, + ), + ).rejects.toThrow(error); + + expect(mockSpan.end).toHaveBeenCalled(); + }); + + it('should handle exceptions in endSpan gracefully', async () => { + mockSpan.setAttribute.mockImplementation(() => { + throw new Error('attribute error'); + }); + + await runInDevTraceSpan( + { operation: GeminiCliOperation.LLMCall }, + async ({ metadata }) => { + metadata.input = 'trigger error'; + }, + ); + + expect(diag.error).toHaveBeenCalled(); + expect(mockSpan.setStatus).toHaveBeenCalledWith( + expect.objectContaining({ + code: SpanStatusCode.ERROR, + message: expect.stringContaining('attribute error'), + }), + ); + expect(mockSpan.end).toHaveBeenCalled(); + }); +}); diff --git a/packages/core/src/telemetry/trace.ts b/packages/core/src/telemetry/trace.ts index d44625bf92..1f4676343a 100644 --- a/packages/core/src/telemetry/trace.ts +++ b/packages/core/src/telemetry/trace.ts @@ -12,6 +12,18 @@ import { type SpanOptions, } from '@opentelemetry/api'; import { safeJsonStringify } from '../utils/safeJsonStringify.js'; +import { + type GeminiCliOperation, + GEN_AI_AGENT_DESCRIPTION, + GEN_AI_AGENT_NAME, + GEN_AI_CONVERSATION_ID, + GEN_AI_INPUT_MESSAGES, + GEN_AI_OPERATION_NAME, + GEN_AI_OUTPUT_MESSAGES, + SERVICE_DESCRIPTION, + SERVICE_NAME, +} from './constants.js'; +import { sessionId } from '../utils/session.js'; const TRACER_NAME = 'gemini-cli'; const TRACER_VERSION = 'v1'; @@ -51,7 +63,7 @@ export interface SpanMetadata { * @returns The result of the function. 
*/ export async function runInDevTraceSpan( - opts: SpanOptions & { name: string; noAutoEnd?: boolean }, + opts: SpanOptions & { operation: GeminiCliOperation; noAutoEnd?: boolean }, fn: ({ metadata, }: { @@ -59,33 +71,32 @@ export async function runInDevTraceSpan( endSpan: () => void; }) => Promise, ): Promise { - const { name: spanName, noAutoEnd, ...restOfSpanOpts } = opts; - if (process.env['GEMINI_DEV_TRACING'] !== 'true') { - // If GEMINI_DEV_TRACING env var not set, we do not trace. - return fn({ - metadata: { - name: spanName, - attributes: {}, - }, - endSpan: () => { - // noop - }, - }); - } + const { operation, noAutoEnd, ...restOfSpanOpts } = opts; const tracer = trace.getTracer(TRACER_NAME, TRACER_VERSION); - return tracer.startActiveSpan(opts.name, restOfSpanOpts, async (span) => { + return tracer.startActiveSpan(operation, restOfSpanOpts, async (span) => { const meta: SpanMetadata = { - name: spanName, - attributes: {}, + name: operation, + attributes: { + [GEN_AI_OPERATION_NAME]: operation, + [GEN_AI_AGENT_NAME]: SERVICE_NAME, + [GEN_AI_AGENT_DESCRIPTION]: SERVICE_DESCRIPTION, + [GEN_AI_CONVERSATION_ID]: sessionId, + }, }; const endSpan = () => { try { if (meta.input !== undefined) { - span.setAttribute('input-json', safeJsonStringify(meta.input)); + span.setAttribute( + GEN_AI_INPUT_MESSAGES, + safeJsonStringify(meta.input), + ); } if (meta.output !== undefined) { - span.setAttribute('output-json', safeJsonStringify(meta.output)); + span.setAttribute( + GEN_AI_OUTPUT_MESSAGES, + safeJsonStringify(meta.output), + ); } for (const [key, value] of Object.entries(meta.attributes)) { span.setAttribute(key, value); diff --git a/packages/sdk/src/agent.integration.test.ts b/packages/sdk/src/agent.integration.test.ts index 78229a81cc..bd98890c52 100644 --- a/packages/sdk/src/agent.integration.test.ts +++ b/packages/sdk/src/agent.integration.test.ts @@ -4,7 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, expect } from 'vitest'; +import 
{ describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import { GeminiCliAgent } from './agent.js'; import * as path from 'node:path'; import { fileURLToPath } from 'node:url'; @@ -20,6 +20,13 @@ const getGoldenPath = (name: string) => path.resolve(__dirname, '../test-data', `${name}.json`); describe('GeminiCliAgent Integration', () => { + beforeEach(() => { + vi.stubEnv('GEMINI_API_KEY', 'test-api-key'); + }); + + afterEach(() => { + vi.unstubAllEnvs(); + }); it('handles static instructions', async () => { const goldenFile = getGoldenPath('agent-static-instructions'); From e17f927a69033f0f226a2a1f8541a04d2ffba8d2 Mon Sep 17 00:00:00 2001 From: christine betts Date: Thu, 26 Feb 2026 22:29:33 -0500 Subject: [PATCH 19/41] Add support for policy engine in extensions (#20049) Co-authored-by: Jerop Kipruto --- docs/extensions/reference.md | 36 +++++ docs/reference/policy-engine.md | 7 +- .../extensions/examples/policies/README.md | 41 ++++++ .../examples/policies/gemini-extension.json | 5 + .../examples/policies/policies/policies.toml | 28 ++++ packages/cli/src/config/extension-manager.ts | 39 ++++- packages/cli/src/config/extension.test.ts | 134 +++++++++++++++++- .../config/policy-engine.integration.test.ts | 24 ++-- packages/core/src/config/config.ts | 15 +- packages/core/src/policy/config.test.ts | 52 +++---- packages/core/src/policy/config.ts | 93 ++++++++++-- .../core/src/policy/policy-engine.test.ts | 105 ++++++++++++++ packages/core/src/policy/policy-engine.ts | 16 +++ packages/core/src/policy/toml-loader.test.ts | 30 ++-- packages/core/src/policy/toml-loader.ts | 13 +- .../core/src/policy/workspace-policy.test.ts | 20 +-- .../core/src/utils/extensionLoader.test.ts | 52 +++++++ packages/core/src/utils/extensionLoader.ts | 36 +++++ 18 files changed, 657 insertions(+), 89 deletions(-) create mode 100644 packages/cli/src/commands/extensions/examples/policies/README.md create mode 100644 
packages/cli/src/commands/extensions/examples/policies/gemini-extension.json create mode 100644 packages/cli/src/commands/extensions/examples/policies/policies/policies.toml diff --git a/docs/extensions/reference.md b/docs/extensions/reference.md index d36df94d78..2c2b730126 100644 --- a/docs/extensions/reference.md +++ b/docs/extensions/reference.md @@ -227,6 +227,42 @@ skill definitions in a `skills/` directory. For example, Provide [sub-agents](../core/subagents.md) that users can delegate tasks to. Add agent definition files (`.md`) to an `agents/` directory in your extension root. +### Policy Engine + +Extensions can contribute policy rules and safety checkers to the Gemini CLI +[Policy Engine](../reference/policy-engine.md). These rules are defined in +`.toml` files and take effect when the extension is activated. + +To add policies, create a `policies/` directory in your extension's root and +place your `.toml` policy files inside it. Gemini CLI automatically loads all +`.toml` files from this directory. + +Rules contributed by extensions run in their own tier (tier 2), separate from +workspace-defined policies. This tier has higher priority than the default rules +but lower priority than workspace, user, or admin policies. + +> **Warning:** For security, Gemini CLI ignores any `allow` decisions or `yolo` +> mode configurations in extension policies. This ensures that an extension +> cannot automatically approve tool calls or bypass security measures without +> your confirmation. + +**Example `policies.toml`** + +```toml +[[rule]] +toolName = "my_server__dangerous_tool" +decision = "ask_user" +priority = 100 + +[[safety_checker]] +toolName = "my_server__write_data" +priority = 200 +[safety_checker.checker] +type = "in-process" +name = "allowed-path" +required_context = ["environment"] +``` + ### Themes Extensions can provide custom themes to personalize the CLI UI. 
Themes are diff --git a/docs/reference/policy-engine.md b/docs/reference/policy-engine.md index a123634581..810c591c24 100644 --- a/docs/reference/policy-engine.md +++ b/docs/reference/policy-engine.md @@ -97,9 +97,10 @@ has a designated number that forms the base of the final priority calculation. | Tier | Base | Description | | :-------- | :--- | :------------------------------------------------------------------------- | | Default | 1 | Built-in policies that ship with the Gemini CLI. | -| Workspace | 2 | Policies defined in the current workspace's configuration directory. | -| User | 3 | Custom policies defined by the user. | -| Admin | 4 | Policies managed by an administrator (e.g., in an enterprise environment). | +| Extension | 2 | Policies defined in extensions. | +| Workspace | 3 | Policies defined in the current workspace's configuration directory. | +| User | 4 | Custom policies defined by the user. | +| Admin | 5 | Policies managed by an administrator (e.g., in an enterprise environment). | Within a TOML policy file, you assign a priority value from **0 to 999**. The engine transforms this into a final priority using the following formula: diff --git a/packages/cli/src/commands/extensions/examples/policies/README.md b/packages/cli/src/commands/extensions/examples/policies/README.md new file mode 100644 index 0000000000..d1c06de6e3 --- /dev/null +++ b/packages/cli/src/commands/extensions/examples/policies/README.md @@ -0,0 +1,41 @@ +# Policy engine example extension + +This extension demonstrates how to contribute security rules and safety checkers +to the Gemini CLI Policy Engine. + +## Description + +The extension uses a `policies/` directory containing `.toml` files to define: + +- A rule that requires user confirmation for `rm -rf` commands. +- A rule that denies searching for sensitive files (like `.env`) using `grep`. +- A safety checker that validates file paths for all write operations. 
+ +## Structure + +- `gemini-extension.json`: The manifest file. +- `policies/`: Contains the `.toml` policy files. + +## How to use + +1. Link this extension to your local Gemini CLI installation: + + ```bash + gemini extensions link packages/cli/src/commands/extensions/examples/policies + ``` + +2. Restart your Gemini CLI session. + +3. **Observe the policies:** + - Try asking the model to delete a directory: The policy engine will prompt + you for confirmation due to the `rm -rf` rule. + - Try asking the model to search for secrets: The `grep` rule will deny the + request and display the custom deny message. + - Any file write operation will now be processed through the `allowed-path` + safety checker. + +## Security note + +For security, Gemini CLI ignores any `allow` decisions or `yolo` mode +configurations contributed by extensions. This ensures that extensions can +strengthen security but cannot bypass user confirmation. diff --git a/packages/cli/src/commands/extensions/examples/policies/gemini-extension.json b/packages/cli/src/commands/extensions/examples/policies/gemini-extension.json new file mode 100644 index 0000000000..2a2b992532 --- /dev/null +++ b/packages/cli/src/commands/extensions/examples/policies/gemini-extension.json @@ -0,0 +1,5 @@ +{ + "name": "policy-example", + "version": "1.0.0", + "description": "An example extension demonstrating Policy Engine support." +} diff --git a/packages/cli/src/commands/extensions/examples/policies/policies/policies.toml b/packages/cli/src/commands/extensions/examples/policies/policies/policies.toml new file mode 100644 index 0000000000..d89d5e5737 --- /dev/null +++ b/packages/cli/src/commands/extensions/examples/policies/policies/policies.toml @@ -0,0 +1,28 @@ +# Example Policy Rules for Gemini CLI Extension +# +# Extensions run in Tier 2 (Extension Tier). +# Security Note: 'allow' decisions and 'yolo' mode configurations are ignored. 
+ +# Rule: Always ask the user before running a specific dangerous shell command. +[[rule]] +toolName = "run_shell_command" +commandPrefix = "rm -rf" +decision = "ask_user" +priority = 100 + +# Rule: Deny access to sensitive files using the grep tool. +[[rule]] +toolName = "grep_search" +argsPattern = "(\\.env|id_rsa|passwd)" +decision = "deny" +priority = 200 +deny_message = "Access to sensitive credentials or system files is restricted by the policy-example extension." + +# Safety Checker: Apply path validation to all write operations. +[[safety_checker]] +toolName = ["write_file", "replace"] +priority = 300 +[safety_checker.checker] +type = "in-process" +name = "allowed-path" +required_context = ["environment"] diff --git a/packages/cli/src/config/extension-manager.ts b/packages/cli/src/config/extension-manager.ts index 93ad3f3536..56152cd6e1 100644 --- a/packages/cli/src/config/extension-manager.ts +++ b/packages/cli/src/config/extension-manager.ts @@ -52,6 +52,10 @@ import { applyAdminAllowlist, getAdminBlockedMcpServersMessage, CoreToolCallStatus, + loadExtensionPolicies, + isSubpath, + type PolicyRule, + type SafetyCheckerRule, HookType, } from '@google/gemini-cli-core'; import { maybeRequestConsentOrFail } from './extensions/consent.js'; @@ -764,9 +768,18 @@ Would you like to attempt to install via "git clone" instead?`, } const contextFiles = getContextFileNames(config) - .map((contextFileName) => - path.join(effectiveExtensionPath, contextFileName), - ) + .map((contextFileName) => { + const contextFilePath = path.join( + effectiveExtensionPath, + contextFileName, + ); + if (!isSubpath(effectiveExtensionPath, contextFilePath)) { + throw new Error( + `Invalid context file path: "${contextFileName}". 
Context files must be within the extension directory.`, + ); + } + return contextFilePath; + }) .filter((contextFilePath) => fs.existsSync(contextFilePath)); const hydrationContext: VariableContext = { @@ -820,6 +833,24 @@ Would you like to attempt to install via "git clone" instead?`, recursivelyHydrateStrings(skill, hydrationContext), ); + let rules: PolicyRule[] | undefined; + let checkers: SafetyCheckerRule[] | undefined; + + const policyDir = path.join(effectiveExtensionPath, 'policies'); + if (fs.existsSync(policyDir)) { + const result = await loadExtensionPolicies(config.name, policyDir); + rules = result.rules; + checkers = result.checkers; + + if (result.errors.length > 0) { + for (const error of result.errors) { + debugLogger.warn( + `[ExtensionManager] Error loading policies from ${config.name}: ${error.message}${error.details ? `\nDetails: ${error.details}` : ''}`, + ); + } + } + } + const agentLoadResult = await loadAgentsFromDirectory( path.join(effectiveExtensionPath, 'agents'), ); @@ -853,6 +884,8 @@ Would you like to attempt to install via "git clone" instead?`, skills, agents: agentLoadResult.agents, themes: config.themes, + rules, + checkers, }; } catch (e) { debugLogger.error( diff --git a/packages/cli/src/config/extension.test.ts b/packages/cli/src/config/extension.test.ts index affcd0cef0..f8e66bf8e2 100644 --- a/packages/cli/src/config/extension.test.ts +++ b/packages/cli/src/config/extension.test.ts @@ -239,6 +239,27 @@ describe('extension tests', () => { expect(extensions[0].name).toBe('test-extension'); }); + it('should throw an error if a context file path is outside the extension directory', async () => { + const consoleSpy = vi + .spyOn(console, 'error') + .mockImplementation(() => {}); + createExtension({ + extensionsDir: userExtensionsDir, + name: 'traversal-extension', + version: '1.0.0', + contextFileName: '../secret.txt', + }); + + const extensions = await extensionManager.loadExtensions(); + expect(extensions).toHaveLength(0); + 
expect(consoleSpy).toHaveBeenCalledWith( + expect.stringContaining( + 'traversal-extension: Invalid context file path: "../secret.txt"', + ), + ); + consoleSpy.mockRestore(); + }); + it('should load context file path when GEMINI.md is present', async () => { createExtension({ extensionsDir: userExtensionsDir, @@ -363,6 +384,111 @@ describe('extension tests', () => { ]); }); + it('should load extension policies from the policies directory', async () => { + const extDir = createExtension({ + extensionsDir: userExtensionsDir, + name: 'policy-extension', + version: '1.0.0', + }); + + const policiesDir = path.join(extDir, 'policies'); + fs.mkdirSync(policiesDir); + + const policiesContent = ` +[[rule]] +toolName = "deny_tool" +decision = "deny" +priority = 500 + +[[rule]] +toolName = "ask_tool" +decision = "ask_user" +priority = 100 +`; + fs.writeFileSync( + path.join(policiesDir, 'policies.toml'), + policiesContent, + ); + + const extensions = await extensionManager.loadExtensions(); + expect(extensions).toHaveLength(1); + const extension = extensions[0]; + + expect(extension.rules).toBeDefined(); + expect(extension.rules).toHaveLength(2); + expect( + extension.rules!.find((r) => r.toolName === 'deny_tool')?.decision, + ).toBe('deny'); + expect( + extension.rules!.find((r) => r.toolName === 'ask_tool')?.decision, + ).toBe('ask_user'); + // Verify source is prefixed + expect(extension.rules![0].source).toContain( + 'Extension (policy-extension):', + ); + }); + + it('should ignore ALLOW rules and YOLO mode from extension policies for security', async () => { + const consoleSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + const extDir = createExtension({ + extensionsDir: userExtensionsDir, + name: 'security-test-extension', + version: '1.0.0', + }); + + const policiesDir = path.join(extDir, 'policies'); + fs.mkdirSync(policiesDir); + + const policiesContent = ` +[[rule]] +toolName = "allow_tool" +decision = "allow" +priority = 100 + +[[rule]] +toolName = 
"yolo_tool" +decision = "ask_user" +priority = 100 +modes = ["yolo"] + +[[safety_checker]] +toolName = "yolo_check" +priority = 100 +modes = ["yolo"] +[safety_checker.checker] +type = "external" +name = "yolo-checker" +`; + fs.writeFileSync( + path.join(policiesDir, 'policies.toml'), + policiesContent, + ); + + const extensions = await extensionManager.loadExtensions(); + expect(extensions).toHaveLength(1); + const extension = extensions[0]; + + // ALLOW rules and YOLO rules/checkers should be filtered out + expect(extension.rules).toBeDefined(); + expect(extension.rules).toHaveLength(0); + expect(extension.checkers).toBeDefined(); + expect(extension.checkers).toHaveLength(0); + + // Should have logged warnings + expect(consoleSpy).toHaveBeenCalledWith( + expect.stringContaining('attempted to contribute an ALLOW rule'), + ); + expect(consoleSpy).toHaveBeenCalledWith( + expect.stringContaining('attempted to contribute a rule for YOLO mode'), + ); + expect(consoleSpy).toHaveBeenCalledWith( + expect.stringContaining( + 'attempted to contribute a safety checker for YOLO mode', + ), + ); + consoleSpy.mockRestore(); + }); + it('should hydrate ${extensionPath} correctly for linked extensions', async () => { const sourceExtDir = getRealPath( createExtension({ @@ -540,7 +666,7 @@ describe('extension tests', () => { // Bad extension const badExtDir = path.join(userExtensionsDir, 'bad-ext'); - fs.mkdirSync(badExtDir); + fs.mkdirSync(badExtDir, { recursive: true }); const badConfigPath = path.join(badExtDir, EXTENSIONS_CONFIG_FILENAME); fs.writeFileSync(badConfigPath, '{ "name": "bad-ext"'); // Malformed @@ -548,7 +674,7 @@ describe('extension tests', () => { expect(extensions).toHaveLength(1); expect(extensions[0].name).toBe('good-ext'); - expect(consoleSpy).toHaveBeenCalledExactlyOnceWith( + expect(consoleSpy).toHaveBeenCalledWith( expect.stringContaining( `Warning: Skipping extension in ${badExtDir}: Failed to load extension config from ${badConfigPath}`, ), @@ -571,7 
+697,7 @@ describe('extension tests', () => { // Bad extension const badExtDir = path.join(userExtensionsDir, 'bad-ext-no-name'); - fs.mkdirSync(badExtDir); + fs.mkdirSync(badExtDir, { recursive: true }); const badConfigPath = path.join(badExtDir, EXTENSIONS_CONFIG_FILENAME); fs.writeFileSync(badConfigPath, JSON.stringify({ version: '1.0.0' })); @@ -579,7 +705,7 @@ describe('extension tests', () => { expect(extensions).toHaveLength(1); expect(extensions[0].name).toBe('good-ext'); - expect(consoleSpy).toHaveBeenCalledExactlyOnceWith( + expect(consoleSpy).toHaveBeenCalledWith( expect.stringContaining( `Warning: Skipping extension in ${badExtDir}: Failed to load extension config from ${badConfigPath}: Invalid configuration in ${badConfigPath}: missing "name"`, ), diff --git a/packages/cli/src/config/policy-engine.integration.test.ts b/packages/cli/src/config/policy-engine.integration.test.ts index 1d7573337e..02515815d0 100644 --- a/packages/cli/src/config/policy-engine.integration.test.ts +++ b/packages/cli/src/config/policy-engine.integration.test.ts @@ -177,13 +177,13 @@ describe('Policy Engine Integration Tests', () => { ); const engine = new PolicyEngine(config); - // MCP server allowed (priority 3.1) provides general allow for server - // MCP server allowed (priority 3.1) provides general allow for server + // MCP server allowed (priority 4.1) provides general allow for server + // MCP server allowed (priority 4.1) provides general allow for server expect( (await engine.check({ name: 'my-server__safe-tool' }, undefined)) .decision, ).toBe(PolicyDecision.ALLOW); - // But specific tool exclude (priority 3.4) wins over server allow + // But specific tool exclude (priority 4.4) wins over server allow expect( (await engine.check({ name: 'my-server__dangerous-tool' }, undefined)) .decision, @@ -476,25 +476,25 @@ describe('Policy Engine Integration Tests', () => { // Find rules and verify their priorities const blockedToolRule = rules.find((r) => r.toolName === 
'blocked-tool'); - expect(blockedToolRule?.priority).toBe(3.4); // Command line exclude + expect(blockedToolRule?.priority).toBe(4.4); // Command line exclude const blockedServerRule = rules.find( (r) => r.toolName === 'blocked-server__*', ); - expect(blockedServerRule?.priority).toBe(3.9); // MCP server exclude + expect(blockedServerRule?.priority).toBe(4.9); // MCP server exclude const specificToolRule = rules.find( (r) => r.toolName === 'specific-tool', ); - expect(specificToolRule?.priority).toBe(3.3); // Command line allow + expect(specificToolRule?.priority).toBe(4.3); // Command line allow const trustedServerRule = rules.find( (r) => r.toolName === 'trusted-server__*', ); - expect(trustedServerRule?.priority).toBe(3.2); // MCP trusted server + expect(trustedServerRule?.priority).toBe(4.2); // MCP trusted server const mcpServerRule = rules.find((r) => r.toolName === 'mcp-server__*'); - expect(mcpServerRule?.priority).toBe(3.1); // MCP allowed server + expect(mcpServerRule?.priority).toBe(4.1); // MCP allowed server const readOnlyToolRule = rules.find((r) => r.toolName === 'glob'); // Priority 70 in default tier → 1.07 (Overriding Plan Mode Deny) @@ -641,16 +641,16 @@ describe('Policy Engine Integration Tests', () => { // Verify each rule has the expected priority const tool3Rule = rules.find((r) => r.toolName === 'tool3'); - expect(tool3Rule?.priority).toBe(3.4); // Excluded tools (user tier) + expect(tool3Rule?.priority).toBe(4.4); // Excluded tools (user tier) const server2Rule = rules.find((r) => r.toolName === 'server2__*'); - expect(server2Rule?.priority).toBe(3.9); // Excluded servers (user tier) + expect(server2Rule?.priority).toBe(4.9); // Excluded servers (user tier) const tool1Rule = rules.find((r) => r.toolName === 'tool1'); - expect(tool1Rule?.priority).toBe(3.3); // Allowed tools (user tier) + expect(tool1Rule?.priority).toBe(4.3); // Allowed tools (user tier) const server1Rule = rules.find((r) => r.toolName === 'server1__*'); - 
expect(server1Rule?.priority).toBe(3.1); // Allowed servers (user tier) + expect(server1Rule?.priority).toBe(4.1); // Allowed servers (user tier) const globRule = rules.find((r) => r.toolName === 'glob'); // Priority 70 in default tier → 1.07 diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 2f5d452446..32d74479e7 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -107,7 +107,12 @@ import { FileExclusions } from '../utils/ignorePatterns.js'; import { MessageBus } from '../confirmation-bus/message-bus.js'; import type { EventEmitter } from 'node:events'; import { PolicyEngine } from '../policy/policy-engine.js'; -import { ApprovalMode, type PolicyEngineConfig } from '../policy/types.js'; +import { + ApprovalMode, + type PolicyEngineConfig, + type PolicyRule, + type SafetyCheckerRule, +} from '../policy/types.js'; import { HookSystem } from '../hooks/index.js'; import type { UserTierId, @@ -324,6 +329,14 @@ export interface GeminiCLIExtension { * These themes will be registered when the extension is activated. */ themes?: CustomTheme[]; + /** + * Policy rules contributed by this extension. + */ + rules?: PolicyRule[]; + /** + * Safety checkers contributed by this extension. 
+ */ + checkers?: SafetyCheckerRule[]; } export interface ExtensionInstallMetadata { diff --git a/packages/core/src/policy/config.test.ts b/packages/core/src/policy/config.test.ts index a9fae7a1fa..3ded361084 100644 --- a/packages/core/src/policy/config.test.ts +++ b/packages/core/src/policy/config.test.ts @@ -169,7 +169,7 @@ describe('createPolicyEngineConfig', () => { r.decision === PolicyDecision.ALLOW, ); expect(rule).toBeDefined(); - expect(rule?.priority).toBeCloseTo(3.3, 5); // Command line allow + expect(rule?.priority).toBeCloseTo(4.3, 5); // Command line allow }); it('should deny tools in tools.exclude', async () => { @@ -188,7 +188,7 @@ describe('createPolicyEngineConfig', () => { r.decision === PolicyDecision.DENY, ); expect(rule).toBeDefined(); - expect(rule?.priority).toBeCloseTo(3.4, 5); // Command line exclude + expect(rule?.priority).toBeCloseTo(4.4, 5); // Command line exclude }); it('should allow tools from allowed MCP servers', async () => { @@ -206,7 +206,7 @@ describe('createPolicyEngineConfig', () => { r.toolName === 'my-server__*' && r.decision === PolicyDecision.ALLOW, ); expect(rule).toBeDefined(); - expect(rule?.priority).toBe(3.1); // MCP allowed server + expect(rule?.priority).toBe(4.1); // MCP allowed server }); it('should deny tools from excluded MCP servers', async () => { @@ -224,7 +224,7 @@ describe('createPolicyEngineConfig', () => { r.toolName === 'my-server__*' && r.decision === PolicyDecision.DENY, ); expect(rule).toBeDefined(); - expect(rule?.priority).toBe(3.9); // MCP excluded server + expect(rule?.priority).toBe(4.9); // MCP excluded server }); it('should allow tools from trusted MCP servers', async () => { @@ -251,7 +251,7 @@ describe('createPolicyEngineConfig', () => { r.decision === PolicyDecision.ALLOW, ); expect(trustedRule).toBeDefined(); - expect(trustedRule?.priority).toBe(3.2); // MCP trusted server + expect(trustedRule?.priority).toBe(4.2); // MCP trusted server // Untrusted server should not have an allow rule 
const untrustedRule = config.rules?.find( @@ -288,7 +288,7 @@ describe('createPolicyEngineConfig', () => { r.decision === PolicyDecision.ALLOW, ); expect(allowedRule).toBeDefined(); - expect(allowedRule?.priority).toBe(3.1); // MCP allowed server + expect(allowedRule?.priority).toBe(4.1); // MCP allowed server // Check trusted server const trustedRule = config.rules?.find( @@ -297,7 +297,7 @@ describe('createPolicyEngineConfig', () => { r.decision === PolicyDecision.ALLOW, ); expect(trustedRule).toBeDefined(); - expect(trustedRule?.priority).toBe(3.2); // MCP trusted server + expect(trustedRule?.priority).toBe(4.2); // MCP trusted server // Check excluded server const excludedRule = config.rules?.find( @@ -306,7 +306,7 @@ describe('createPolicyEngineConfig', () => { r.decision === PolicyDecision.DENY, ); expect(excludedRule).toBeDefined(); - expect(excludedRule?.priority).toBe(3.9); // MCP excluded server + expect(excludedRule?.priority).toBe(4.9); // MCP excluded server }); it('should allow all tools in YOLO mode', async () => { @@ -387,11 +387,11 @@ describe('createPolicyEngineConfig', () => { ); expect(serverDenyRule).toBeDefined(); - expect(serverDenyRule?.priority).toBe(3.9); // MCP excluded server + expect(serverDenyRule?.priority).toBe(4.9); // MCP excluded server expect(toolAllowRule).toBeDefined(); - expect(toolAllowRule?.priority).toBeCloseTo(3.3, 5); // Command line allow + expect(toolAllowRule?.priority).toBeCloseTo(4.3, 5); // Command line allow - // Server deny (3.9) has higher priority than tool allow (3.3), + // Server deny (4.9) has higher priority than tool allow (4.3), // so server deny wins (this is expected behavior - server-level blocks are security critical) }); @@ -424,7 +424,7 @@ describe('createPolicyEngineConfig', () => { expect(serverAllowRule).toBeDefined(); expect(toolDenyRule).toBeDefined(); - // Command line exclude (3.4) has higher priority than MCP server trust (3.2) + // Command line exclude (4.4) has higher priority than MCP 
server trust (4.2) // This is the correct behavior - specific exclusions should beat general server trust expect(toolDenyRule!.priority).toBeGreaterThan(serverAllowRule!.priority!); }); @@ -432,16 +432,16 @@ describe('createPolicyEngineConfig', () => { it('should handle complex priority scenarios correctly', async () => { const settings: PolicySettings = { tools: { - allowed: ['my-server__tool1', 'other-tool'], // Priority 3.3 - exclude: ['my-server__tool2', 'glob'], // Priority 3.4 + allowed: ['my-server__tool1', 'other-tool'], // Priority 4.3 + exclude: ['my-server__tool2', 'glob'], // Priority 4.4 }, mcp: { - allowed: ['allowed-server'], // Priority 3.1 - excluded: ['excluded-server'], // Priority 3.9 + allowed: ['allowed-server'], // Priority 4.1 + excluded: ['excluded-server'], // Priority 4.9 }, mcpServers: { 'trusted-server': { - trust: true, // Priority 90 -> 3.2 + trust: true, // Priority 4.2 }, }, }; @@ -517,7 +517,7 @@ describe('createPolicyEngineConfig', () => { expect(globDenyRule).toBeDefined(); expect(globAllowRule).toBeDefined(); // Deny from settings (user tier) - expect(globDenyRule!.priority).toBeCloseTo(3.4, 5); // Command line exclude + expect(globDenyRule!.priority).toBeCloseTo(4.4, 5); // Command line exclude // Allow from default TOML: 1 + 50/1000 = 1.05 expect(globAllowRule!.priority).toBeCloseTo(1.05, 5); @@ -530,11 +530,11 @@ describe('createPolicyEngineConfig', () => { })) .sort((a, b) => (b.priority ?? 0) - (a.priority ?? 0)); - // Check that the highest priority items are the excludes (user tier: 3.4 and 3.9) + // Check that the highest priority items are the excludes (user tier: 4.4 and 4.9) const highestPriorityExcludes = priorities?.filter( (p) => - Math.abs(p.priority! - 3.4) < 0.01 || - Math.abs(p.priority! - 3.9) < 0.01, + Math.abs(p.priority! - 4.4) < 0.01 || + Math.abs(p.priority! 
- 4.9) < 0.01, ); expect( highestPriorityExcludes?.every((p) => p.decision === PolicyDecision.DENY), @@ -626,7 +626,7 @@ describe('createPolicyEngineConfig', () => { r.toolName === 'dangerous-tool' && r.decision === PolicyDecision.DENY, ); expect(excludeRule).toBeDefined(); - expect(excludeRule?.priority).toBeCloseTo(3.4, 5); // Command line exclude + expect(excludeRule?.priority).toBeCloseTo(4.4, 5); // Command line exclude }); it('should support argsPattern in policy rules', async () => { @@ -733,8 +733,8 @@ priority = 150 r.decision === PolicyDecision.ALLOW, ); expect(rule).toBeDefined(); - // Priority 150 in user tier → 3.150 - expect(rule?.priority).toBeCloseTo(3.15, 5); + // Priority 150 in user tier → 4.150 + expect(rule?.priority).toBeCloseTo(4.15, 5); expect(rule?.argsPattern).toBeInstanceOf(RegExp); expect(rule?.argsPattern?.test('{"command":"git status"}')).toBe(true); expect(rule?.argsPattern?.test('{"command":"git diff"}')).toBe(true); @@ -1046,7 +1046,7 @@ name = "invalid-name" r.decision === PolicyDecision.ALLOW, ); expect(rule).toBeDefined(); - expect(rule?.priority).toBeCloseTo(3.3, 5); // Command line allow + expect(rule?.priority).toBeCloseTo(4.3, 5); // Command line allow vi.doUnmock('node:fs/promises'); }); @@ -1188,7 +1188,7 @@ modes = ["plan"] r.modes?.includes(ApprovalMode.PLAN), ); expect(subagentRule).toBeDefined(); - expect(subagentRule?.priority).toBeCloseTo(3.1, 5); + expect(subagentRule?.priority).toBeCloseTo(4.1, 5); vi.doUnmock('node:fs/promises'); }); diff --git a/packages/core/src/policy/config.ts b/packages/core/src/policy/config.ts index 7de415cb37..800006e27e 100644 --- a/packages/core/src/policy/config.ts +++ b/packages/core/src/policy/config.ts @@ -13,8 +13,9 @@ import { type PolicyEngineConfig, PolicyDecision, type PolicyRule, - type ApprovalMode, + ApprovalMode, type PolicySettings, + type SafetyCheckerRule, } from './types.js'; import type { PolicyEngine } from './policy-engine.js'; import { loadPoliciesFromToml, type 
PolicyFileError } from './toml-loader.js'; @@ -39,14 +40,15 @@ export const DEFAULT_CORE_POLICIES_DIR = path.join(__dirname, 'policies'); // Policy tier constants for priority calculation export const DEFAULT_POLICY_TIER = 1; -export const WORKSPACE_POLICY_TIER = 2; -export const USER_POLICY_TIER = 3; -export const ADMIN_POLICY_TIER = 4; +export const EXTENSION_POLICY_TIER = 2; +export const WORKSPACE_POLICY_TIER = 3; +export const USER_POLICY_TIER = 4; +export const ADMIN_POLICY_TIER = 5; // Specific priority offsets and derived priorities for dynamic/settings rules. // These are added to the tier base (e.g., USER_POLICY_TIER). -// Workspace tier (2) + high priority (950/1000) = ALWAYS_ALLOW_PRIORITY +// Workspace tier (3) + high priority (950/1000) = ALWAYS_ALLOW_PRIORITY // This ensures user "always allow" selections are high priority // within the workspace tier but still lose to user/admin policies. export const ALWAYS_ALLOW_PRIORITY = WORKSPACE_POLICY_TIER + 0.95; @@ -59,7 +61,9 @@ export const ALLOWED_MCP_SERVER_PRIORITY = USER_POLICY_TIER + 0.1; /** * Gets the list of directories to search for policy files, in order of increasing priority - * (Default -> User -> Project -> Admin). + * (Default -> Extension -> Workspace -> User -> Admin). + * + * Note: Extension policies are loaded separately by the extension manager. * * @param defaultPoliciesDir Optional path to a directory containing default policies. * @param policyPaths Optional user-provided policy paths (from --policy flag). @@ -95,7 +99,7 @@ export function getPolicyDirectories( } /** - * Determines the policy tier (1=default, 2=user, 3=workspace, 4=admin) for a given directory. + * Determines the policy tier (1=default, 2=extension, 3=workspace, 4=user, 5=admin) for a given directory. * This is used by the TOML loader to assign priority bands. 
*/ export function getPolicyTier( @@ -178,6 +182,69 @@ async function filterSecurePolicyDirectories( return results.filter((dir): dir is string => dir !== null); } +/** + * Loads and sanitizes policies from an extension's policies directory. + * Security: Filters out 'ALLOW' rules and YOLO mode configurations. + */ +export async function loadExtensionPolicies( + extensionName: string, + policyDir: string, +): Promise<{ + rules: PolicyRule[]; + checkers: SafetyCheckerRule[]; + errors: PolicyFileError[]; +}> { + const result = await loadPoliciesFromToml( + [policyDir], + () => EXTENSION_POLICY_TIER, + ); + + const rules = result.rules.filter((rule) => { + // Security: Extensions are not allowed to automatically approve tool calls. + if (rule.decision === PolicyDecision.ALLOW) { + debugLogger.warn( + `[PolicyConfig] Extension "${extensionName}" attempted to contribute an ALLOW rule for tool "${rule.toolName}". Ignoring this rule for security.`, + ); + return false; + } + + // Security: Extensions are not allowed to contribute YOLO mode rules. + if (rule.modes?.includes(ApprovalMode.YOLO)) { + debugLogger.warn( + `[PolicyConfig] Extension "${extensionName}" attempted to contribute a rule for YOLO mode. Ignoring this rule for security.`, + ); + return false; + } + + // Prefix source with extension name to avoid collisions and double prefixing. + // toml-loader.ts adds "Extension: file.toml", we transform it to "Extension (name): file.toml". + rule.source = rule.source?.replace( + /^Extension: /, + `Extension (${extensionName}): `, + ); + return true; + }); + + const checkers = result.checkers.filter((checker) => { + // Security: Extensions are not allowed to contribute YOLO mode checkers. + if (checker.modes?.includes(ApprovalMode.YOLO)) { + debugLogger.warn( + `[PolicyConfig] Extension "${extensionName}" attempted to contribute a safety checker for YOLO mode. Ignoring this checker for security.`, + ); + return false; + } + + // Prefix source with extension name. 
+ checker.source = checker.source?.replace( + /^Extension: /, + `Extension (${extensionName}): `, + ); + return true; + }); + + return { rules, checkers, errors: result.errors }; +} + export async function createPolicyEngineConfig( settings: PolicySettings, approvalMode: ApprovalMode, @@ -234,17 +301,19 @@ export async function createPolicyEngineConfig( const checkers = [...tomlCheckers]; // Priority system for policy rules: + // - Higher priority numbers win over lower priority numbers // - When multiple rules match, the highest priority rule is applied // - Rules are evaluated in order of priority (highest first) // // Priority bands (tiers): // - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100) - // - Workspace policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100) - // - User policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100) - // - Admin policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100) + // - Extension policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100) + // - Workspace policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100) + // - User policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100) + // - Admin policies (TOML): 5 + priority/1000 (e.g., priority 100 → 5.100) // - // This ensures Admin > User > Workspace > Default hierarchy is always preserved, + // This ensures Admin > User > Workspace > Extension > Default hierarchy is always preserved, // while allowing user-specified priorities to work within each tier. 
// // Settings-based and dynamic rules (mixed tiers): @@ -254,7 +323,7 @@ export async function createPolicyEngineConfig( // TRUSTED_MCP_SERVER_PRIORITY: MCP servers with trust=true (persistent trusted servers) // ALLOWED_MCP_SERVER_PRIORITY: MCP servers allowed list (persistent general server allows) // ALWAYS_ALLOW_PRIORITY: Tools that the user has selected as "Always Allow" in the interactive UI - // (Workspace tier 2.x - scoped to the project) + // (Workspace tier 3.x - scoped to the project) // // TOML policy priorities (before transformation): // 10: Write tools default to ASK_USER (becomes 1.010 in default tier) diff --git a/packages/core/src/policy/policy-engine.test.ts b/packages/core/src/policy/policy-engine.test.ts index 7accf5c7e5..f93c9ad3b8 100644 --- a/packages/core/src/policy/policy-engine.test.ts +++ b/packages/core/src/policy/policy-engine.test.ts @@ -406,6 +406,40 @@ describe('PolicyEngine', () => { expect(remainingRules.some((r) => r.toolName === 'tool2')).toBe(true); }); + it('should remove rules for specific tool and source', () => { + engine.addRule({ + toolName: 'tool1', + decision: PolicyDecision.ALLOW, + source: 'source1', + }); + engine.addRule({ + toolName: 'tool1', + decision: PolicyDecision.DENY, + source: 'source2', + }); + engine.addRule({ + toolName: 'tool2', + decision: PolicyDecision.ALLOW, + source: 'source1', + }); + + expect(engine.getRules()).toHaveLength(3); + + engine.removeRulesForTool('tool1', 'source1'); + + const rules = engine.getRules(); + expect(rules).toHaveLength(2); + expect( + rules.some((r) => r.toolName === 'tool1' && r.source === 'source2'), + ).toBe(true); + expect( + rules.some((r) => r.toolName === 'tool2' && r.source === 'source1'), + ).toBe(true); + expect( + rules.some((r) => r.toolName === 'tool1' && r.source === 'source1'), + ).toBe(false); + }); + it('should handle removing non-existent tool', () => { engine.addRule({ toolName: 'existing', decision: PolicyDecision.ALLOW }); @@ -2836,6 +2870,34 @@ 
describe('PolicyEngine', () => { }); }); + describe('removeRulesBySource', () => { + it('should remove rules matching a specific source', () => { + engine.addRule({ + toolName: 'rule1', + decision: PolicyDecision.ALLOW, + source: 'source1', + }); + engine.addRule({ + toolName: 'rule2', + decision: PolicyDecision.ALLOW, + source: 'source2', + }); + engine.addRule({ + toolName: 'rule3', + decision: PolicyDecision.ALLOW, + source: 'source1', + }); + + expect(engine.getRules()).toHaveLength(3); + + engine.removeRulesBySource('source1'); + + const rules = engine.getRules(); + expect(rules).toHaveLength(1); + expect(rules[0].toolName).toBe('rule2'); + }); + }); + describe('removeCheckersByTier', () => { it('should remove checkers matching a specific tier', () => { engine.addChecker({ @@ -2861,6 +2923,31 @@ describe('PolicyEngine', () => { }); }); + describe('removeCheckersBySource', () => { + it('should remove checkers matching a specific source', () => { + engine.addChecker({ + checker: { type: 'external', name: 'c1' }, + source: 'sourceA', + }); + engine.addChecker({ + checker: { type: 'external', name: 'c2' }, + source: 'sourceB', + }); + engine.addChecker({ + checker: { type: 'external', name: 'c3' }, + source: 'sourceA', + }); + + expect(engine.getCheckers()).toHaveLength(3); + + engine.removeCheckersBySource('sourceA'); + + const checkers = engine.getCheckers(); + expect(checkers).toHaveLength(1); + expect(checkers[0].checker.name).toBe('c2'); + }); + }); + describe('Tool Annotations', () => { it('should match tools by semantic annotations', async () => { engine = new PolicyEngine({ @@ -2924,4 +3011,22 @@ describe('PolicyEngine', () => { ).toBe(PolicyDecision.ALLOW); }); }); + + describe('hook checkers', () => { + it('should add and retrieve hook checkers in priority order', () => { + engine.addHookChecker({ + checker: { type: 'external', name: 'h1' }, + priority: 5, + }); + engine.addHookChecker({ + checker: { type: 'external', name: 'h2' }, + priority: 10, + }); 
+ + const hookCheckers = engine.getHookCheckers(); + expect(hookCheckers).toHaveLength(2); + expect(hookCheckers[0].priority).toBe(10); + expect(hookCheckers[1].priority).toBe(5); + }); + }); }); diff --git a/packages/core/src/policy/policy-engine.ts b/packages/core/src/policy/policy-engine.ts index 8f61d622c2..03087716ff 100644 --- a/packages/core/src/policy/policy-engine.ts +++ b/packages/core/src/policy/policy-engine.ts @@ -562,6 +562,13 @@ export class PolicyEngine { ); } + /** + * Remove rules matching a specific source. + */ + removeRulesBySource(source: string): void { + this.rules = this.rules.filter((rule) => rule.source !== source); + } + /** * Remove checkers matching a specific tier (priority band). */ @@ -571,6 +578,15 @@ export class PolicyEngine { ); } + /** + * Remove checkers matching a specific source. + */ + removeCheckersBySource(source: string): void { + this.checkers = this.checkers.filter( + (checker) => checker.source !== source, + ); + } + /** * Remove rules for a specific tool. * If source is provided, only rules matching that source are removed. 
diff --git a/packages/core/src/policy/toml-loader.test.ts b/packages/core/src/policy/toml-loader.test.ts index 1e4c008c5d..54a81771b8 100644 --- a/packages/core/src/policy/toml-loader.test.ts +++ b/packages/core/src/policy/toml-loader.test.ts @@ -262,30 +262,34 @@ deny_message = "Deletion is permanent" expect(result.errors).toHaveLength(0); }); - it('should support modes property for Tier 2 and Tier 3 policies', async () => { + it('should support modes property for Tier 4 and Tier 5 policies', async () => { await fs.writeFile( - path.join(tempDir, 'tier2.toml'), + path.join(tempDir, 'tier4.toml'), ` [[rule]] -toolName = "tier2-tool" +toolName = "tier4-tool" decision = "allow" priority = 100 modes = ["autoEdit"] `, ); - const getPolicyTier2 = (_dir: string) => 2; // Tier 2 + const getPolicyTier4 = (_dir: string) => 4; // Tier 4 (User) + const result4 = await loadPoliciesFromToml([tempDir], getPolicyTier4); + + expect(result4.rules).toHaveLength(1); + expect(result4.rules[0].toolName).toBe('tier4-tool'); + expect(result4.rules[0].modes).toEqual(['autoEdit']); + expect(result4.rules[0].source).toBe('User: tier4.toml'); + + const getPolicyTier2 = (_dir: string) => 2; // Tier 2 (Extension) const result2 = await loadPoliciesFromToml([tempDir], getPolicyTier2); + expect(result2.rules[0].source).toBe('Extension: tier4.toml'); - expect(result2.rules).toHaveLength(1); - expect(result2.rules[0].toolName).toBe('tier2-tool'); - expect(result2.rules[0].modes).toEqual(['autoEdit']); - expect(result2.rules[0].source).toBe('Workspace: tier2.toml'); - - const getPolicyTier3 = (_dir: string) => 3; // Tier 3 - const result3 = await loadPoliciesFromToml([tempDir], getPolicyTier3); - expect(result3.rules[0].source).toBe('User: tier2.toml'); - expect(result3.errors).toHaveLength(0); + const getPolicyTier5 = (_dir: string) => 5; // Tier 5 (Admin) + const result5 = await loadPoliciesFromToml([tempDir], getPolicyTier5); + expect(result5.rules[0].source).toBe('Admin: tier4.toml'); + 
expect(result5.errors).toHaveLength(0); }); it('should handle TOML parse errors', async () => { diff --git a/packages/core/src/policy/toml-loader.ts b/packages/core/src/policy/toml-loader.ts index 6b164d59b8..df4bd3ca9e 100644 --- a/packages/core/src/policy/toml-loader.ts +++ b/packages/core/src/policy/toml-loader.ts @@ -108,7 +108,7 @@ export type PolicyFileErrorType = export interface PolicyFileError { filePath: string; fileName: string; - tier: 'default' | 'user' | 'workspace' | 'admin'; + tier: 'default' | 'extension' | 'user' | 'workspace' | 'admin'; ruleIndex?: number; errorType: PolicyFileErrorType; message: string; @@ -173,11 +173,14 @@ export async function readPolicyFiles( /** * Converts a tier number to a human-readable tier name. */ -function getTierName(tier: number): 'default' | 'user' | 'workspace' | 'admin' { +function getTierName( + tier: number, +): 'default' | 'extension' | 'user' | 'workspace' | 'admin' { if (tier === 1) return 'default'; - if (tier === 2) return 'workspace'; - if (tier === 3) return 'user'; - if (tier === 4) return 'admin'; + if (tier === 2) return 'extension'; + if (tier === 3) return 'workspace'; + if (tier === 4) return 'user'; + if (tier === 5) return 'admin'; return 'default'; } diff --git a/packages/core/src/policy/workspace-policy.test.ts b/packages/core/src/policy/workspace-policy.test.ts index 999dae6f0d..0a277bc072 100644 --- a/packages/core/src/policy/workspace-policy.test.ts +++ b/packages/core/src/policy/workspace-policy.test.ts @@ -34,7 +34,7 @@ describe('Workspace-Level Policies', () => { vi.doUnmock('node:fs/promises'); }); - it('should load workspace policies with correct priority (Tier 2)', async () => { + it('should load workspace policies with correct priority (Tier 3)', async () => { const workspacePoliciesDir = '/mock/workspace/policies'; const defaultPoliciesDir = '/mock/default/policies'; @@ -98,21 +98,21 @@ priority = 10 toolName = "test_tool" decision = "deny" priority = 10 -`; // Tier 3 -> 3.010 +`; 
// Tier 4 -> 4.010 } if (path.includes('workspace.toml')) { return `[[rule]] toolName = "test_tool" decision = "allow" priority = 10 -`; // Tier 2 -> 2.010 +`; // Tier 3 -> 3.010 } if (path.includes('admin.toml')) { return `[[rule]] toolName = "test_tool" decision = "deny" priority = 10 -`; // Tier 4 -> 4.010 +`; // Tier 5 -> 5.010 } return ''; }); @@ -144,9 +144,9 @@ priority = 10 // Check for all 4 rules const defaultRule = rules?.find((r) => r.priority === 1.01); - const workspaceRule = rules?.find((r) => r.priority === 2.01); - const userRule = rules?.find((r) => r.priority === 3.01); - const adminRule = rules?.find((r) => r.priority === 4.01); + const workspaceRule = rules?.find((r) => r.priority === 3.01); + const userRule = rules?.find((r) => r.priority === 4.01); + const adminRule = rules?.find((r) => r.priority === 5.01); expect(defaultRule).toBeDefined(); expect(userRule).toBeDefined(); @@ -224,7 +224,7 @@ priority=10`, expect(rules![0].priority).toBe(1.01); }); - it('should load workspace policies and correctly transform to Tier 2', async () => { + it('should load workspace policies and correctly transform to Tier 3', async () => { const workspacePoliciesDir = '/mock/workspace/policies'; // Mock FS @@ -284,7 +284,7 @@ priority=500`, const rule = config.rules?.find((r) => r.toolName === 'p_tool'); expect(rule).toBeDefined(); - // Workspace Tier (2) + 500/1000 = 2.5 - expect(rule?.priority).toBe(2.5); + // Workspace Tier (3) + 500/1000 = 3.5 + expect(rule?.priority).toBe(3.5); }); }); diff --git a/packages/core/src/utils/extensionLoader.test.ts b/packages/core/src/utils/extensionLoader.test.ts index 9cbcd51e06..17526b99a8 100644 --- a/packages/core/src/utils/extensionLoader.test.ts +++ b/packages/core/src/utils/extensionLoader.test.ts @@ -14,6 +14,7 @@ import { type MockInstance, } from 'vitest'; import { SimpleExtensionLoader } from './extensionLoader.js'; +import { PolicyDecision } from '../policy/types.js'; import type { Config, GeminiCLIExtension } 
from '../config/config.js'; import { type McpClientManager } from '../tools/mcp-client-manager.js'; import type { GeminiClient } from '../core/client.js'; @@ -38,6 +39,12 @@ describe('SimpleExtensionLoader', () => { let mockHookSystemInit: MockInstance; let mockAgentRegistryReload: MockInstance; let mockSkillsReload: MockInstance; + let mockPolicyEngine: { + addRule: MockInstance; + addChecker: MockInstance; + removeRulesBySource: MockInstance; + removeCheckersBySource: MockInstance; + }; const activeExtension: GeminiCLIExtension = { name: 'test-extension', @@ -47,7 +54,22 @@ describe('SimpleExtensionLoader', () => { contextFiles: [], excludeTools: ['some-tool'], id: '123', + rules: [ + { + toolName: 'test-tool', + decision: PolicyDecision.ALLOW, + source: 'Extension (test-extension): policies.toml', + }, + ], + checkers: [ + { + toolName: 'test-tool', + checker: { type: 'external', name: 'test-checker' }, + source: 'Extension (test-extension): policies.toml', + }, + ], }; + const inactiveExtension: GeminiCLIExtension = { name: 'test-extension', isActive: false, @@ -67,6 +89,12 @@ describe('SimpleExtensionLoader', () => { mockHookSystemInit = vi.fn(); mockAgentRegistryReload = vi.fn(); mockSkillsReload = vi.fn(); + mockPolicyEngine = { + addRule: vi.fn(), + addChecker: vi.fn(), + removeRulesBySource: vi.fn(), + removeCheckersBySource: vi.fn(), + }; mockConfig = { getMcpClientManager: () => mockMcpClientManager, getEnableExtensionReloading: () => extensionReloadingEnabled, @@ -81,6 +109,7 @@ describe('SimpleExtensionLoader', () => { reload: mockAgentRegistryReload, }), reloadSkills: mockSkillsReload, + getPolicyEngine: () => mockPolicyEngine, } as unknown as Config; }); @@ -88,6 +117,29 @@ describe('SimpleExtensionLoader', () => { vi.restoreAllMocks(); }); + it('should register policies when an extension starts', async () => { + const loader = new SimpleExtensionLoader([activeExtension]); + await loader.start(mockConfig); + 
expect(mockPolicyEngine.addRule).toHaveBeenCalledWith( + activeExtension.rules![0], + ); + expect(mockPolicyEngine.addChecker).toHaveBeenCalledWith( + activeExtension.checkers![0], + ); + }); + + it('should unregister policies when an extension stops', async () => { + const loader = new TestingSimpleExtensionLoader([activeExtension]); + await loader.start(mockConfig); + await loader.stopExtension(activeExtension); + expect(mockPolicyEngine.removeRulesBySource).toHaveBeenCalledWith( + 'Extension (test-extension): policies.toml', + ); + expect(mockPolicyEngine.removeCheckersBySource).toHaveBeenCalledWith( + 'Extension (test-extension): policies.toml', + ); + }); + it('should start active extensions', async () => { const loader = new SimpleExtensionLoader([activeExtension]); await loader.start(mockConfig); diff --git a/packages/core/src/utils/extensionLoader.ts b/packages/core/src/utils/extensionLoader.ts index 7110ba8615..8fdee33c2a 100644 --- a/packages/core/src/utils/extensionLoader.ts +++ b/packages/core/src/utils/extensionLoader.ts @@ -75,6 +75,21 @@ export abstract class ExtensionLoader { await this.config.getMcpClientManager()!.startExtension(extension); await this.maybeRefreshGeminiTools(extension); + // Register policy rules and checkers + if (extension.rules || extension.checkers) { + const policyEngine = this.config.getPolicyEngine(); + if (extension.rules) { + for (const rule of extension.rules) { + policyEngine.addRule(rule); + } + } + if (extension.checkers) { + for (const checker of extension.checkers) { + policyEngine.addChecker(checker); + } + } + } + // Note: Context files are loaded only once all extensions are done // loading/unloading to reduce churn, see the `maybeRefreshMemories` call // below. 
@@ -168,6 +183,27 @@ export abstract class ExtensionLoader { await this.config.getMcpClientManager()!.stopExtension(extension); await this.maybeRefreshGeminiTools(extension); + // Unregister policy rules and checkers + if (extension.rules || extension.checkers) { + const policyEngine = this.config.getPolicyEngine(); + const sources = new Set(); + if (extension.rules) { + for (const rule of extension.rules) { + if (rule.source) sources.add(rule.source); + } + } + if (extension.checkers) { + for (const checker of extension.checkers) { + if (checker.source) sources.add(checker.source); + } + } + + for (const source of sources) { + policyEngine.removeRulesBySource(source); + policyEngine.removeCheckersBySource(source); + } + } + // Note: Context files are loaded only once all extensions are done // loading/unloading to reduce churn, see the `maybeRefreshMemories` call // below. From 83a3851dfd6a002ef4552d34cb6c4e76e0640b98 Mon Sep 17 00:00:00 2001 From: Jenna Inouye Date: Thu, 26 Feb 2026 20:14:57 -0800 Subject: [PATCH 20/41] Docs: Update to Terms of Service & FAQ (#20488) Co-authored-by: Jerop Kipruto --- docs/resources/faq.md | 9 +++++++++ docs/resources/tos-privacy.md | 6 ++++++ 2 files changed, 15 insertions(+) diff --git a/docs/resources/faq.md b/docs/resources/faq.md index eeb0396495..6ed1504a46 100644 --- a/docs/resources/faq.md +++ b/docs/resources/faq.md @@ -5,6 +5,15 @@ problems encountered while using Gemini CLI. ## General issues +### Why can't I use third-party software (e.g. Claude Code, OpenClaw, OpenCode) with Gemini CLI? + +Using third-party software, tools, or services to access Gemini CLI is a +violation of our [applicable terms and policies](tos-privacy.md), and severely +degrades the experience for legitimate product users. Such actions may be +grounds for suspension or termination of your account. If you would like to use +a third-party coding agent with Gemini, we recommend using a Vertex or AI Studio +API key. 
+ ### Why am I getting an `API error: 429 - Resource exhausted`? This error indicates that you have exceeded your API request limit. The Gemini diff --git a/docs/resources/tos-privacy.md b/docs/resources/tos-privacy.md index e653e59d1d..88daf2639c 100644 --- a/docs/resources/tos-privacy.md +++ b/docs/resources/tos-privacy.md @@ -7,6 +7,12 @@ is licensed under the When you use Gemini CLI to access or use Google’s services, the Terms of Service and Privacy Notices applicable to those services apply to such access and use. +Directly accessing the services powering Gemini CLI (e.g., the Gemini Code +Assist service) using third-party software, tools, or services (for example, +using OpenClaw with Gemini CLI OAuth) is a violation of applicable terms and +policies. Such actions may be grounds for suspension or termination of your +account. + Your Gemini CLI Usage Statistics are handled in accordance with Google's Privacy Policy. From 82336de7a4f77246aed090110d22551e0fc12fb2 Mon Sep 17 00:00:00 2001 From: Jacob Richman Date: Thu, 26 Feb 2026 20:55:21 -0800 Subject: [PATCH 21/41] Fix bottom border rendering for search and add a regression test. 
(#20517) --- packages/cli/GEMINI.md | 7 + packages/cli/src/test-utils/render.tsx | 5 + .../InlineMarkdownRenderer.test.tsx.snap | 52 ------ ...-search-dialog-google_web_search-.snap.svg | 123 ++++++++++++++ ...der-SVG-snapshot-for-a-shell-tool.snap.svg | 123 ++++++++++++++ ...pty-slice-following-a-search-tool.snap.svg | 123 ++++++++++++++ .../__snapshots__/borderStyles.test.tsx.snap | 55 ++++++ .../cli/src/ui/utils/borderStyles.test.tsx | 157 ++++++++++++++++++ packages/cli/src/ui/utils/borderStyles.ts | 5 +- 9 files changed, 597 insertions(+), 53 deletions(-) delete mode 100644 packages/cli/src/ui/utils/__snapshots__/InlineMarkdownRenderer.test.tsx.snap create mode 100644 packages/cli/src/ui/utils/__snapshots__/borderStyles-MainContent-tool-group-border-SVG-snapshots-should-render-SVG-snapshot-for-a-pending-search-dialog-google_web_search-.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/borderStyles-MainContent-tool-group-border-SVG-snapshots-should-render-SVG-snapshot-for-a-shell-tool.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/borderStyles-MainContent-tool-group-border-SVG-snapshots-should-render-SVG-snapshot-for-an-empty-slice-following-a-search-tool.snap.svg create mode 100644 packages/cli/src/ui/utils/__snapshots__/borderStyles.test.tsx.snap create mode 100644 packages/cli/src/ui/utils/borderStyles.test.tsx diff --git a/packages/cli/GEMINI.md b/packages/cli/GEMINI.md index 8ab50f6b57..5518696d60 100644 --- a/packages/cli/GEMINI.md +++ b/packages/cli/GEMINI.md @@ -15,4 +15,11 @@ - **Utilities**: Use `renderWithProviders` and `waitFor` from `packages/cli/src/test-utils/`. - **Snapshots**: Use `toMatchSnapshot()` to verify Ink output. +- **SVG Snapshots**: Use `await expect(renderResult).toMatchSvgSnapshot()` for + UI components whenever colors or detailed visual layout matter. SVG snapshots + capture styling accurately. Make sure to await the `waitUntilReady()` of the + render result before asserting. 
After updating SVG snapshots, always examine + the resulting `.svg` files (e.g. by reading their content or visually + inspecting them) to ensure the render and colors actually look as expected and + don't just contain an error message. - **Mocks**: Use mocks as sparingly as possible. diff --git a/packages/cli/src/test-utils/render.tsx b/packages/cli/src/test-utils/render.tsx index 1b64c07d7b..0420252149 100644 --- a/packages/cli/src/test-utils/render.tsx +++ b/packages/cli/src/test-utils/render.tsx @@ -547,6 +547,11 @@ const baseMockUiState = { }, hintMode: false, hintBuffer: '', + bannerData: { + defaultText: '', + warningText: '', + }, + bannerVisible: false, }; export const mockAppState: AppState = { diff --git a/packages/cli/src/ui/utils/__snapshots__/InlineMarkdownRenderer.test.tsx.snap b/packages/cli/src/ui/utils/__snapshots__/InlineMarkdownRenderer.test.tsx.snap deleted file mode 100644 index c8a5a7ff15..0000000000 --- a/packages/cli/src/ui/utils/__snapshots__/InlineMarkdownRenderer.test.tsx.snap +++ /dev/null @@ -1,52 +0,0 @@ -// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html - -exports[`InlineMarkdownRenderer > RenderInline > handles nested/complex markdown gracefully (best effort) 1`] = ` -"Bold *Italic -*" -`; - -exports[`InlineMarkdownRenderer > RenderInline > renders bold text correctly 1`] = ` -"Hello -World" -`; - -exports[`InlineMarkdownRenderer > RenderInline > renders inline code correctly 1`] = ` -"Hello -World" -`; - -exports[`InlineMarkdownRenderer > RenderInline > renders italic text correctly 1`] = ` -"Hello -World" -`; - -exports[`InlineMarkdownRenderer > RenderInline > renders links correctly 1`] = `"Google (https://google.com)"`; - -exports[`InlineMarkdownRenderer > RenderInline > renders mixed markdown correctly 1`] = ` -"Bold - and -Italic - and -Code - and -Link (https://example.com)" -`; - -exports[`InlineMarkdownRenderer > RenderInline > renders plain text correctly 1`] = `"Hello World"`; - 
-exports[`InlineMarkdownRenderer > RenderInline > renders raw URLs correctly 1`] = ` -"Visit -https://google.com" -`; - -exports[`InlineMarkdownRenderer > RenderInline > renders strikethrough text correctly 1`] = ` -"Hello -World" -`; - -exports[`InlineMarkdownRenderer > RenderInline > renders underline correctly 1`] = ` -"Hello -World" -`; - -exports[`InlineMarkdownRenderer > RenderInline > respects defaultColor prop 1`] = `"Hello"`; diff --git a/packages/cli/src/ui/utils/__snapshots__/borderStyles-MainContent-tool-group-border-SVG-snapshots-should-render-SVG-snapshot-for-a-pending-search-dialog-google_web_search-.snap.svg b/packages/cli/src/ui/utils/__snapshots__/borderStyles-MainContent-tool-group-border-SVG-snapshots-should-render-SVG-snapshot-for-a-pending-search-dialog-google_web_search-.snap.svg new file mode 100644 index 0000000000..b9290efcac --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/borderStyles-MainContent-tool-group-border-SVG-snapshots-should-render-SVG-snapshot-for-a-pending-search-dialog-google_web_search-.snap.svg @@ -0,0 +1,123 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ╭──────────────────────────────────────────────────────────────────────────────────────────────╮ + + ⊷ google_web_search + + + + + Searching... 
+ + ╰──────────────────────────────────────────────────────────────────────────────────────────────╯ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/borderStyles-MainContent-tool-group-border-SVG-snapshots-should-render-SVG-snapshot-for-a-shell-tool.snap.svg b/packages/cli/src/ui/utils/__snapshots__/borderStyles-MainContent-tool-group-border-SVG-snapshots-should-render-SVG-snapshot-for-a-shell-tool.snap.svg new file mode 100644 index 0000000000..0ba0125a62 --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/borderStyles-MainContent-tool-group-border-SVG-snapshots-should-render-SVG-snapshot-for-a-shell-tool.snap.svg @@ -0,0 +1,123 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ╭──────────────────────────────────────────────────────────────────────────────────────────────╮ + + ⊷ run_shell_command + + + + + Running command... 
+ + ╰──────────────────────────────────────────────────────────────────────────────────────────────╯ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/borderStyles-MainContent-tool-group-border-SVG-snapshots-should-render-SVG-snapshot-for-an-empty-slice-following-a-search-tool.snap.svg b/packages/cli/src/ui/utils/__snapshots__/borderStyles-MainContent-tool-group-border-SVG-snapshots-should-render-SVG-snapshot-for-an-empty-slice-following-a-search-tool.snap.svg new file mode 100644 index 0000000000..b9290efcac --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/borderStyles-MainContent-tool-group-border-SVG-snapshots-should-render-SVG-snapshot-for-an-empty-slice-following-a-search-tool.snap.svg @@ -0,0 +1,123 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ╭──────────────────────────────────────────────────────────────────────────────────────────────╮ + + ⊷ google_web_search + + + + + Searching... 
+ + ╰──────────────────────────────────────────────────────────────────────────────────────────────╯ + + \ No newline at end of file diff --git a/packages/cli/src/ui/utils/__snapshots__/borderStyles.test.tsx.snap b/packages/cli/src/ui/utils/__snapshots__/borderStyles.test.tsx.snap new file mode 100644 index 0000000000..fbdc559480 --- /dev/null +++ b/packages/cli/src/ui/utils/__snapshots__/borderStyles.test.tsx.snap @@ -0,0 +1,55 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`MainContent tool group border SVG snapshots > should render SVG snapshot for a pending search dialog (google_web_search) 1`] = ` +" + ███ █████████ +░░░███ ███░░░░░███ + ░░░███ ███ ░░░ + ░░░███░███ + ███░ ░███ █████ + ███░ ░░███ ░░███ + ███░ ░░█████████ +░░░ ░░░░░░░░░ + +╭──────────────────────────────────────────────────────────────────────────────────────────────╮ +│ ⊷ google_web_search │ +│ │ +│ Searching... │ +╰──────────────────────────────────────────────────────────────────────────────────────────────╯" +`; + +exports[`MainContent tool group border SVG snapshots > should render SVG snapshot for a shell tool 1`] = ` +" + ███ █████████ +░░░███ ███░░░░░███ + ░░░███ ███ ░░░ + ░░░███░███ + ███░ ░███ █████ + ███░ ░░███ ░░███ + ███░ ░░█████████ +░░░ ░░░░░░░░░ + +╭──────────────────────────────────────────────────────────────────────────────────────────────╮ +│ ⊷ run_shell_command │ +│ │ +│ Running command... │ +╰──────────────────────────────────────────────────────────────────────────────────────────────╯" +`; + +exports[`MainContent tool group border SVG snapshots > should render SVG snapshot for an empty slice following a search tool 1`] = ` +" + ███ █████████ +░░░███ ███░░░░░███ + ░░░███ ███ ░░░ + ░░░███░███ + ███░ ░███ █████ + ███░ ░░███ ░░███ + ███░ ░░█████████ +░░░ ░░░░░░░░░ + +╭──────────────────────────────────────────────────────────────────────────────────────────────╮ +│ ⊷ google_web_search │ +│ │ +│ Searching... 
│ +╰──────────────────────────────────────────────────────────────────────────────────────────────╯" +`; diff --git a/packages/cli/src/ui/utils/borderStyles.test.tsx b/packages/cli/src/ui/utils/borderStyles.test.tsx new file mode 100644 index 0000000000..91b2497f7f --- /dev/null +++ b/packages/cli/src/ui/utils/borderStyles.test.tsx @@ -0,0 +1,157 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect, it } from 'vitest'; +import { getToolGroupBorderAppearance } from './borderStyles.js'; +import { CoreToolCallStatus } from '@google/gemini-cli-core'; +import { theme } from '../semantic-colors.js'; +import type { IndividualToolCallDisplay } from '../types.js'; +import { renderWithProviders } from '../../test-utils/render.js'; +import { MainContent } from '../components/MainContent.js'; + +describe('getToolGroupBorderAppearance', () => { + it('should use warning color for pending non-shell tools', () => { + const item = { + type: 'tool_group' as const, + tools: [ + { + name: 'google_web_search', + status: CoreToolCallStatus.Executing, + resultDisplay: '', + callId: 'call-1', + }, + ] as IndividualToolCallDisplay[], + }; + const appearance = getToolGroupBorderAppearance(item, undefined, false, []); + expect(appearance.borderColor).toBe(theme.status.warning); + expect(appearance.borderDimColor).toBe(true); + }); + + it('should use correct color for empty slice by looking at pending items', () => { + const pendingItem = { + type: 'tool_group' as const, + tools: [ + { + name: 'google_web_search', + status: CoreToolCallStatus.Executing, + resultDisplay: '', + callId: 'call-1', + }, + ] as IndividualToolCallDisplay[], + }; + const sliceItem = { + type: 'tool_group' as const, + tools: [] as IndividualToolCallDisplay[], + }; + const allPendingItems = [pendingItem, sliceItem]; + + const appearance = getToolGroupBorderAppearance( + sliceItem, + undefined, + false, + allPendingItems, + ); + + // It should match 
the pendingItem appearance + expect(appearance.borderColor).toBe(theme.status.warning); + expect(appearance.borderDimColor).toBe(true); + }); + + it('should use symbol color for shell tools', () => { + const item = { + type: 'tool_group' as const, + tools: [ + { + name: 'run_shell_command', + status: CoreToolCallStatus.Executing, + resultDisplay: '', + callId: 'call-1', + }, + ] as IndividualToolCallDisplay[], + }; + const appearance = getToolGroupBorderAppearance(item, undefined, false, []); + expect(appearance.borderColor).toBe(theme.ui.symbol); + expect(appearance.borderDimColor).toBe(true); + }); +}); + +describe('MainContent tool group border SVG snapshots', () => { + it('should render SVG snapshot for a pending search dialog (google_web_search)', async () => { + const renderResult = renderWithProviders(<MainContent />, { + uiState: { + history: [], + pendingHistoryItems: [ + { + type: 'tool_group', + tools: [ + { + name: 'google_web_search', + status: CoreToolCallStatus.Executing, + resultDisplay: 'Searching...', + callId: 'call-1', + } as unknown as IndividualToolCallDisplay, + ], + }, + ], + }, + }); + + await renderResult.waitUntilReady(); + await expect(renderResult).toMatchSvgSnapshot(); + }); + + it('should render SVG snapshot for an empty slice following a search tool', async () => { + const renderResult = renderWithProviders(<MainContent />, { + uiState: { + history: [], + pendingHistoryItems: [ + { + type: 'tool_group', + tools: [ + { + name: 'google_web_search', + status: CoreToolCallStatus.Executing, + resultDisplay: 'Searching...', + callId: 'call-1', + } as unknown as IndividualToolCallDisplay, + ], + }, + { + type: 'tool_group', + tools: [], + }, + ], + }, + }); + + await renderResult.waitUntilReady(); + await expect(renderResult).toMatchSvgSnapshot(); + }); + + it('should render SVG snapshot for a shell tool', async () => { + const renderResult = renderWithProviders(<MainContent />, { + uiState: { + history: [], + pendingHistoryItems: [ + { + type: 'tool_group', + tools: [ + { + name:
'run_shell_command', + status: CoreToolCallStatus.Executing, + resultDisplay: 'Running command...', + callId: 'call-1', + } as unknown as IndividualToolCallDisplay, + ], + }, + ], + }, + }); + + await renderResult.waitUntilReady(); + await expect(renderResult).toMatchSvgSnapshot(); + }); +}); diff --git a/packages/cli/src/ui/utils/borderStyles.ts b/packages/cli/src/ui/utils/borderStyles.ts index b3a0cb52bb..276d4a2502 100644 --- a/packages/cli/src/ui/utils/borderStyles.ts +++ b/packages/cli/src/ui/utils/borderStyles.ts @@ -47,7 +47,10 @@ export function getToolGroupBorderAppearance( : allPendingItems .filter( (i): i is HistoryItemToolGroup => - i !== null && i !== undefined && i.type === 'tool_group', + i !== null && + i !== undefined && + i.type === 'tool_group' && + i.tools.length > 0, ) .slice(-1) .flatMap((i) => i.tools); From 522e95439c1a42f50680935e269ca284a0784a53 Mon Sep 17 00:00:00 2001 From: Bryan Morgan Date: Fri, 27 Feb 2026 09:26:53 -0500 Subject: [PATCH 22/41] fix(core): apply retry logic to CodeAssistServer for all users (#20507) --- packages/core/src/code_assist/server.test.ts | 66 ++++++++++++-------- packages/core/src/code_assist/server.ts | 18 ++++++ 2 files changed, 58 insertions(+), 26 deletions(-) diff --git a/packages/core/src/code_assist/server.test.ts b/packages/core/src/code_assist/server.test.ts index d79526d1c3..aa5cbae7fe 100644 --- a/packages/core/src/code_assist/server.test.ts +++ b/packages/core/src/code_assist/server.test.ts @@ -73,19 +73,26 @@ describe('CodeAssistServer', () => { LlmRole.MAIN, ); - expect(mockRequest).toHaveBeenCalledWith( - expect.objectContaining({ - url: expect.stringContaining(':generateContent'), - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'x-custom-header': 'test-value', - }, - responseType: 'json', - body: expect.any(String), - signal: undefined, - }), - ); + expect(mockRequest).toHaveBeenCalledWith({ + url: expect.stringContaining(':generateContent'), + method: 'POST', + headers: 
{ + 'Content-Type': 'application/json', + 'x-custom-header': 'test-value', + }, + responseType: 'json', + body: expect.any(String), + signal: undefined, + retryConfig: { + retry: 3, + noResponseRetries: 3, + statusCodesToRetry: [ + [429, 429], + [499, 499], + [500, 599], + ], + }, + }); const requestBody = JSON.parse(mockRequest.mock.calls[0][0].body); expect(requestBody.user_prompt_id).toBe('user-prompt-id'); @@ -393,19 +400,26 @@ describe('CodeAssistServer', () => { results.push(res); } - expect(mockRequest).toHaveBeenCalledWith( - expect.objectContaining({ - url: expect.stringContaining(':streamGenerateContent'), - method: 'POST', - params: { alt: 'sse' }, - responseType: 'stream', - body: expect.any(String), - headers: { - 'Content-Type': 'application/json', - }, - signal: undefined, - }), - ); + expect(mockRequest).toHaveBeenCalledWith({ + url: expect.stringContaining(':streamGenerateContent'), + method: 'POST', + params: { alt: 'sse' }, + responseType: 'stream', + body: expect.any(String), + headers: { + 'Content-Type': 'application/json', + }, + signal: undefined, + retryConfig: { + retry: 3, + noResponseRetries: 3, + statusCodesToRetry: [ + [429, 429], + [499, 499], + [500, 599], + ], + }, + }); expect(results).toHaveLength(2); expect(results[0].candidates?.[0].content?.parts?.[0].text).toBe('Hello'); diff --git a/packages/core/src/code_assist/server.ts b/packages/core/src/code_assist/server.ts index ff5fb76e07..dfcc693ca7 100644 --- a/packages/core/src/code_assist/server.ts +++ b/packages/core/src/code_assist/server.ts @@ -305,6 +305,15 @@ export class CodeAssistServer implements ContentGenerator { responseType: 'json', body: JSON.stringify(req), signal, + retryConfig: { + retry: 3, + noResponseRetries: 3, + statusCodesToRetry: [ + [429, 429], + [499, 499], + [500, 599], + ], + }, }); return res.data; } @@ -352,6 +361,15 @@ export class CodeAssistServer implements ContentGenerator { responseType: 'stream', body: JSON.stringify(req), signal, + retryConfig: 
{ + retry: 3, + noResponseRetries: 3, + statusCodesToRetry: [ + [429, 429], + [499, 499], + [500, 599], + ], + }, }); return (async function* (): AsyncGenerator { From 58df1c62375dada9be70421590d19d452d08583c Mon Sep 17 00:00:00 2001 From: christine betts Date: Fri, 27 Feb 2026 09:49:10 -0500 Subject: [PATCH 23/41] Fix extension MCP server env var loading (#20374) --- packages/core/src/tools/mcp-client.test.ts | 74 ++++++++++++++++++++++ packages/core/src/tools/mcp-client.ts | 59 +++++++++++++++-- 2 files changed, 127 insertions(+), 6 deletions(-) diff --git a/packages/core/src/tools/mcp-client.test.ts b/packages/core/src/tools/mcp-client.test.ts index 68e1ba20f3..df4c869c34 100644 --- a/packages/core/src/tools/mcp-client.test.ts +++ b/packages/core/src/tools/mcp-client.test.ts @@ -1792,6 +1792,80 @@ describe('mcp-client', () => { expect(callArgs.env!['GEMINI_CLI_EXT_VAR']).toBeUndefined(); }); + it('should include extension settings with defined values in environment', async () => { + const mockedTransport = vi + .spyOn(SdkClientStdioLib, 'StdioClientTransport') + .mockReturnValue({} as SdkClientStdioLib.StdioClientTransport); + + await createTransport( + 'test-server', + { + command: 'test-command', + extension: { + name: 'test-ext', + resolvedSettings: [ + { + envVar: 'GEMINI_CLI_EXT_VAR', + value: 'defined-value', + sensitive: false, + name: 'ext-setting', + }, + ], + version: '', + isActive: false, + path: '', + contextFiles: [], + id: '', + }, + }, + false, + EMPTY_CONFIG, + ); + + const callArgs = mockedTransport.mock.calls[0][0]; + expect(callArgs.env).toBeDefined(); + expect(callArgs.env!['GEMINI_CLI_EXT_VAR']).toBe('defined-value'); + }); + + it('should resolve environment variables in mcpServerConfig.env using extension settings', async () => { + const mockedTransport = vi + .spyOn(SdkClientStdioLib, 'StdioClientTransport') + .mockReturnValue({} as SdkClientStdioLib.StdioClientTransport); + + await createTransport( + 'test-server', + { + command: 
'test-command', + env: { + RESOLVED_VAR: '$GEMINI_CLI_EXT_VAR', + }, + extension: { + name: 'test-ext', + resolvedSettings: [ + { + envVar: 'GEMINI_CLI_EXT_VAR', + value: 'ext-value', + sensitive: false, + name: 'ext-setting', + }, + ], + version: '', + isActive: false, + path: '', + contextFiles: [], + id: '', + }, + }, + false, + EMPTY_CONFIG, + ); + + const callArgs = mockedTransport.mock.calls[0][0]; + expect(callArgs.env).toBeDefined(); + expect(callArgs.env!['GEMINI_CLI_EXT_VAR']).toBe('ext-value'); + expect(callArgs.env!['RESOLVED_VAR']).toBe('ext-value'); + }); + it('should expand environment variables in mcpServerConfig.env and not redact them', async () => { const mockedTransport = vi .spyOn(SdkClientStdioLib, 'StdioClientTransport') diff --git a/packages/core/src/tools/mcp-client.ts b/packages/core/src/tools/mcp-client.ts index f0a9a6be8c..c393273dbf 100644 --- a/packages/core/src/tools/mcp-client.ts +++ b/packages/core/src/tools/mcp-client.ts @@ -34,7 +34,11 @@ import { ProgressNotificationSchema, } from '@modelcontextprotocol/sdk/types.js'; import { parse } from 'shell-quote'; -import type { Config, MCPServerConfig } from '../config/config.js'; +import type { + Config, + MCPServerConfig, + GeminiCLIExtension, +} from '../config/config.js'; import { AuthProviderType } from '../config/config.js'; import { GoogleCredentialProvider } from '../mcp/google-auth-provider.js'; import { ServiceAccountImpersonationProvider } from '../mcp/sa-impersonation-provider.js'; @@ -778,15 +782,25 @@ async function handleAutomaticOAuth( * * @param mcpServerConfig The MCP server configuration * @param headers Additional headers + * @param sanitizationConfig Configuration for environment sanitization */ function createTransportRequestInit( mcpServerConfig: MCPServerConfig, headers: Record, + sanitizationConfig: EnvironmentSanitizationConfig, ): RequestInit { + const extensionEnv = getExtensionEnvironment(mcpServerConfig.extension); + const expansionEnv = { ...process.env, 
...extensionEnv }; + + const sanitizedEnv = sanitizeEnvironment(expansionEnv, { + ...sanitizationConfig, + enableEnvironmentVariableRedaction: true, + }); + const expandedHeaders: Record = {}; if (mcpServerConfig.headers) { for (const [key, value] of Object.entries(mcpServerConfig.headers)) { - expandedHeaders[key] = expandEnvVars(value, process.env); + expandedHeaders[key] = expandEnvVars(value, sanitizedEnv); } } @@ -826,12 +840,14 @@ function createAuthProvider( * @param mcpServerName The name of the MCP server * @param mcpServerConfig The MCP server configuration * @param accessToken The OAuth access token + * @param sanitizationConfig Configuration for environment sanitization * @returns The transport with OAuth token, or null if creation fails */ async function createTransportWithOAuth( mcpServerName: string, mcpServerConfig: MCPServerConfig, accessToken: string, + sanitizationConfig: EnvironmentSanitizationConfig, ): Promise { try { const headers: Record = { @@ -840,7 +856,11 @@ async function createTransportWithOAuth( const transportOptions: | StreamableHTTPClientTransportOptions | SSEClientTransportOptions = { - requestInit: createTransportRequestInit(mcpServerConfig, headers), + requestInit: createTransportRequestInit( + mcpServerConfig, + headers, + sanitizationConfig, + ), }; return createUrlTransport(mcpServerName, mcpServerConfig, transportOptions); @@ -1435,6 +1455,7 @@ async function showAuthRequiredMessage(serverName: string): Promise { * @param config The MCP server configuration * @param accessToken The OAuth access token to use * @param httpReturned404 Whether the HTTP transport returned 404 (indicating SSE-only server) + * @param sanitizationConfig Configuration for environment sanitization */ async function retryWithOAuth( client: Client, @@ -1442,6 +1463,7 @@ async function retryWithOAuth( config: MCPServerConfig, accessToken: string, httpReturned404: boolean, + sanitizationConfig: EnvironmentSanitizationConfig, ): Promise { if 
(httpReturned404) { // HTTP returned 404, only try SSE @@ -1462,6 +1484,7 @@ async function retryWithOAuth( serverName, config, accessToken, + sanitizationConfig, ); if (!httpTransport) { throw new Error( @@ -1741,6 +1764,7 @@ export async function connectToMcpServer( mcpServerConfig, accessToken, httpReturned404, + sanitizationConfig, ); return mcpClient; } else { @@ -1813,6 +1837,7 @@ export async function connectToMcpServer( mcpServerName, mcpServerConfig, accessToken, + sanitizationConfig, ); if (!oauthTransport) { throw new Error( @@ -1960,7 +1985,11 @@ export async function createTransport( const transportOptions: | StreamableHTTPClientTransportOptions | SSEClientTransportOptions = { - requestInit: createTransportRequestInit(mcpServerConfig, headers), + requestInit: createTransportRequestInit( + mcpServerConfig, + headers, + sanitizationConfig, + ), authProvider, }; @@ -1968,8 +1997,11 @@ export async function createTransport( } if (mcpServerConfig.command) { + const extensionEnv = getExtensionEnvironment(mcpServerConfig.extension); + const expansionEnv = { ...process.env, ...extensionEnv }; + // 1. Sanitize the base process environment to prevent unintended leaks of system-wide secrets. - const sanitizedEnv = sanitizeEnvironment(process.env, { + const sanitizedEnv = sanitizeEnvironment(expansionEnv, { ...sanitizationConfig, enableEnvironmentVariableRedaction: true, }); @@ -1977,6 +2009,7 @@ export async function createTransport( const finalEnv: Record = { [GEMINI_CLI_IDENTIFICATION_ENV_VAR]: GEMINI_CLI_IDENTIFICATION_ENV_VAR_VALUE, + ...extensionEnv, }; for (const [key, value] of Object.entries(sanitizedEnv)) { if (value !== undefined) { @@ -1987,7 +2020,7 @@ export async function createTransport( // Expand and merge explicit environment variables from the MCP configuration. 
if (mcpServerConfig.env) { for (const [key, value] of Object.entries(mcpServerConfig.env)) { - finalEnv[key] = expandEnvVars(value, process.env); + finalEnv[key] = expandEnvVars(value, expansionEnv); } } @@ -2045,6 +2078,20 @@ interface NamedTool { name?: string; } +function getExtensionEnvironment( + extension?: GeminiCLIExtension, +): Record { + const env: Record = {}; + if (extension?.resolvedSettings) { + for (const setting of extension.resolvedSettings) { + if (setting.value !== undefined) { + env[setting.envVar] = setting.value; + } + } + } + return env; +} + /** Visible for testing */ export function isEnabled( funcDecl: NamedTool, From 66b8922d666eb98179ba1caef767b0bf132cd799 Mon Sep 17 00:00:00 2001 From: Jerop Kipruto Date: Fri, 27 Feb 2026 10:02:46 -0500 Subject: [PATCH 24/41] feat(ui): add 'ctrl+o' hint to truncated content message (#20529) --- .../components/ToolConfirmationQueue.test.tsx | 2 +- .../ExitPlanModeDialog.test.tsx.snap | 2 +- .../HistoryItemDisplay.test.tsx.snap | 4 +-- .../__snapshots__/MainContent.test.tsx.snap | 2 +- .../ToolConfirmationQueue.test.tsx.snap | 4 +-- .../ToolOverflowConsistencyChecks.test.tsx | 2 +- .../__snapshots__/DiffRenderer.test.tsx.snap | 8 ++--- .../ToolResultDisplay.test.tsx.snap | 2 +- .../ui/components/shared/MaxSizedBox.test.tsx | 32 ++++++++++++++----- .../src/ui/components/shared/MaxSizedBox.tsx | 16 +++++++--- .../__snapshots__/MaxSizedBox.test.tsx.snap | 16 +++++----- 11 files changed, 57 insertions(+), 33 deletions(-) diff --git a/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx b/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx index 75612add4c..cabce1af2f 100644 --- a/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx +++ b/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx @@ -227,7 +227,7 @@ describe('ToolConfirmationQueue', () => { // availableContentHeight = Math.max(9 - 6, 4) = 4 // MaxSizedBox in ToolConfirmationMessage will use 4 // It should show 
truncation message - await waitFor(() => expect(lastFrame()).toContain('first 49 lines hidden')); + await waitFor(() => expect(lastFrame()).toContain('49 hidden (Ctrl+O)')); expect(lastFrame()).toMatchSnapshot(); unmount(); }); diff --git a/packages/cli/src/ui/components/__snapshots__/ExitPlanModeDialog.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/ExitPlanModeDialog.test.tsx.snap index 0cd4553c77..db1b6d1ba5 100644 --- a/packages/cli/src/ui/components/__snapshots__/ExitPlanModeDialog.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/ExitPlanModeDialog.test.tsx.snap @@ -74,7 +74,7 @@ Implementation Steps 6. Add LDAP provider support in src/auth/providers/LDAPProvider.ts 7. Create token refresh mechanism in src/auth/TokenManager.ts 8. Add multi-factor authentication in src/auth/MFAService.ts -... last 22 lines hidden ... +... last 22 lines hidden (Ctrl+O to show) ... ● 1. Yes, automatically accept edits Approves plan and allows tools to run automatically diff --git a/packages/cli/src/ui/components/__snapshots__/HistoryItemDisplay.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/HistoryItemDisplay.test.tsx.snap index 62255a1d68..b1784dc10d 100644 --- a/packages/cli/src/ui/components/__snapshots__/HistoryItemDisplay.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/HistoryItemDisplay.test.tsx.snap @@ -112,7 +112,7 @@ exports[` > gemini items (alternateBuffer=false) > should exports[` > gemini items (alternateBuffer=false) > should render a truncated gemini item 1`] = ` "✦ Example code block: - ... first 42 lines hidden ... + ... 42 hidden (Ctrl+O) ... 43 Line 43 44 Line 44 45 Line 45 @@ -126,7 +126,7 @@ exports[` > gemini items (alternateBuffer=false) > should exports[` > gemini items (alternateBuffer=false) > should render a truncated gemini_content item 1`] = ` " Example code block: - ... first 42 lines hidden ... + ... 42 hidden (Ctrl+O) ... 
43 Line 43 44 Line 44 45 Line 45 diff --git a/packages/cli/src/ui/components/__snapshots__/MainContent.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/MainContent.test.tsx.snap index c7a1d0f48b..0599e82f7c 100644 --- a/packages/cli/src/ui/components/__snapshots__/MainContent.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/MainContent.test.tsx.snap @@ -49,7 +49,7 @@ exports[`MainContent > MainContent Tool Output Height Logic > 'Normal mode - Con ╭──────────────────────────────────────────────────────────────────────────────────────────────╮ │ ⊷ Shell Command Running a long command... │ │ │ -│ ... first 11 lines hidden ... │ +│ ... first 11 lines hidden (Ctrl+O to show) ... │ │ Line 12 │ │ Line 13 │ │ Line 14 │ diff --git a/packages/cli/src/ui/components/__snapshots__/ToolConfirmationQueue.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/ToolConfirmationQueue.test.tsx.snap index ad7e046465..a39d668825 100644 --- a/packages/cli/src/ui/components/__snapshots__/ToolConfirmationQueue.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/ToolConfirmationQueue.test.tsx.snap @@ -6,7 +6,7 @@ exports[`ToolConfirmationQueue > calculates availableContentHeight based on avai │ │ │ ? replace edit file │ │ │ -│ ... first 49 lines hidden ... │ +│ ... 49 hidden (Ctrl+O) ... │ │ 50 line │ │ Apply this change? │ │ │ @@ -96,7 +96,7 @@ exports[`ToolConfirmationQueue > renders expansion hint when content is long and │ │ │ ? replace edit file │ │ │ -│ ... first 49 lines hidden ... │ +│ ... 49 hidden (Ctrl+O) ... │ │ 50 line │ │ Apply this change? 
│ │ │ diff --git a/packages/cli/src/ui/components/messages/ToolOverflowConsistencyChecks.test.tsx b/packages/cli/src/ui/components/messages/ToolOverflowConsistencyChecks.test.tsx index f7629945d9..a82132d0d8 100644 --- a/packages/cli/src/ui/components/messages/ToolOverflowConsistencyChecks.test.tsx +++ b/packages/cli/src/ui/components/messages/ToolOverflowConsistencyChecks.test.tsx @@ -106,7 +106,7 @@ describe('ToolOverflowConsistencyChecks: ToolGroupMessage and ToolResultDisplay ); // Verify truncation is occurring (standard mode uses MaxSizedBox) - await waitFor(() => expect(lastFrame()).toContain('hidden ...')); + await waitFor(() => expect(lastFrame()).toContain('hidden (Ctrl+O')); // In Standard mode, ToolGroupMessage calculates hasOverflow correctly now. // While Standard mode doesn't render the inline hint (ShowMoreLines returns null), diff --git a/packages/cli/src/ui/components/messages/__snapshots__/DiffRenderer.test.tsx.snap b/packages/cli/src/ui/components/messages/__snapshots__/DiffRenderer.test.tsx.snap index 8e14c3268e..fed8b32bd0 100644 --- a/packages/cli/src/ui/components/messages/__snapshots__/DiffRenderer.test.tsx.snap +++ b/packages/cli/src/ui/components/messages/__snapshots__/DiffRenderer.test.tsx.snap @@ -10,7 +10,7 @@ exports[` > with useAlterna `; exports[` > with useAlternateBuffer = false > should correctly render a diff with multiple hunks and a gap indicator > with terminalWidth 30 and height 6 1`] = ` -"... first 10 lines hidden ... +"... 10 hidden (Ctrl+O) ... 'test'; 21 + const anotherNew = 'test'; @@ -20,7 +20,7 @@ exports[` > with useAlterna `; exports[` > with useAlternateBuffer = false > should correctly render a diff with multiple hunks and a gap indicator > with terminalWidth 80 and height 6 1`] = ` -"... first 4 lines hidden ... +"... first 4 lines hidden (Ctrl+O to show) ... 
════════════════════════════════════════════════════════════════════════════════ 20 console.log('second hunk'); 21 - const anotherOld = 'test'; @@ -103,7 +103,7 @@ exports[` > with useAlterna `; exports[` > with useAlternateBuffer = true > should correctly render a diff with multiple hunks and a gap indicator > with terminalWidth 30 and height 6 1`] = ` -"... first 10 lines hidden ... +"... 10 hidden (Ctrl+O) ... 'test'; 21 + const anotherNew = 'test'; @@ -113,7 +113,7 @@ exports[` > with useAlterna `; exports[` > with useAlternateBuffer = true > should correctly render a diff with multiple hunks and a gap indicator > with terminalWidth 80 and height 6 1`] = ` -"... first 4 lines hidden ... +"... first 4 lines hidden (Ctrl+O to show) ... ════════════════════════════════════════════════════════════════════════════════ 20 console.log('second hunk'); 21 - const anotherOld = 'test'; diff --git a/packages/cli/src/ui/components/messages/__snapshots__/ToolResultDisplay.test.tsx.snap b/packages/cli/src/ui/components/messages/__snapshots__/ToolResultDisplay.test.tsx.snap index d1e4b16d2f..5e5c7ea2b0 100644 --- a/packages/cli/src/ui/components/messages/__snapshots__/ToolResultDisplay.test.tsx.snap +++ b/packages/cli/src/ui/components/messages/__snapshots__/ToolResultDisplay.test.tsx.snap @@ -37,7 +37,7 @@ exports[`ToolResultDisplay > renders string result as plain text when renderOutp `; exports[`ToolResultDisplay > truncates very long string results 1`] = ` -"... first 248 lines hidden ... +"... 248 hidden (Ctrl+O) ... 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa diff --git a/packages/cli/src/ui/components/shared/MaxSizedBox.test.tsx b/packages/cli/src/ui/components/shared/MaxSizedBox.test.tsx index 0182047caa..c5122770c0 100644 --- a/packages/cli/src/ui/components/shared/MaxSizedBox.test.tsx +++ b/packages/cli/src/ui/components/shared/MaxSizedBox.test.tsx @@ -41,7 +41,9 @@ describe('', () => { , ); await waitUntilReady(); - expect(lastFrame()).toContain('... first 2 lines hidden ...'); + expect(lastFrame()).toContain( + '... first 2 lines hidden (Ctrl+O to show) ...', + ); expect(lastFrame()).toMatchSnapshot(); unmount(); }); @@ -59,7 +61,9 @@ describe('', () => { , ); await waitUntilReady(); - expect(lastFrame()).toContain('... last 2 lines hidden ...'); + expect(lastFrame()).toContain( + '... last 2 lines hidden (Ctrl+O to show) ...', + ); expect(lastFrame()).toMatchSnapshot(); unmount(); }); @@ -77,7 +81,9 @@ describe('', () => { , ); await waitUntilReady(); - expect(lastFrame()).toContain('... first 2 lines hidden ...'); + expect(lastFrame()).toContain( + '... first 2 lines hidden (Ctrl+O to show) ...', + ); expect(lastFrame()).toMatchSnapshot(); unmount(); }); @@ -93,7 +99,9 @@ describe('', () => { , ); await waitUntilReady(); - expect(lastFrame()).toContain('... first 1 line hidden ...'); + expect(lastFrame()).toContain( + '... first 1 line hidden (Ctrl+O to show) ...', + ); expect(lastFrame()).toMatchSnapshot(); unmount(); }); @@ -111,7 +119,9 @@ describe('', () => { , ); await waitUntilReady(); - expect(lastFrame()).toContain('... first 7 lines hidden ...'); + expect(lastFrame()).toContain( + '... 
first 7 lines hidden (Ctrl+O to show) ...', + ); expect(lastFrame()).toMatchSnapshot(); unmount(); }); @@ -197,7 +207,9 @@ describe('', () => { ); await waitUntilReady(); - expect(lastFrame()).toContain('... first 21 lines hidden ...'); + expect(lastFrame()).toContain( + '... first 21 lines hidden (Ctrl+O to show) ...', + ); expect(lastFrame()).toMatchSnapshot(); unmount(); }); @@ -218,7 +230,9 @@ describe('', () => { ); await waitUntilReady(); - expect(lastFrame()).toContain('... last 21 lines hidden ...'); + expect(lastFrame()).toContain( + '... last 21 lines hidden (Ctrl+O to show) ...', + ); expect(lastFrame()).toMatchSnapshot(); unmount(); }); @@ -247,7 +261,9 @@ describe('', () => { const lastLine = lines[lines.length - 1]; // The last line should only contain the hidden indicator, no leaked content - expect(lastLine).toMatch(/^\.\.\. last \d+ lines? hidden \.\.\.$/); + expect(lastLine).toMatch( + /^\.\.\. last \d+ lines? hidden \(Ctrl\+O to show\) \.\.\.$/, + ); expect(lastFrame()).toMatchSnapshot(); unmount(); }); diff --git a/packages/cli/src/ui/components/shared/MaxSizedBox.tsx b/packages/cli/src/ui/components/shared/MaxSizedBox.tsx index fef1e11bd5..0c2922ddfb 100644 --- a/packages/cli/src/ui/components/shared/MaxSizedBox.tsx +++ b/packages/cli/src/ui/components/shared/MaxSizedBox.tsx @@ -9,6 +9,9 @@ import { useCallback, useEffect, useId, useRef, useState } from 'react'; import { Box, Text, ResizeObserver, type DOMElement } from 'ink'; import { theme } from '../../semantic-colors.js'; import { useOverflowActions } from '../../contexts/OverflowContext.js'; +import { isNarrowWidth } from '../../utils/isNarrowWidth.js'; +import { Command } from '../../../config/keyBindings.js'; +import { formatCommand } from '../../utils/keybindingUtils.js'; /** * Minimum height for the MaxSizedBox component. 
@@ -84,6 +87,9 @@ export const MaxSizedBox: React.FC = ({ const totalHiddenLines = hiddenLinesCount + additionalHiddenLinesCount; + const isNarrow = maxWidth !== undefined && isNarrowWidth(maxWidth); + const showMoreKey = formatCommand(Command.SHOW_MORE_LINES); + useEffect(() => { if (totalHiddenLines > 0) { addOverflowingId?.(id); @@ -116,8 +122,9 @@ export const MaxSizedBox: React.FC = ({ > {totalHiddenLines > 0 && overflowDirection === 'top' && ( - ... first {totalHiddenLines} line{totalHiddenLines === 1 ? '' : 's'}{' '} - hidden ... + {isNarrow + ? `... ${totalHiddenLines} hidden (${showMoreKey}) ...` + : `... first ${totalHiddenLines} line${totalHiddenLines === 1 ? '' : 's'} hidden (${showMoreKey} to show) ...`} )} = ({ {totalHiddenLines > 0 && overflowDirection === 'bottom' && ( - ... last {totalHiddenLines} line{totalHiddenLines === 1 ? '' : 's'}{' '} - hidden ... + {isNarrow + ? `... ${totalHiddenLines} hidden (${showMoreKey}) ...` + : `... last ${totalHiddenLines} line${totalHiddenLines === 1 ? '' : 's'} hidden (${showMoreKey} to show) ...`} )} diff --git a/packages/cli/src/ui/components/shared/__snapshots__/MaxSizedBox.test.tsx.snap b/packages/cli/src/ui/components/shared/__snapshots__/MaxSizedBox.test.tsx.snap index c2b8a4a4e4..ef3170d8da 100644 --- a/packages/cli/src/ui/components/shared/__snapshots__/MaxSizedBox.test.tsx.snap +++ b/packages/cli/src/ui/components/shared/__snapshots__/MaxSizedBox.test.tsx.snap @@ -1,7 +1,7 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html exports[` > accounts for additionalHiddenLinesCount 1`] = ` -"... first 7 lines hidden ... +"... first 7 lines hidden (Ctrl+O to show) ... Line 3 " `; @@ -16,12 +16,12 @@ Line 6 Line 7 Line 8 Line 9 -... last 21 lines hidden ... +... last 21 lines hidden (Ctrl+O to show) ... " `; exports[` > clips a long single text child from the top 1`] = ` -"... first 21 lines hidden ... +"... first 21 lines hidden (Ctrl+O to show) ... 
Line 22 Line 23 Line 24 @@ -39,7 +39,7 @@ exports[` > does not leak content after hidden indicator with bot - Step 1: Do something important - Step 2: Do something important -... last 18 lines hidden ... +... last 18 lines hidden (Ctrl+O to show) ... " `; @@ -58,12 +58,12 @@ Line 3 direct child exports[` > hides lines at the end when content exceeds maxHeight and overflowDirection is bottom 1`] = ` "Line 1 -... last 2 lines hidden ... +... last 2 lines hidden (Ctrl+O to show) ... " `; exports[` > hides lines when content exceeds maxHeight 1`] = ` -"... first 2 lines hidden ... +"... first 2 lines hidden (Ctrl+O to show) ... Line 3 " `; @@ -74,13 +74,13 @@ exports[` > renders children without truncation when they fit 1`] `; exports[` > shows plural "lines" when more than one line is hidden 1`] = ` -"... first 2 lines hidden ... +"... first 2 lines hidden (Ctrl+O to show) ... Line 3 " `; exports[` > shows singular "line" when exactly one line is hidden 1`] = ` -"... first 1 line hidden ... +"... first 1 line hidden (Ctrl+O to show) ... Line 1 " `; From 4d9cc36146867c707a7df0a309ebfc4527c37890 Mon Sep 17 00:00:00 2001 From: Jacob Richman Date: Fri, 27 Feb 2026 07:07:14 -0800 Subject: [PATCH 25/41] Fix flicker showing message to press ctrl-O again to collapse. 
(#20414) Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- packages/cli/src/ui/AppContainer.tsx | 87 +++++--------------- packages/cli/src/ui/hooks/useTimedMessage.ts | 10 ++- 2 files changed, 25 insertions(+), 72 deletions(-) diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index b89d0b83c0..99c4b0c424 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -264,14 +264,16 @@ export const AppContainer = (props: AppContainerProps) => { () => isWorkspaceTrusted(settings.merged).isTrusted, ); - const [queueErrorMessage, setQueueErrorMessage] = useState( - null, + const [queueErrorMessage, setQueueErrorMessage] = useTimedMessage( + QUEUE_ERROR_DISPLAY_DURATION_MS, ); const [newAgents, setNewAgents] = useState(null); const [constrainHeight, setConstrainHeight] = useState(true); - const [showIsExpandableHint, setShowIsExpandableHint] = useState(false); - const expandHintTimerRef = useRef(null); + const [expandHintTrigger, triggerExpandHint] = useTimedMessage( + EXPAND_HINT_DURATION_MS, + ); + const showIsExpandableHint = Boolean(expandHintTrigger); const overflowState = useOverflowState(); const overflowingIdsSize = overflowState?.overflowingIds.size ?? 0; const hasOverflowState = overflowingIdsSize > 0 || !constrainHeight; @@ -284,39 +286,15 @@ export const AppContainer = (props: AppContainerProps) => { * boolean dependency (hasOverflowState) to ensure the timer only resets on * genuine state transitions, preventing it from infinitely resetting during * active text streaming. + * + * In alternate buffer mode, we don't trigger the hint automatically on overflow + * to avoid noise, but the user can still trigger it manually with Ctrl+O. 
*/ useEffect(() => { - if (isAlternateBuffer) { - setShowIsExpandableHint(false); - if (expandHintTimerRef.current) { - clearTimeout(expandHintTimerRef.current); - } - return; + if (hasOverflowState && !isAlternateBuffer) { + triggerExpandHint(true); } - - if (hasOverflowState) { - setShowIsExpandableHint(true); - if (expandHintTimerRef.current) { - clearTimeout(expandHintTimerRef.current); - } - expandHintTimerRef.current = setTimeout(() => { - setShowIsExpandableHint(false); - }, EXPAND_HINT_DURATION_MS); - } - }, [hasOverflowState, isAlternateBuffer, constrainHeight]); - - /** - * Safe cleanup to ensure the expansion hint timer is cancelled when the - * component unmounts, preventing memory leaks. - */ - useEffect( - () => () => { - if (expandHintTimerRef.current) { - clearTimeout(expandHintTimerRef.current); - } - }, - [], - ); + }, [hasOverflowState, isAlternateBuffer, triggerExpandHint]); const [defaultBannerText, setDefaultBannerText] = useState(''); const [warningBannerText, setWarningBannerText] = useState(''); @@ -1252,10 +1230,7 @@ Logging in with Google... Restarting Gemini CLI to continue. async (submittedValue: string) => { reset(); // Explicitly hide the expansion hint and clear its x-second timer when a new turn begins. - setShowIsExpandableHint(false); - if (expandHintTimerRef.current) { - clearTimeout(expandHintTimerRef.current); - } + triggerExpandHint(null); if (!constrainHeight) { setConstrainHeight(true); if (!isAlternateBuffer) { @@ -1327,16 +1302,14 @@ Logging in with Google... Restarting Gemini CLI to continue. refreshStatic, reset, handleHintSubmit, + triggerExpandHint, ], ); const handleClearScreen = useCallback(() => { reset(); // Explicitly hide the expansion hint and clear its x-second timer when clearing the screen. 
- setShowIsExpandableHint(false); - if (expandHintTimerRef.current) { - clearTimeout(expandHintTimerRef.current); - } + triggerExpandHint(null); historyManager.clearItems(); clearConsoleMessagesState(); refreshStatic(); @@ -1345,7 +1318,7 @@ Logging in with Google... Restarting Gemini CLI to continue. clearConsoleMessagesState, refreshStatic, reset, - setShowIsExpandableHint, + triggerExpandHint, ]); const { handleInput: vimHandleInput } = useVim(buffer, handleFinalSubmit); @@ -1632,17 +1605,6 @@ Logging in with Google... Restarting Gemini CLI to continue. } }, [ideNeedsRestart]); - useEffect(() => { - if (queueErrorMessage) { - const timer = setTimeout(() => { - setQueueErrorMessage(null); - }, QUEUE_ERROR_DISPLAY_DURATION_MS); - - return () => clearTimeout(timer); - } - return undefined; - }, [queueErrorMessage, setQueueErrorMessage]); - useEffect(() => { if (isInitialMount.current) { isInitialMount.current = false; @@ -1748,13 +1710,7 @@ Logging in with Google... Restarting Gemini CLI to continue. setConstrainHeight(true); if (keyMatchers[Command.SHOW_MORE_LINES](key)) { // If the user manually collapses the view, show the hint and reset the x-second timer. - setShowIsExpandableHint(true); - if (expandHintTimerRef.current) { - clearTimeout(expandHintTimerRef.current); - } - expandHintTimerRef.current = setTimeout(() => { - setShowIsExpandableHint(false); - }, EXPAND_HINT_DURATION_MS); + triggerExpandHint(true); } if (!isAlternateBuffer) { refreshStatic(); @@ -1803,13 +1759,7 @@ Logging in with Google... Restarting Gemini CLI to continue. ) { setConstrainHeight(false); // If the user manually expands the view, show the hint and reset the x-second timer. 
- setShowIsExpandableHint(true); - if (expandHintTimerRef.current) { - clearTimeout(expandHintTimerRef.current); - } - expandHintTimerRef.current = setTimeout(() => { - setShowIsExpandableHint(false); - }, EXPAND_HINT_DURATION_MS); + triggerExpandHint(true); if (!isAlternateBuffer) { refreshStatic(); } @@ -1914,6 +1864,7 @@ Logging in with Google... Restarting Gemini CLI to continue. showTransientMessage, settings.merged.general.devtools, showErrorDetails, + triggerExpandHint, ], ); diff --git a/packages/cli/src/ui/hooks/useTimedMessage.ts b/packages/cli/src/ui/hooks/useTimedMessage.ts index 3fe5f0b9c4..547968cb90 100644 --- a/packages/cli/src/ui/hooks/useTimedMessage.ts +++ b/packages/cli/src/ui/hooks/useTimedMessage.ts @@ -15,14 +15,16 @@ export function useTimedMessage(durationMs: number) { const timeoutRef = useRef(null); const showMessage = useCallback( - (msg: T) => { + (msg: T | null) => { setMessage(msg); if (timeoutRef.current) { clearTimeout(timeoutRef.current); } - timeoutRef.current = setTimeout(() => { - setMessage(null); - }, durationMs); + if (msg !== null) { + timeoutRef.current = setTimeout(() => { + setMessage(null); + }, durationMs); + } }, [durationMs], ); From ac4d0c20d868db042bc356d509522f3375a800c0 Mon Sep 17 00:00:00 2001 From: Jacob Richman Date: Fri, 27 Feb 2026 07:34:49 -0800 Subject: [PATCH 26/41] fix(cli): hide shortcuts hint while model is thinking or the user has typed a prompt + add debounce to avoid flicker (#19389) --- .../cli/src/ui/components/Composer.test.tsx | 65 +++++++++++++++++-- packages/cli/src/ui/components/Composer.tsx | 23 ++++++- .../__snapshots__/Composer.test.tsx.snap | 2 +- 3 files changed, 83 insertions(+), 7 deletions(-) diff --git a/packages/cli/src/ui/components/Composer.test.tsx b/packages/cli/src/ui/components/Composer.test.tsx index 12deda3e76..330e615cfa 100644 --- a/packages/cli/src/ui/components/Composer.test.tsx +++ b/packages/cli/src/ui/components/Composer.test.tsx @@ -6,8 +6,8 @@ import { beforeEach, 
afterEach, describe, it, expect, vi } from 'vitest'; import { render } from '../../test-utils/render.js'; +import { act, useEffect } from 'react'; import { Box, Text } from 'ink'; -import { useEffect } from 'react'; import { Composer } from './Composer.js'; import { UIStateContext, type UIState } from '../contexts/UIStateContext.js'; import { @@ -34,6 +34,7 @@ import { StreamingState } from '../types.js'; import { TransientMessageType } from '../../utils/events.js'; import type { LoadedSettings } from '../../config/settings.js'; import type { SessionMetrics } from '../contexts/SessionContext.js'; +import type { TextBuffer } from './shared/text-buffer.js'; const composerTestControls = vi.hoisted(() => ({ suggestionsVisible: false, @@ -263,16 +264,26 @@ const renderComposer = async ( , ); await result.waitUntilReady(); + + // Wait for shortcuts hint debounce if using fake timers + if (vi.isFakeTimers()) { + await act(async () => { + await vi.advanceTimersByTimeAsync(250); + }); + } + return result; }; describe('Composer', () => { beforeEach(() => { + vi.useFakeTimers(); composerTestControls.suggestionsVisible = false; composerTestControls.isAlternateBuffer = false; }); afterEach(() => { + vi.useRealTimers(); vi.restoreAllMocks(); }); @@ -809,6 +820,28 @@ describe('Composer', () => { }); describe('Shortcuts Hint', () => { + it('restores shortcuts hint after 200ms debounce when buffer is empty', async () => { + const { lastFrame } = await renderComposer( + createMockUIState({ + buffer: { text: '' } as unknown as TextBuffer, + cleanUiDetailsVisible: false, + }), + ); + + expect(lastFrame({ allowEmpty: true })).toContain('ShortcutsHint'); + }); + + it('does not show shortcuts hint immediately when buffer has text', async () => { + const uiState = createMockUIState({ + buffer: { text: 'hello' } as unknown as TextBuffer, + cleanUiDetailsVisible: false, + }); + + const { lastFrame } = await renderComposer(uiState); + + expect(lastFrame()).not.toContain('ShortcutsHint'); + 
}); + it('hides shortcuts hint when showShortcutsHint setting is false', async () => { const uiState = createMockUIState(); const settings = createMockSettings({ @@ -857,6 +890,27 @@ describe('Composer', () => { expect(lastFrame()).toContain('ShortcutsHint'); }); + it('hides shortcuts hint while loading when full UI details are visible', async () => { + const uiState = createMockUIState({ + cleanUiDetailsVisible: true, + streamingState: StreamingState.Responding, + }); + + const { lastFrame } = await renderComposer(uiState); + + expect(lastFrame()).not.toContain('ShortcutsHint'); + }); + + it('hides shortcuts hint when text is typed in buffer', async () => { + const uiState = createMockUIState({ + buffer: { text: 'hello' } as unknown as TextBuffer, + }); + + const { lastFrame } = await renderComposer(uiState); + + expect(lastFrame()).not.toContain('ShortcutsHint'); + }); + it('hides shortcuts hint while loading in minimal mode', async () => { const uiState = createMockUIState({ cleanUiDetailsVisible: false, @@ -930,9 +984,10 @@ describe('Composer', () => { streamingState: StreamingState.Idle, }); - const { lastFrame } = await renderComposer(uiState); + const { lastFrame, unmount } = await renderComposer(uiState); expect(lastFrame()).toContain('ShortcutsHelp'); + unmount(); }); it('hides shortcuts help while streaming', async () => { @@ -941,9 +996,10 @@ describe('Composer', () => { streamingState: StreamingState.Responding, }); - const { lastFrame } = await renderComposer(uiState); + const { lastFrame, unmount } = await renderComposer(uiState); expect(lastFrame()).not.toContain('ShortcutsHelp'); + unmount(); }); it('hides shortcuts help when action is required', async () => { @@ -956,9 +1012,10 @@ describe('Composer', () => { ), }); - const { lastFrame } = await renderComposer(uiState); + const { lastFrame, unmount } = await renderComposer(uiState); expect(lastFrame()).not.toContain('ShortcutsHelp'); + unmount(); }); }); diff --git 
a/packages/cli/src/ui/components/Composer.tsx b/packages/cli/src/ui/components/Composer.tsx index 2adc370ed5..98a465fa39 100644 --- a/packages/cli/src/ui/components/Composer.tsx +++ b/packages/cli/src/ui/components/Composer.tsx @@ -151,11 +151,30 @@ export const Composer = ({ isFocused = true }: { isFocused?: boolean }) => { : undefined, ); const hideShortcutsHintForSuggestions = hideUiDetailsForSuggestions; + const isModelIdle = uiState.streamingState === StreamingState.Idle; + const isBufferEmpty = uiState.buffer.text.length === 0; + const canShowShortcutsHint = + isModelIdle && isBufferEmpty && !hasPendingActionRequired; + const [showShortcutsHintDebounced, setShowShortcutsHintDebounced] = + useState(canShowShortcutsHint); + + useEffect(() => { + if (!canShowShortcutsHint) { + setShowShortcutsHintDebounced(false); + return; + } + + const timeout = setTimeout(() => { + setShowShortcutsHintDebounced(true); + }, 200); + + return () => clearTimeout(timeout); + }, [canShowShortcutsHint]); + const showShortcutsHint = settings.merged.ui.showShortcutsHint && !hideShortcutsHintForSuggestions && - !hideMinimalModeHintWhileBusy && - !hasPendingActionRequired; + showShortcutsHintDebounced; const showMinimalModeBleedThrough = !hideUiDetailsForSuggestions && Boolean(minimalModeBleedThrough); const showMinimalInlineLoading = !showUiDetails && showLoadingIndicator; diff --git a/packages/cli/src/ui/components/__snapshots__/Composer.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/Composer.test.tsx.snap index 2ba370a000..452663d719 100644 --- a/packages/cli/src/ui/components/__snapshots__/Composer.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/Composer.test.tsx.snap @@ -35,7 +35,7 @@ Footer `; exports[`Composer > Snapshots > matches snapshot while streaming 1`] = ` -" LoadingIndicator: Thinking ShortcutsHint +" LoadingIndicator: Thinking ──────────────────────────────────────────────────────────────────────────────────────────────────── 
ApprovalModeIndicator InputPrompt: Type your message or @path/to/file From 25ade7bcb76292fb8d4b78eda17a29332d82a844 Mon Sep 17 00:00:00 2001 From: Adib234 <30782825+Adib234@users.noreply.github.com> Date: Fri, 27 Feb 2026 10:42:37 -0500 Subject: [PATCH 27/41] feat(plan): update planning workflow to encourage multi-select with descriptions of options (#20491) --- packages/core/src/core/__snapshots__/prompts.test.ts.snap | 6 +++--- packages/core/src/prompts/snippets.ts | 2 +- .../__snapshots__/coreToolsModelSnapshots.test.ts.snap | 2 +- .../src/tools/definitions/model-family-sets/gemini-3.ts | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index 48afa13515..30603b5443 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -109,7 +109,7 @@ The following tools are available in Plan Mode: ## Rules 1. **Read-Only:** You cannot modify source code. You may ONLY use read-only tools to explore, and you can only write to \`/tmp/plans/\`. If the user asks you to modify source code directly, you MUST explain that you are in Plan Mode and must first create a plan and get approval. 2. **Write Constraint:** \`write_file\` and \`replace\` may ONLY be used to write .md plan files to \`/tmp/plans/\`. They cannot modify source code. -3. **Efficiency:** Autonomously combine discovery and drafting phases to minimize conversational turns. If the request is ambiguous, use \`ask_user\` to clarify. +3. **Efficiency:** Autonomously combine discovery and drafting phases to minimize conversational turns. If the request is ambiguous, use \`ask_user\` to clarify. Use multi-select to offer flexibility and include detailed descriptions for each option to help the user understand the implications of their choice. 4. 
**Inquiries and Directives:** Distinguish between Inquiries and Directives to minimize unnecessary planning. - **Inquiries:** If the request is an **Inquiry** (e.g., "How does X work?"), answer directly. DO NOT create a plan. - **Directives:** If the request is a **Directive** (e.g., "Fix bug Y"), follow the workflow below. @@ -277,7 +277,7 @@ The following tools are available in Plan Mode: ## Rules 1. **Read-Only:** You cannot modify source code. You may ONLY use read-only tools to explore, and you can only write to \`/tmp/plans/\`. If the user asks you to modify source code directly, you MUST explain that you are in Plan Mode and must first create a plan and get approval. 2. **Write Constraint:** \`write_file\` and \`replace\` may ONLY be used to write .md plan files to \`/tmp/plans/\`. They cannot modify source code. -3. **Efficiency:** Autonomously combine discovery and drafting phases to minimize conversational turns. If the request is ambiguous, use \`ask_user\` to clarify. +3. **Efficiency:** Autonomously combine discovery and drafting phases to minimize conversational turns. If the request is ambiguous, use \`ask_user\` to clarify. Use multi-select to offer flexibility and include detailed descriptions for each option to help the user understand the implications of their choice. 4. **Inquiries and Directives:** Distinguish between Inquiries and Directives to minimize unnecessary planning. - **Inquiries:** If the request is an **Inquiry** (e.g., "How does X work?"), answer directly. DO NOT create a plan. - **Directives:** If the request is a **Directive** (e.g., "Fix bug Y"), follow the workflow below. @@ -564,7 +564,7 @@ The following tools are available in Plan Mode: ## Rules 1. **Read-Only:** You cannot modify source code. You may ONLY use read-only tools to explore, and you can only write to \`/tmp/project-temp/plans/\`. 
If the user asks you to modify source code directly, you MUST explain that you are in Plan Mode and must first create a plan and get approval. 2. **Write Constraint:** \`write_file\` and \`replace\` may ONLY be used to write .md plan files to \`/tmp/project-temp/plans/\`. They cannot modify source code. -3. **Efficiency:** Autonomously combine discovery and drafting phases to minimize conversational turns. If the request is ambiguous, use \`ask_user\` to clarify. +3. **Efficiency:** Autonomously combine discovery and drafting phases to minimize conversational turns. If the request is ambiguous, use \`ask_user\` to clarify. Use multi-select to offer flexibility and include detailed descriptions for each option to help the user understand the implications of their choice. 4. **Inquiries and Directives:** Distinguish between Inquiries and Directives to minimize unnecessary planning. - **Inquiries:** If the request is an **Inquiry** (e.g., "How does X work?"), answer directly. DO NOT create a plan. - **Directives:** If the request is a **Directive** (e.g., "Fix bug Y"), follow the workflow below. diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 7f6c5c633e..982a366c3b 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -472,7 +472,7 @@ ${options.planModeToolsList} ## Rules 1. **Read-Only:** You cannot modify source code. You may ONLY use read-only tools to explore, and you can only write to \`${options.plansDir}/\`. If the user asks you to modify source code directly, you MUST explain that you are in Plan Mode and must first create a plan and get approval. 2. **Write Constraint:** ${formatToolName(WRITE_FILE_TOOL_NAME)} and ${formatToolName(EDIT_TOOL_NAME)} may ONLY be used to write .md plan files to \`${options.plansDir}/\`. They cannot modify source code. -3. **Efficiency:** Autonomously combine discovery and drafting phases to minimize conversational turns. 
If the request is ambiguous, use ${formatToolName(ASK_USER_TOOL_NAME)} to clarify. +3. **Efficiency:** Autonomously combine discovery and drafting phases to minimize conversational turns. If the request is ambiguous, use ${formatToolName(ASK_USER_TOOL_NAME)} to clarify. Use multi-select to offer flexibility and include detailed descriptions for each option to help the user understand the implications of their choice. 4. **Inquiries and Directives:** Distinguish between Inquiries and Directives to minimize unnecessary planning. - **Inquiries:** If the request is an **Inquiry** (e.g., "How does X work?"), answer directly. DO NOT create a plan. - **Directives:** If the request is a **Directive** (e.g., "Fix bug Y"), follow the workflow below. diff --git a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap index 2d5cfe8d52..70cf828d86 100644 --- a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap +++ b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap @@ -859,7 +859,7 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: ask_user 1`] = ` { - "description": "Ask the user one or more questions to gather preferences, clarify requirements, or make decisions.", + "description": "Ask the user one or more questions to gather preferences, clarify requirements, or make decisions. 
When using this tool, prefer providing multiple-choice options with detailed descriptions and enable multi-select where appropriate to provide maximum flexibility.", "name": "ask_user", "parametersJsonSchema": { "properties": { diff --git a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts index 1d50eae7e8..7c4fddc9f6 100644 --- a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts +++ b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts @@ -558,7 +558,7 @@ The agent did not use the todo list because this task could be completed by a ti ask_user: { name: ASK_USER_TOOL_NAME, description: - 'Ask the user one or more questions to gather preferences, clarify requirements, or make decisions.', + 'Ask the user one or more questions to gather preferences, clarify requirements, or make decisions. When using this tool, prefer providing multiple-choice options with detailed descriptions and enable multi-select where appropriate to provide maximum flexibility.', parametersJsonSchema: { type: 'object', required: ['questions'], From d7320f5425c42279fe9345b8d193315a6c7b1500 Mon Sep 17 00:00:00 2001 From: Pyush Sinha Date: Fri, 27 Feb 2026 07:55:02 -0800 Subject: [PATCH 28/41] refactor(core,cli): useAlternateBuffer read from config (#20346) Co-authored-by: Jacob Richman --- packages/cli/src/config/config.ts | 1 + packages/cli/src/gemini.test.tsx | 1 + packages/cli/src/gemini.tsx | 6 +- packages/cli/src/test-utils/mockConfig.ts | 1 + packages/cli/src/test-utils/render.tsx | 19 ++++- packages/cli/src/ui/AppContainer.test.tsx | 8 ++ packages/cli/src/ui/AppContainer.tsx | 5 +- .../ui/components/ExitPlanModeDialog.test.tsx | 2 + .../components/ToolConfirmationQueue.test.tsx | 1 + .../src/ui/hooks/useAlternateBuffer.test.ts | 80 +++++++++++++++++++ .../cli/src/ui/hooks/useAlternateBuffer.ts | 13 +-- packages/cli/src/ui/utils/ui-sizing.test.ts | 30 ++----- 
packages/cli/src/ui/utils/ui-sizing.ts | 6 +- packages/core/src/config/config.test.ts | 25 ++++++ packages/core/src/config/config.ts | 7 ++ 15 files changed, 164 insertions(+), 41 deletions(-) create mode 100644 packages/cli/src/ui/hooks/useAlternateBuffer.test.ts diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index f2870a5f57..bbc8b1681e 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -843,6 +843,7 @@ export async function loadCliConfig( interactive, trustedFolder, useBackgroundColor: settings.ui?.useBackgroundColor, + useAlternateBuffer: settings.ui?.useAlternateBuffer, useRipgrep: settings.tools?.useRipgrep, enableInteractiveShell: settings.tools?.shell?.enableInteractiveShell, shellToolInactivityTimeout: settings.tools?.shell?.inactivityTimeout, diff --git a/packages/cli/src/gemini.test.tsx b/packages/cli/src/gemini.test.tsx index dae249a8ac..c2a1d079c5 100644 --- a/packages/cli/src/gemini.test.tsx +++ b/packages/cli/src/gemini.test.tsx @@ -1182,6 +1182,7 @@ describe('startInteractiveUI', () => { getProjectRoot: () => '/root', getScreenReader: () => false, getDebugMode: () => false, + getUseAlternateBuffer: () => true, }); const mockSettings = { merged: { diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index 8cd7048a7e..2e238765e8 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -102,8 +102,8 @@ import { loadSandboxConfig } from './config/sandboxConfig.js'; import { deleteSession, listSessions } from './utils/sessions.js'; import { createPolicyUpdater } from './config/policy.js'; import { ScrollProvider } from './ui/contexts/ScrollProvider.js'; -import { isAlternateBufferEnabled } from './ui/hooks/useAlternateBuffer.js'; import { TerminalProvider } from './ui/contexts/TerminalContext.js'; +import { isAlternateBufferEnabled } from './ui/hooks/useAlternateBuffer.js'; import { OverflowProvider } from './ui/contexts/OverflowContext.js'; 
import { setupTerminalAndTheme } from './utils/terminalTheme.js'; @@ -196,7 +196,7 @@ export async function startInteractiveUI( // and the Ink alternate buffer mode requires line wrapping harmful to // screen readers. const useAlternateBuffer = shouldEnterAlternateScreen( - isAlternateBufferEnabled(settings), + isAlternateBufferEnabled(config), config.getScreenReader(), ); const mouseEventsEnabled = useAlternateBuffer; @@ -678,7 +678,7 @@ export async function main() { let input = config.getQuestion(); const useAlternateBuffer = shouldEnterAlternateScreen( - isAlternateBufferEnabled(settings), + isAlternateBufferEnabled(config), config.getScreenReader(), ); const rawStartupWarnings = await getStartupWarnings(); diff --git a/packages/cli/src/test-utils/mockConfig.ts b/packages/cli/src/test-utils/mockConfig.ts index af36444c39..bae89d36c9 100644 --- a/packages/cli/src/test-utils/mockConfig.ts +++ b/packages/cli/src/test-utils/mockConfig.ts @@ -156,6 +156,7 @@ export const createMockConfig = (overrides: Partial = {}): Config => getExperiments: vi.fn().mockReturnValue(undefined), getHasAccessToPreviewModel: vi.fn().mockReturnValue(false), validatePathAccess: vi.fn().mockReturnValue(null), + getUseAlternateBuffer: vi.fn().mockReturnValue(false), ...overrides, }) as unknown as Config; diff --git a/packages/cli/src/test-utils/render.tsx b/packages/cli/src/test-utils/render.tsx index 0420252149..2cfb89d0f2 100644 --- a/packages/cli/src/test-utils/render.tsx +++ b/packages/cli/src/test-utils/render.tsx @@ -703,6 +703,21 @@ export const renderWithProviders = ( }); } + // Wrap config in a Proxy so useAlternateBuffer hook (which reads from Config) gets the correct value, + // without replacing the entire config object and its other values. 
+ let finalConfig = config; + if (useAlternateBuffer !== undefined) { + finalConfig = new Proxy(config, { + get(target, prop, receiver) { + if (prop === 'getUseAlternateBuffer') { + return () => useAlternateBuffer; + } + // eslint-disable-next-line @typescript-eslint/no-unsafe-return + return Reflect.get(target, prop, receiver); + }, + }); + } + const mainAreaWidth = terminalWidth; const finalUiState = { @@ -731,7 +746,7 @@ export const renderWithProviders = ( const renderResult = render( - + @@ -743,7 +758,7 @@ export const renderWithProviders = ( { isAlternateMode = false, childHandler?: Mock, ) => { + vi.spyOn(mockConfig, 'getUseAlternateBuffer').mockReturnValue( + isAlternateMode, + ); + // Update settings for this test run const defaultMergedSettings = mergeSettings({}, {}, {}, {}, true); const testSettings = { @@ -3364,6 +3368,8 @@ describe('AppContainer State Management', () => { ); vi.mocked(checkPermissions).mockResolvedValue([]); + vi.spyOn(mockConfig, 'getUseAlternateBuffer').mockReturnValue(true); + let unmount: () => void; await act(async () => { unmount = renderAppContainer({ @@ -3596,6 +3602,8 @@ describe('AppContainer State Management', () => { }, } as unknown as LoadedSettings; + vi.spyOn(mockConfig, 'getUseAlternateBuffer').mockReturnValue(true); + let unmount: () => void; await act(async () => { const result = renderAppContainer({ diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index 99c4b0c424..986bcafaa1 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -145,7 +145,6 @@ import { useSessionResume } from './hooks/useSessionResume.js'; import { useIncludeDirsTrust } from './hooks/useIncludeDirsTrust.js'; import { useSessionRetentionCheck } from './hooks/useSessionRetentionCheck.js'; import { isWorkspaceTrusted } from '../config/trustedFolders.js'; -import { useAlternateBuffer } from './hooks/useAlternateBuffer.js'; import { useSettings } from 
'./contexts/SettingsContext.js'; import { terminalCapabilityManager } from './utils/terminalCapabilityManager.js'; import { useInputHistoryStore } from './hooks/useInputHistoryStore.js'; @@ -228,7 +227,7 @@ export const AppContainer = (props: AppContainerProps) => { }); useMemoryMonitor(historyManager); - const isAlternateBuffer = useAlternateBuffer(); + const isAlternateBuffer = config.getUseAlternateBuffer(); const [corgiMode, setCorgiMode] = useState(false); const [forceRerenderKey, setForceRerenderKey] = useState(0); const [debugMessage, setDebugMessage] = useState(''); @@ -545,7 +544,7 @@ export const AppContainer = (props: AppContainerProps) => { const { consoleMessages, clearConsoleMessages: clearConsoleMessagesState } = useConsoleMessages(); - const mainAreaWidth = calculateMainAreaWidth(terminalWidth, settings); + const mainAreaWidth = calculateMainAreaWidth(terminalWidth, config); // Derive widths for InputPrompt using shared helper const { inputWidth, suggestionsWidth } = useMemo(() => { const { inputWidth, suggestionsWidth } = diff --git a/packages/cli/src/ui/components/ExitPlanModeDialog.test.tsx b/packages/cli/src/ui/components/ExitPlanModeDialog.test.tsx index c9def1a8c2..d691caba1a 100644 --- a/packages/cli/src/ui/components/ExitPlanModeDialog.test.tsx +++ b/packages/cli/src/ui/components/ExitPlanModeDialog.test.tsx @@ -167,6 +167,7 @@ Implement a comprehensive authentication system with multiple providers. readTextFile: vi.fn(), writeTextFile: vi.fn(), }), + getUseAlternateBuffer: () => options?.useAlternateBuffer ?? true, } as unknown as import('@google/gemini-cli-core').Config, }, ); @@ -443,6 +444,7 @@ Implement a comprehensive authentication system with multiple providers. readTextFile: vi.fn(), writeTextFile: vi.fn(), }), + getUseAlternateBuffer: () => useAlternateBuffer ?? 
true, } as unknown as import('@google/gemini-cli-core').Config, }, ); diff --git a/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx b/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx index cabce1af2f..7b45bd0458 100644 --- a/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx +++ b/packages/cli/src/ui/components/ToolConfirmationQueue.test.tsx @@ -51,6 +51,7 @@ describe('ToolConfirmationQueue', () => { storage: { getPlansDir: () => '/mock/temp/plans', }, + getUseAlternateBuffer: () => false, } as unknown as Config; beforeEach(() => { diff --git a/packages/cli/src/ui/hooks/useAlternateBuffer.test.ts b/packages/cli/src/ui/hooks/useAlternateBuffer.test.ts new file mode 100644 index 0000000000..bf0e27aa37 --- /dev/null +++ b/packages/cli/src/ui/hooks/useAlternateBuffer.test.ts @@ -0,0 +1,80 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { renderHook } from '../../test-utils/render.js'; +import { + useAlternateBuffer, + isAlternateBufferEnabled, +} from './useAlternateBuffer.js'; +import type { Config } from '@google/gemini-cli-core'; + +vi.mock('../contexts/ConfigContext.js', () => ({ + useConfig: vi.fn(), +})); + +const mockUseConfig = vi.mocked( + await import('../contexts/ConfigContext.js').then((m) => m.useConfig), +); + +describe('useAlternateBuffer', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('should return false when config.getUseAlternateBuffer returns false', () => { + mockUseConfig.mockReturnValue({ + getUseAlternateBuffer: () => false, + } as unknown as ReturnType); + + const { result } = renderHook(() => useAlternateBuffer()); + expect(result.current).toBe(false); + }); + + it('should return true when config.getUseAlternateBuffer returns true', () => { + mockUseConfig.mockReturnValue({ + getUseAlternateBuffer: () => true, + } as unknown as ReturnType); + + const { result } = 
renderHook(() => useAlternateBuffer()); + expect(result.current).toBe(true); + }); + + it('should return the immutable config value, not react to settings changes', () => { + const mockConfig = { + getUseAlternateBuffer: () => true, + } as unknown as ReturnType; + + mockUseConfig.mockReturnValue(mockConfig); + + const { result, rerender } = renderHook(() => useAlternateBuffer()); + + // Value should remain true even after rerender + expect(result.current).toBe(true); + + rerender(); + + expect(result.current).toBe(true); + }); +}); + +describe('isAlternateBufferEnabled', () => { + it('should return true when config.getUseAlternateBuffer returns true', () => { + const config = { + getUseAlternateBuffer: () => true, + } as unknown as Config; + + expect(isAlternateBufferEnabled(config)).toBe(true); + }); + + it('should return false when config.getUseAlternateBuffer returns false', () => { + const config = { + getUseAlternateBuffer: () => false, + } as unknown as Config; + + expect(isAlternateBufferEnabled(config)).toBe(false); + }); +}); diff --git a/packages/cli/src/ui/hooks/useAlternateBuffer.ts b/packages/cli/src/ui/hooks/useAlternateBuffer.ts index 45b387173a..8300df70de 100644 --- a/packages/cli/src/ui/hooks/useAlternateBuffer.ts +++ b/packages/cli/src/ui/hooks/useAlternateBuffer.ts @@ -4,13 +4,14 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { useSettings } from '../contexts/SettingsContext.js'; -import type { LoadedSettings } from '../../config/settings.js'; +import { useConfig } from '../contexts/ConfigContext.js'; +import type { Config } from '@google/gemini-cli-core'; -export const isAlternateBufferEnabled = (settings: LoadedSettings): boolean => - settings.merged.ui.useAlternateBuffer === true; +export const isAlternateBufferEnabled = (config: Config): boolean => + config.getUseAlternateBuffer(); +// This is read from Config so that the UI reads the same value per application session export const useAlternateBuffer = (): boolean => { - const settings 
= useSettings(); - return isAlternateBufferEnabled(settings); + const config = useConfig(); + return isAlternateBufferEnabled(config); }; diff --git a/packages/cli/src/ui/utils/ui-sizing.test.ts b/packages/cli/src/ui/utils/ui-sizing.test.ts index dc3b21e862..1b849bd9df 100644 --- a/packages/cli/src/ui/utils/ui-sizing.test.ts +++ b/packages/cli/src/ui/utils/ui-sizing.test.ts @@ -4,29 +4,11 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, expect, vi } from 'vitest'; +import { describe, it, expect } from 'vitest'; import { calculateMainAreaWidth } from './ui-sizing.js'; -import { type LoadedSettings } from '../../config/settings.js'; - -// Mock dependencies -const mocks = vi.hoisted(() => ({ - isAlternateBufferEnabled: vi.fn(), -})); - -vi.mock('../hooks/useAlternateBuffer.js', () => ({ - isAlternateBufferEnabled: mocks.isAlternateBufferEnabled, -})); +import type { Config } from '@google/gemini-cli-core'; describe('ui-sizing', () => { - const createSettings = (useFullWidth?: boolean): LoadedSettings => - ({ - merged: { - ui: { - useFullWidth, - }, - }, - }) as unknown as LoadedSettings; - describe('calculateMainAreaWidth', () => { it.each([ // expected, width, altBuffer @@ -37,10 +19,10 @@ describe('ui-sizing', () => { ])( 'should return %i when width=%i and altBuffer=%s', (expected, width, altBuffer) => { - mocks.isAlternateBufferEnabled.mockReturnValue(altBuffer); - const settings = createSettings(); - - expect(calculateMainAreaWidth(width, settings)).toBe(expected); + const mockConfig = { + getUseAlternateBuffer: () => altBuffer, + } as unknown as Config; + expect(calculateMainAreaWidth(width, mockConfig)).toBe(expected); }, ); }); diff --git a/packages/cli/src/ui/utils/ui-sizing.ts b/packages/cli/src/ui/utils/ui-sizing.ts index d8b7f8e73f..8541c6c552 100644 --- a/packages/cli/src/ui/utils/ui-sizing.ts +++ b/packages/cli/src/ui/utils/ui-sizing.ts @@ -4,14 +4,14 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { type LoadedSettings } from 
'../../config/settings.js'; +import type { Config } from '@google/gemini-cli-core'; import { isAlternateBufferEnabled } from '../hooks/useAlternateBuffer.js'; export const calculateMainAreaWidth = ( terminalWidth: number, - settings: LoadedSettings, + config: Config, ): number => { - if (isAlternateBufferEnabled(settings)) { + if (isAlternateBufferEnabled(config)) { return terminalWidth - 1; } return terminalWidth; diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index 1034246e9c..ad8af8656c 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -941,6 +941,31 @@ describe('Server Config (config.ts)', () => { }); }); + describe('UseAlternateBuffer Configuration', () => { + it('should default useAlternateBuffer to false when not provided', () => { + const config = new Config(baseParams); + expect(config.getUseAlternateBuffer()).toBe(false); + }); + + it('should set useAlternateBuffer to true when provided as true', () => { + const paramsWithAlternateBuffer: ConfigParameters = { + ...baseParams, + useAlternateBuffer: true, + }; + const config = new Config(paramsWithAlternateBuffer); + expect(config.getUseAlternateBuffer()).toBe(true); + }); + + it('should set useAlternateBuffer to false when explicitly provided as false', () => { + const paramsWithAlternateBuffer: ConfigParameters = { + ...baseParams, + useAlternateBuffer: false, + }; + const config = new Config(paramsWithAlternateBuffer); + expect(config.getUseAlternateBuffer()).toBe(false); + }); + }); + describe('UseWriteTodos Configuration', () => { it('should default useWriteTodos to true when not provided', () => { const config = new Config(baseParams); diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 32d74479e7..256e079fde 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -519,6 +519,7 @@ export interface ConfigParameters { interactive?: 
boolean; trustedFolder?: boolean; useBackgroundColor?: boolean; + useAlternateBuffer?: boolean; useRipgrep?: boolean; enableInteractiveShell?: boolean; skipNextSpeakerCheck?: boolean; @@ -702,6 +703,7 @@ export class Config { private readonly enableInteractiveShell: boolean; private readonly skipNextSpeakerCheck: boolean; private readonly useBackgroundColor: boolean; + private readonly useAlternateBuffer: boolean; private shellExecutionConfig: ShellExecutionConfig; private readonly extensionManagement: boolean = true; private readonly truncateToolOutputThreshold: number; @@ -900,6 +902,7 @@ export class Config { this.directWebFetch = params.directWebFetch ?? false; this.useRipgrep = params.useRipgrep ?? true; this.useBackgroundColor = params.useBackgroundColor ?? true; + this.useAlternateBuffer = params.useAlternateBuffer ?? false; this.enableInteractiveShell = params.enableInteractiveShell ?? false; this.skipNextSpeakerCheck = params.skipNextSpeakerCheck ?? true; this.shellExecutionConfig = { @@ -2521,6 +2524,10 @@ export class Config { return this.useBackgroundColor; } + getUseAlternateBuffer(): boolean { + return this.useAlternateBuffer; + } + getEnableInteractiveShell(): boolean { return this.enableInteractiveShell; } From 14dd07be00bb4cdf48b9da91ec362297040dd03c Mon Sep 17 00:00:00 2001 From: Jacob Richman Date: Fri, 27 Feb 2026 08:00:07 -0800 Subject: [PATCH 29/41] fix(cli): ensure dialogs stay scrolled to bottom in alternate buffer mode (#20527) --- .../ui/components/shared/Scrollable.test.tsx | 39 ++- .../src/ui/components/shared/Scrollable.tsx | 149 ++++++---- .../components/shared/ScrollableList.test.tsx | 273 ++++++++++++++++++ .../ui/components/shared/ScrollableList.tsx | 32 +- .../shared/VirtualizedList.test.tsx | 9 +- .../ui/components/shared/VirtualizedList.tsx | 261 ++++++++++------- 6 files changed, 571 insertions(+), 192 deletions(-) diff --git a/packages/cli/src/ui/components/shared/Scrollable.test.tsx 
b/packages/cli/src/ui/components/shared/Scrollable.test.tsx index db32a1a2e9..7772cdf22c 100644 --- a/packages/cli/src/ui/components/shared/Scrollable.test.tsx +++ b/packages/cli/src/ui/components/shared/Scrollable.test.tsx @@ -6,20 +6,11 @@ import { renderWithProviders } from '../../../test-utils/render.js'; import { Scrollable } from './Scrollable.js'; -import { Text } from 'ink'; +import { Text, Box } from 'ink'; import { describe, it, expect, vi, beforeEach } from 'vitest'; import * as ScrollProviderModule from '../../contexts/ScrollProvider.js'; import { act } from 'react'; - -vi.mock('ink', async (importOriginal) => { - const actual = await importOriginal(); - return { - ...actual, - getInnerHeight: vi.fn(() => 5), - getScrollHeight: vi.fn(() => 10), - getBoundingBox: vi.fn(() => ({ x: 0, y: 0, width: 10, height: 5 })), - }; -}); +import { waitFor } from '../../../test-utils/async.js'; vi.mock('../../hooks/useAnimatedScrollbar.js', () => ({ useAnimatedScrollbar: ( @@ -129,20 +120,26 @@ describe('', () => { , ); await waitUntilReady2(); - expect(capturedEntry.getScrollState().scrollTop).toBe(5); + await waitFor(() => { + expect(capturedEntry?.getScrollState().scrollTop).toBe(5); + }); // Call scrollBy multiple times (upwards) in the same tick await act(async () => { - capturedEntry!.scrollBy(-1); - capturedEntry!.scrollBy(-1); + capturedEntry?.scrollBy(-1); + capturedEntry?.scrollBy(-1); }); // Should have moved up by 2 (5 -> 3) - expect(capturedEntry.getScrollState().scrollTop).toBe(3); + await waitFor(() => { + expect(capturedEntry?.getScrollState().scrollTop).toBe(3); + }); await act(async () => { - capturedEntry!.scrollBy(-2); + capturedEntry?.scrollBy(-2); + }); + await waitFor(() => { + expect(capturedEntry?.getScrollState().scrollTop).toBe(1); }); - expect(capturedEntry.getScrollState().scrollTop).toBe(1); unmount2(); }); @@ -191,10 +188,6 @@ describe('', () => { keySequence, expectedScrollTop, }) => { - // Dynamically import ink to mock getScrollHeight 
- const ink = await import('ink'); - vi.mocked(ink.getScrollHeight).mockReturnValue(scrollHeight); - let capturedEntry: ScrollProviderModule.ScrollableEntry | undefined; vi.spyOn(ScrollProviderModule, 'useScrollable').mockImplementation( async (entry, isActive) => { @@ -206,7 +199,9 @@ describe('', () => { const { stdin, waitUntilReady, unmount } = renderWithProviders( - Content + + Content + , ); await waitUntilReady(); diff --git a/packages/cli/src/ui/components/shared/Scrollable.tsx b/packages/cli/src/ui/components/shared/Scrollable.tsx index a830cbecfe..87ec6e72d6 100644 --- a/packages/cli/src/ui/components/shared/Scrollable.tsx +++ b/packages/cli/src/ui/components/shared/Scrollable.tsx @@ -4,15 +4,9 @@ * SPDX-License-Identifier: Apache-2.0 */ -import React, { - useState, - useEffect, - useRef, - useLayoutEffect, - useCallback, - useMemo, -} from 'react'; -import { Box, getInnerHeight, getScrollHeight, type DOMElement } from 'ink'; +import type React from 'react'; +import { useState, useRef, useCallback, useMemo, useLayoutEffect } from 'react'; +import { Box, ResizeObserver, type DOMElement } from 'ink'; import { useKeypress, type Key } from '../../hooks/useKeypress.js'; import { useScrollable } from '../../contexts/ScrollProvider.js'; import { useAnimatedScrollbar } from '../../hooks/useAnimatedScrollbar.js'; @@ -41,62 +35,101 @@ export const Scrollable: React.FC = ({ flexGrow, }) => { const [scrollTop, setScrollTop] = useState(0); - const ref = useRef(null); + const viewportRef = useRef(null); + const contentRef = useRef(null); const [size, setSize] = useState({ - innerHeight: 0, + innerHeight: typeof height === 'number' ? 
height : 0, scrollHeight: 0, }); const sizeRef = useRef(size); - useEffect(() => { + const scrollTopRef = useRef(scrollTop); + + useLayoutEffect(() => { sizeRef.current = size; }, [size]); - const childrenCountRef = useRef(0); - - // This effect needs to run on every render to correctly measure the container - // and scroll to the bottom if new children are added. - // eslint-disable-next-line react-hooks/exhaustive-deps useLayoutEffect(() => { - if (!ref.current) { - return; + scrollTopRef.current = scrollTop; + }, [scrollTop]); + + const viewportObserverRef = useRef(null); + const contentObserverRef = useRef(null); + + const viewportRefCallback = useCallback((node: DOMElement | null) => { + viewportObserverRef.current?.disconnect(); + viewportRef.current = node; + + if (node) { + const observer = new ResizeObserver((entries) => { + const entry = entries[0]; + if (entry) { + const innerHeight = Math.round(entry.contentRect.height); + setSize((prev) => { + const scrollHeight = prev.scrollHeight; + const isAtBottom = + scrollHeight > prev.innerHeight && + scrollTopRef.current >= scrollHeight - prev.innerHeight - 1; + + if (isAtBottom) { + setScrollTop(Number.MAX_SAFE_INTEGER); + } + return { ...prev, innerHeight }; + }); + } + }); + observer.observe(node); + viewportObserverRef.current = observer; } - const innerHeight = Math.round(getInnerHeight(ref.current)); - const scrollHeight = Math.round(getScrollHeight(ref.current)); + }, []); - const isAtBottom = - scrollHeight > innerHeight && scrollTop >= scrollHeight - innerHeight - 1; + const contentRefCallback = useCallback( + (node: DOMElement | null) => { + contentObserverRef.current?.disconnect(); + contentRef.current = node; - if ( - size.innerHeight !== innerHeight || - size.scrollHeight !== scrollHeight - ) { - setSize({ innerHeight, scrollHeight }); - if (isAtBottom) { - setScrollTop(Math.max(0, scrollHeight - innerHeight)); + if (node) { + const observer = new ResizeObserver((entries) => { + const entry = 
entries[0]; + if (entry) { + const scrollHeight = Math.round(entry.contentRect.height); + setSize((prev) => { + const innerHeight = prev.innerHeight; + const isAtBottom = + prev.scrollHeight > innerHeight && + scrollTopRef.current >= prev.scrollHeight - innerHeight - 1; + + if ( + isAtBottom || + (scrollToBottom && scrollHeight > prev.scrollHeight) + ) { + setScrollTop(Number.MAX_SAFE_INTEGER); + } + return { ...prev, scrollHeight }; + }); + } + }); + observer.observe(node); + contentObserverRef.current = observer; } - } - - const childCountCurrent = React.Children.count(children); - if (scrollToBottom && childrenCountRef.current !== childCountCurrent) { - setScrollTop(Math.max(0, scrollHeight - innerHeight)); - } - childrenCountRef.current = childCountCurrent; - }); + }, + [scrollToBottom], + ); const { getScrollTop, setPendingScrollTop } = useBatchedScroll(scrollTop); const scrollBy = useCallback( (delta: number) => { const { scrollHeight, innerHeight } = sizeRef.current; - const current = getScrollTop(); - const next = Math.min( - Math.max(0, current + delta), - Math.max(0, scrollHeight - innerHeight), - ); + const maxScroll = Math.max(0, scrollHeight - innerHeight); + const current = Math.min(getScrollTop(), maxScroll); + let next = Math.max(0, current + delta); + if (next >= maxScroll) { + next = Number.MAX_SAFE_INTEGER; + } setPendingScrollTop(next); setScrollTop(next); }, - [sizeRef, getScrollTop, setPendingScrollTop], + [getScrollTop, setPendingScrollTop], ); const { scrollbarColor, flashScrollbar, scrollByWithAnimation } = @@ -107,10 +140,11 @@ export const Scrollable: React.FC = ({ const { scrollHeight, innerHeight } = sizeRef.current; const scrollTop = getScrollTop(); const maxScroll = Math.max(0, scrollHeight - innerHeight); + const actualScrollTop = Math.min(scrollTop, maxScroll); // Only capture scroll-up events if there's room; // otherwise allow events to bubble. 
- if (scrollTop > 0) { + if (actualScrollTop > 0) { if (keyMatchers[Command.PAGE_UP](key)) { scrollByWithAnimation(-innerHeight); return true; @@ -123,7 +157,7 @@ export const Scrollable: React.FC = ({ // Only capture scroll-down events if there's room; // otherwise allow events to bubble. - if (scrollTop < maxScroll) { + if (actualScrollTop < maxScroll) { if (keyMatchers[Command.PAGE_DOWN](key)) { scrollByWithAnimation(innerHeight); return true; @@ -140,21 +174,21 @@ export const Scrollable: React.FC = ({ { isActive: hasFocus }, ); - const getScrollState = useCallback( - () => ({ - scrollTop: getScrollTop(), + const getScrollState = useCallback(() => { + const maxScroll = Math.max(0, size.scrollHeight - size.innerHeight); + return { + scrollTop: Math.min(getScrollTop(), maxScroll), scrollHeight: size.scrollHeight, innerHeight: size.innerHeight, - }), - [getScrollTop, size.scrollHeight, size.innerHeight], - ); + }; + }, [getScrollTop, size.scrollHeight, size.innerHeight]); const hasFocusCallback = useCallback(() => hasFocus, [hasFocus]); const scrollableEntry = useMemo( () => ({ // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - ref: ref as React.RefObject, + ref: viewportRef as React.RefObject, getScrollState, scrollBy: scrollByWithAnimation, hasFocus: hasFocusCallback, @@ -167,7 +201,7 @@ export const Scrollable: React.FC = ({ return ( = ({ based on the children's content. It also adds a right padding to make room for the scrollbar. 
*/} - + {children} diff --git a/packages/cli/src/ui/components/shared/ScrollableList.test.tsx b/packages/cli/src/ui/components/shared/ScrollableList.test.tsx index 8b8c4e3fdf..1dd72b89a2 100644 --- a/packages/cli/src/ui/components/shared/ScrollableList.test.tsx +++ b/packages/cli/src/ui/components/shared/ScrollableList.test.tsx @@ -479,4 +479,277 @@ describe('ScrollableList Demo Behavior', () => { }); }); }); + + it('regression: remove last item and add 2 items when scrolled to bottom', async () => { + let listRef: ScrollableListRef | null = null; + let setItemsFunc: React.Dispatch> | null = + null; + + const TestComp = () => { + const [items, setItems] = useState( + Array.from({ length: 10 }, (_, i) => ({ + id: String(i), + title: `Item ${i}`, + })), + ); + useEffect(() => { + setItemsFunc = setItems; + }, []); + + return ( + + + + + { + listRef = ref; + }} + data={items} + renderItem={({ item }) => {item.title}} + estimatedItemHeight={() => 1} + keyExtractor={(item) => item.id} + hasFocus={true} + initialScrollIndex={Number.MAX_SAFE_INTEGER} + /> + + + + + ); + }; + + let result: ReturnType; + await act(async () => { + result = render(); + }); + + await result!.waitUntilReady(); + + // Scrolled to bottom, max scroll = 10 - 5 = 5 + await waitFor(() => { + expect(listRef?.getScrollState()?.scrollTop).toBe(5); + }); + + // Remove last element and add 2 elements + await act(async () => { + setItemsFunc!((prev) => { + const next = prev.slice(0, prev.length - 1); + next.push({ id: '10', title: 'Item 10' }); + next.push({ id: '11', title: 'Item 11' }); + return next; + }); + }); + + await result!.waitUntilReady(); + + // Auto scrolls to new bottom: max scroll = 11 - 5 = 6 + await waitFor(() => { + expect(listRef?.getScrollState()?.scrollTop).toBe(6); + }); + + // Scroll up slightly + await act(async () => { + listRef?.scrollBy(-2); + }); + await result!.waitUntilReady(); + + await waitFor(() => { + expect(listRef?.getScrollState()?.scrollTop).toBe(4); + }); + + // 
Scroll back to bottom + await act(async () => { + listRef?.scrollToEnd(); + }); + await result!.waitUntilReady(); + + await waitFor(() => { + expect(listRef?.getScrollState()?.scrollTop).toBe(6); + }); + + // Add two more elements + await act(async () => { + setItemsFunc!((prev) => [ + ...prev, + { id: '12', title: 'Item 12' }, + { id: '13', title: 'Item 13' }, + ]); + }); + + await result!.waitUntilReady(); + + // Auto scrolls to bottom: max scroll = 13 - 5 = 8 + await waitFor(() => { + expect(listRef?.getScrollState()?.scrollTop).toBe(8); + }); + + result!.unmount(); + }); + + it('regression: bottom-most element changes size but list does not update', async () => { + let listRef: ScrollableListRef | null = null; + let expandLastFunc: (() => void) | null = null; + + const ItemWithState = ({ + item, + isLast, + }: { + item: Item; + isLast: boolean; + }) => { + const [expanded, setExpanded] = useState(false); + useEffect(() => { + if (isLast) { + expandLastFunc = () => setExpanded(true); + } + }, [isLast]); + return ( + + {item.title} + {expanded && Expanded content} + + ); + }; + + const TestComp = () => { + // items array is stable + const [items] = useState(() => + Array.from({ length: 5 }, (_, i) => ({ + id: String(i), + title: `Item ${i}`, + })), + ); + + return ( + + + + + { + listRef = ref; + }} + data={items} + renderItem={({ item, index }) => ( + + )} + estimatedItemHeight={() => 1} + keyExtractor={(item) => item.id} + hasFocus={true} + initialScrollIndex={Number.MAX_SAFE_INTEGER} + /> + + + + + ); + }; + + let result: ReturnType; + await act(async () => { + result = render(); + }); + + await result!.waitUntilReady(); + + // Initially, total height is 5. viewport is 4. scroll is 1. + await waitFor(() => { + expect(listRef?.getScrollState()?.scrollTop).toBe(1); + }); + + // Expand the last item locally, without re-rendering the list! + await act(async () => { + expandLastFunc!(); + }); + + await result!.waitUntilReady(); + + // The total height becomes 6. 
It should remain scrolled to bottom, so scroll becomes 2. + // This is expected to FAIL currently because VirtualizedList won't remeasure + // unless data changes or container height changes. + await waitFor( + () => { + expect(listRef?.getScrollState()?.scrollTop).toBe(2); + }, + { timeout: 1000 }, + ); + + result!.unmount(); + }); + + it('regression: prepending items does not corrupt heights (total height correct)', async () => { + let listRef: ScrollableListRef | null = null; + let setItemsFunc: React.Dispatch> | null = + null; + + const TestComp = () => { + // Items 1 to 5. Item 1 is very tall. + const [items, setItems] = useState( + Array.from({ length: 5 }, (_, i) => ({ + id: String(i + 1), + title: `Item ${i + 1}`, + })), + ); + useEffect(() => { + setItemsFunc = setItems; + }, []); + + return ( + + + + + { + listRef = ref; + }} + data={items} + renderItem={({ item }) => ( + + {item.title} + + )} + estimatedItemHeight={() => 2} + keyExtractor={(item) => item.id} + hasFocus={true} + initialScrollIndex={Number.MAX_SAFE_INTEGER} + /> + + + + + ); + }; + + let result: ReturnType; + await act(async () => { + result = render(); + }); + + await result!.waitUntilReady(); + + // Scroll is at bottom. + // Heights: Item 1: 10, Item 2: 2, Item 3: 2, Item 4: 2, Item 5: 2. + // Total height = 18. Container = 10. Max scroll = 8. + await waitFor(() => { + expect(listRef?.getScrollState()?.scrollTop).toBe(8); + }); + + // Prepend an item! + await act(async () => { + setItemsFunc!((prev) => [{ id: '0', title: 'Item 0' }, ...prev]); + }); + + await result!.waitUntilReady(); + + // Now items: 0(2), 1(10), 2(2), 3(2), 4(2), 5(2). + // Total height = 20. Container = 10. Max scroll = 10. + // Auto-scrolls to bottom because it was sticking! 
+ await waitFor(() => { + expect(listRef?.getScrollState()?.scrollTop).toBe(10); + }); + + result!.unmount(); + }); }); diff --git a/packages/cli/src/ui/components/shared/ScrollableList.tsx b/packages/cli/src/ui/components/shared/ScrollableList.tsx index e51acd6446..b7085329a3 100644 --- a/packages/cli/src/ui/components/shared/ScrollableList.tsx +++ b/packages/cli/src/ui/components/shared/ScrollableList.tsx @@ -10,7 +10,7 @@ import { useImperativeHandle, useCallback, useMemo, - useEffect, + useLayoutEffect, } from 'react'; import type React from 'react'; import { @@ -105,7 +105,7 @@ function ScrollableList( smoothScrollState.current.active = false; }, []); - useEffect(() => stopSmoothScroll, [stopSmoothScroll]); + useLayoutEffect(() => stopSmoothScroll, [stopSmoothScroll]); const smoothScrollTo = useCallback( ( @@ -120,15 +120,19 @@ function ScrollableList( innerHeight: 0, }; const { - scrollTop: startScrollTop, + scrollTop: rawStartScrollTop, scrollHeight, innerHeight, } = scrollState; const maxScrollTop = Math.max(0, scrollHeight - innerHeight); + const startScrollTop = Math.min(rawStartScrollTop, maxScrollTop); let effectiveTarget = targetScrollTop; - if (targetScrollTop === SCROLL_TO_ITEM_END) { + if ( + targetScrollTop === SCROLL_TO_ITEM_END || + targetScrollTop >= maxScrollTop + ) { effectiveTarget = maxScrollTop; } @@ -138,8 +142,11 @@ function ScrollableList( ); if (duration === 0) { - if (targetScrollTop === SCROLL_TO_ITEM_END) { - virtualizedListRef.current?.scrollTo(SCROLL_TO_ITEM_END); + if ( + targetScrollTop === SCROLL_TO_ITEM_END || + targetScrollTop >= maxScrollTop + ) { + virtualizedListRef.current?.scrollTo(Number.MAX_SAFE_INTEGER); } else { virtualizedListRef.current?.scrollTo(Math.round(clampedTarget)); } @@ -168,8 +175,11 @@ function ScrollableList( ease; if (progress >= 1) { - if (targetScrollTop === SCROLL_TO_ITEM_END) { - virtualizedListRef.current?.scrollTo(SCROLL_TO_ITEM_END); + if ( + targetScrollTop === SCROLL_TO_ITEM_END || + 
targetScrollTop >= maxScrollTop + ) { + virtualizedListRef.current?.scrollTo(Number.MAX_SAFE_INTEGER); } else { virtualizedListRef.current?.scrollTo(Math.round(current)); } @@ -200,9 +210,13 @@ function ScrollableList( ) { const direction = keyMatchers[Command.PAGE_UP](key) ? -1 : 1; const scrollState = getScrollState(); + const maxScroll = Math.max( + 0, + scrollState.scrollHeight - scrollState.innerHeight, + ); const current = smoothScrollState.current.active ? smoothScrollState.current.to - : scrollState.scrollTop; + : Math.min(scrollState.scrollTop, maxScroll); const innerHeight = scrollState.innerHeight; smoothScrollTo(current + direction * innerHeight); return true; diff --git a/packages/cli/src/ui/components/shared/VirtualizedList.test.tsx b/packages/cli/src/ui/components/shared/VirtualizedList.test.tsx index 0edc323d38..60b8bfc421 100644 --- a/packages/cli/src/ui/components/shared/VirtualizedList.test.tsx +++ b/packages/cli/src/ui/components/shared/VirtualizedList.test.tsx @@ -5,6 +5,7 @@ */ import { render } from '../../../test-utils/render.js'; +import { waitFor } from '../../../test-utils/async.js'; import { VirtualizedList, type VirtualizedListRef } from './VirtualizedList.js'; import { Text, Box } from 'ink'; import { @@ -275,9 +276,11 @@ describe('', () => { await waitUntilReady(); // Now Item 0 is 1px, so Items 1-9 should also be visible to fill 10px - expect(lastFrame()).toContain('Item 0'); - expect(lastFrame()).toContain('Item 1'); - expect(lastFrame()).toContain('Item 9'); + await waitFor(() => { + expect(lastFrame()).toContain('Item 0'); + expect(lastFrame()).toContain('Item 1'); + expect(lastFrame()).toContain('Item 9'); + }); unmount(); }); diff --git a/packages/cli/src/ui/components/shared/VirtualizedList.tsx b/packages/cli/src/ui/components/shared/VirtualizedList.tsx index 98e45a695e..669b1bc035 100644 --- a/packages/cli/src/ui/components/shared/VirtualizedList.tsx +++ b/packages/cli/src/ui/components/shared/VirtualizedList.tsx @@ -10,7 
+10,6 @@ import { useLayoutEffect, forwardRef, useImperativeHandle, - useEffect, useMemo, useCallback, } from 'react'; @@ -19,7 +18,7 @@ import { theme } from '../../semantic-colors.js'; import { useBatchedScroll } from '../../hooks/useBatchedScroll.js'; import { useUIState } from '../../contexts/UIStateContext.js'; -import { type DOMElement, measureElement, Box } from 'ink'; +import { type DOMElement, Box, ResizeObserver } from 'ink'; export const SCROLL_TO_ITEM_END = Number.MAX_SAFE_INTEGER; @@ -81,7 +80,7 @@ function VirtualizedList( } = props; const { copyModeEnabled } = useUIState(); const dataRef = useRef(data); - useEffect(() => { + useLayoutEffect(() => { dataRef.current = data; }, [data]); @@ -108,6 +107,7 @@ function VirtualizedList( return { index: 0, offset: 0 }; }); + const [isStickingToBottom, setIsStickingToBottom] = useState(() => { const scrollToEnd = initialScrollIndex === SCROLL_TO_ITEM_END || @@ -116,73 +116,75 @@ function VirtualizedList( initialScrollOffsetInIndex === SCROLL_TO_ITEM_END); return scrollToEnd; }); - const containerRef = useRef(null); + + const containerRef = useRef(null); const [containerHeight, setContainerHeight] = useState(0); const itemRefs = useRef>([]); - const [heights, setHeights] = useState([]); + const [heights, setHeights] = useState>({}); const isInitialScrollSet = useRef(false); + const containerObserverRef = useRef(null); + const nodeToKeyRef = useRef(new WeakMap()); + + const containerRefCallback = useCallback((node: DOMElement | null) => { + containerObserverRef.current?.disconnect(); + containerRef.current = node; + if (node) { + const observer = new ResizeObserver((entries) => { + const entry = entries[0]; + if (entry) { + setContainerHeight(Math.round(entry.contentRect.height)); + } + }); + observer.observe(node); + containerObserverRef.current = observer; + } + }, []); + + const itemsObserver = useMemo( + () => + new ResizeObserver((entries) => { + setHeights((prev) => { + let next: Record | null = null; + 
for (const entry of entries) { + const key = nodeToKeyRef.current.get(entry.target); + if (key !== undefined) { + const height = Math.round(entry.contentRect.height); + if (prev[key] !== height) { + if (!next) { + next = { ...prev }; + } + next[key] = height; + } + } + } + return next ?? prev; + }); + }), + [], + ); + + useLayoutEffect( + () => () => { + containerObserverRef.current?.disconnect(); + itemsObserver.disconnect(); + }, + [itemsObserver], + ); + const { totalHeight, offsets } = useMemo(() => { const offsets: number[] = [0]; let totalHeight = 0; for (let i = 0; i < data.length; i++) { - const height = heights[i] ?? estimatedItemHeight(i); + const key = keyExtractor(data[i], i); + const height = heights[key] ?? estimatedItemHeight(i); totalHeight += height; offsets.push(totalHeight); } return { totalHeight, offsets }; - }, [heights, data, estimatedItemHeight]); + }, [heights, data, estimatedItemHeight, keyExtractor]); - useEffect(() => { - setHeights((prevHeights) => { - if (data.length === prevHeights.length) { - return prevHeights; - } - - const newHeights = [...prevHeights]; - if (data.length < prevHeights.length) { - newHeights.length = data.length; - } else { - for (let i = prevHeights.length; i < data.length; i++) { - newHeights[i] = estimatedItemHeight(i); - } - } - return newHeights; - }); - }, [data, estimatedItemHeight]); - - // This layout effect needs to run on every render to correctly measure the - // container and ensure we recompute the layout if it has changed. 
- // eslint-disable-next-line react-hooks/exhaustive-deps - useLayoutEffect(() => { - if (containerRef.current) { - const height = Math.round(measureElement(containerRef.current).height); - if (containerHeight !== height) { - setContainerHeight(height); - } - } - - let newHeights: number[] | null = null; - for (let i = startIndex; i <= endIndex; i++) { - const itemRef = itemRefs.current[i]; - if (itemRef) { - const height = Math.round(measureElement(itemRef).height); - if (height !== heights[i]) { - if (!newHeights) { - newHeights = [...heights]; - } - newHeights[i] = height; - } - } - } - if (newHeights) { - setHeights(newHeights); - } - }); - - const scrollableContainerHeight = containerRef.current - ? Math.round(measureElement(containerRef.current).height) - : containerHeight; + const scrollableContainerHeight = containerHeight; const getAnchorForScrollTop = useCallback( ( @@ -199,23 +201,36 @@ function VirtualizedList( [], ); - const scrollTop = useMemo(() => { + const actualScrollTop = useMemo(() => { const offset = offsets[scrollAnchor.index]; if (typeof offset !== 'number') { return 0; } if (scrollAnchor.offset === SCROLL_TO_ITEM_END) { - const itemHeight = heights[scrollAnchor.index] ?? 0; + const item = data[scrollAnchor.index]; + const key = item ? keyExtractor(item, scrollAnchor.index) : ''; + const itemHeight = heights[key] ?? 0; return offset + itemHeight - scrollableContainerHeight; } return offset + scrollAnchor.offset; - }, [scrollAnchor, offsets, heights, scrollableContainerHeight]); + }, [ + scrollAnchor, + offsets, + heights, + scrollableContainerHeight, + data, + keyExtractor, + ]); + + const scrollTop = isStickingToBottom + ? 
Number.MAX_SAFE_INTEGER + : actualScrollTop; const prevDataLength = useRef(data.length); const prevTotalHeight = useRef(totalHeight); - const prevScrollTop = useRef(scrollTop); + const prevScrollTop = useRef(actualScrollTop); const prevContainerHeight = useRef(scrollableContainerHeight); useLayoutEffect(() => { @@ -226,9 +241,7 @@ function VirtualizedList( prevTotalHeight.current - prevContainerHeight.current - 1; const wasAtBottom = contentPreviouslyFit || wasScrolledToBottomPixels; - // If the user was at the bottom, they are now sticking. This handles - // manually scrolling back to the bottom. - if (wasAtBottom && scrollTop >= prevScrollTop.current) { + if (wasAtBottom && actualScrollTop >= prevScrollTop.current) { setIsStickingToBottom(true); } @@ -236,9 +249,6 @@ function VirtualizedList( const containerChanged = prevContainerHeight.current !== scrollableContainerHeight; - // We scroll to the end if: - // 1. The list grew AND we were already at the bottom (or sticking). - // 2. We are sticking to the bottom AND the container size changed. if ( (listGrew && (isStickingToBottom || wasAtBottom)) || (isStickingToBottom && containerChanged) @@ -247,34 +257,28 @@ function VirtualizedList( index: data.length > 0 ? data.length - 1 : 0, offset: SCROLL_TO_ITEM_END, }); - // If we are scrolling to the bottom, we are by definition sticking. if (!isStickingToBottom) { setIsStickingToBottom(true); } - } - // Scenario 2: The list has changed (shrunk) in a way that our - // current scroll position or anchor is invalid. We should adjust to the bottom. - else if ( + } else if ( (scrollAnchor.index >= data.length || - scrollTop > totalHeight - scrollableContainerHeight) && + actualScrollTop > totalHeight - scrollableContainerHeight) && data.length > 0 ) { const newScrollTop = Math.max(0, totalHeight - scrollableContainerHeight); setScrollAnchor(getAnchorForScrollTop(newScrollTop, offsets)); } else if (data.length === 0) { - // List is now empty, reset scroll to top. 
setScrollAnchor({ index: 0, offset: 0 }); } - // Update refs for the next render cycle. prevDataLength.current = data.length; prevTotalHeight.current = totalHeight; - prevScrollTop.current = scrollTop; + prevScrollTop.current = actualScrollTop; prevContainerHeight.current = scrollableContainerHeight; }, [ data.length, totalHeight, - scrollTop, + actualScrollTop, scrollableContainerHeight, scrollAnchor.index, getAnchorForScrollTop, @@ -334,10 +338,10 @@ function VirtualizedList( const startIndex = Math.max( 0, - findLastIndex(offsets, (offset) => offset <= scrollTop) - 1, + findLastIndex(offsets, (offset) => offset <= actualScrollTop) - 1, ); const endIndexOffset = offsets.findIndex( - (offset) => offset > scrollTop + scrollableContainerHeight, + (offset) => offset > actualScrollTop + scrollableContainerHeight, ); const endIndex = endIndexOffset === -1 @@ -348,6 +352,32 @@ function VirtualizedList( const bottomSpacerHeight = totalHeight - (offsets[endIndex + 1] ?? totalHeight); + // Maintain a stable set of observed nodes using useLayoutEffect + const observedNodes = useRef>(new Set()); + useLayoutEffect(() => { + const currentNodes = new Set(); + for (let i = startIndex; i <= endIndex; i++) { + const node = itemRefs.current[i]; + const item = data[i]; + if (node && item) { + currentNodes.add(node); + const key = keyExtractor(item, i); + // Always update the key mapping because React can reuse nodes at different indices/keys + nodeToKeyRef.current.set(node, key); + if (!observedNodes.current.has(node)) { + itemsObserver.observe(node); + } + } + } + for (const node of observedNodes.current) { + if (!currentNodes.has(node)) { + itemsObserver.unobserve(node); + nodeToKeyRef.current.delete(node); + } + } + observedNodes.current = currentNodes; + }); + const renderedItems = []; for (let i = startIndex; i <= endIndex; i++) { const item = data[i]; @@ -356,6 +386,8 @@ function VirtualizedList( { itemRefs.current[i] = el; }} @@ -376,27 +408,39 @@ function VirtualizedList( 
setIsStickingToBottom(false); } const currentScrollTop = getScrollTop(); - const newScrollTop = Math.max( - 0, - Math.min( - totalHeight - scrollableContainerHeight, - currentScrollTop + delta, - ), - ); + const maxScroll = Math.max(0, totalHeight - scrollableContainerHeight); + const actualCurrent = Math.min(currentScrollTop, maxScroll); + let newScrollTop = Math.max(0, actualCurrent + delta); + if (newScrollTop >= maxScroll) { + setIsStickingToBottom(true); + newScrollTop = Number.MAX_SAFE_INTEGER; + } setPendingScrollTop(newScrollTop); - setScrollAnchor(getAnchorForScrollTop(newScrollTop, offsets)); + setScrollAnchor( + getAnchorForScrollTop(Math.min(newScrollTop, maxScroll), offsets), + ); }, scrollTo: (offset: number) => { - setIsStickingToBottom(false); - const newScrollTop = Math.max( - 0, - Math.min(totalHeight - scrollableContainerHeight, offset), - ); - setPendingScrollTop(newScrollTop); - setScrollAnchor(getAnchorForScrollTop(newScrollTop, offsets)); + const maxScroll = Math.max(0, totalHeight - scrollableContainerHeight); + if (offset >= maxScroll || offset === SCROLL_TO_ITEM_END) { + setIsStickingToBottom(true); + setPendingScrollTop(Number.MAX_SAFE_INTEGER); + if (data.length > 0) { + setScrollAnchor({ + index: data.length - 1, + offset: SCROLL_TO_ITEM_END, + }); + } + } else { + setIsStickingToBottom(false); + const newScrollTop = Math.max(0, offset); + setPendingScrollTop(newScrollTop); + setScrollAnchor(getAnchorForScrollTop(newScrollTop, offsets)); + } }, scrollToEnd: () => { setIsStickingToBottom(true); + setPendingScrollTop(Number.MAX_SAFE_INTEGER); if (data.length > 0) { setScrollAnchor({ index: data.length - 1, @@ -416,10 +460,14 @@ function VirtualizedList( setIsStickingToBottom(false); const offset = offsets[index]; if (offset !== undefined) { + const maxScroll = Math.max( + 0, + totalHeight - scrollableContainerHeight, + ); const newScrollTop = Math.max( 0, Math.min( - totalHeight - scrollableContainerHeight, + maxScroll, offset - 
viewPosition * scrollableContainerHeight + viewOffset, ), ); @@ -441,10 +489,14 @@ function VirtualizedList( if (index !== -1) { const offset = offsets[index]; if (offset !== undefined) { + const maxScroll = Math.max( + 0, + totalHeight - scrollableContainerHeight, + ); const newScrollTop = Math.max( 0, Math.min( - totalHeight - scrollableContainerHeight, + maxScroll, offset - viewPosition * scrollableContainerHeight + viewOffset, ), ); @@ -454,11 +506,14 @@ function VirtualizedList( } }, getScrollIndex: () => scrollAnchor.index, - getScrollState: () => ({ - scrollTop: getScrollTop(), - scrollHeight: totalHeight, - innerHeight: containerHeight, - }), + getScrollState: () => { + const maxScroll = Math.max(0, totalHeight - containerHeight); + return { + scrollTop: Math.min(getScrollTop(), maxScroll), + scrollHeight: totalHeight, + innerHeight: containerHeight, + }; + }, }), [ offsets, @@ -475,7 +530,7 @@ function VirtualizedList( return ( ( flexShrink={0} width="100%" flexDirection="column" - marginTop={copyModeEnabled ? -scrollTop : 0} + marginTop={copyModeEnabled ? 
-actualScrollTop : 0} > {renderedItems} From 32e777f8387f6bcb6f6b2eafa353e861a2e1e47c Mon Sep 17 00:00:00 2001 From: Abhijit Balaji Date: Fri, 27 Feb 2026 08:03:36 -0800 Subject: [PATCH 30/41] fix(core): revert auto-save of policies to user space (#20531) --- packages/core/src/config/storage.ts | 5 +-- packages/core/src/policy/config.ts | 3 +- packages/core/src/policy/persistence.test.ts | 43 ++++--------------- .../core/src/policy/policy-updater.test.ts | 4 +- 4 files changed, 13 insertions(+), 42 deletions(-) diff --git a/packages/core/src/config/storage.ts b/packages/core/src/config/storage.ts index e8530887b3..10e88543ba 100644 --- a/packages/core/src/config/storage.ts +++ b/packages/core/src/config/storage.ts @@ -169,10 +169,7 @@ export class Storage { } getAutoSavedPolicyPath(): string { - return path.join( - this.getWorkspacePoliciesDir(), - AUTO_SAVED_POLICY_FILENAME, - ); + return path.join(Storage.getUserPoliciesDir(), AUTO_SAVED_POLICY_FILENAME); } ensureProjectTempDirExists(): void { diff --git a/packages/core/src/policy/config.ts b/packages/core/src/policy/config.ts index 800006e27e..6cdfc199d2 100644 --- a/packages/core/src/policy/config.ts +++ b/packages/core/src/policy/config.ts @@ -516,9 +516,8 @@ export function createPolicyUpdater( if (message.persist) { persistenceQueue = persistenceQueue.then(async () => { try { - const workspacePoliciesDir = storage.getWorkspacePoliciesDir(); - await fs.mkdir(workspacePoliciesDir, { recursive: true }); const policyFile = storage.getAutoSavedPolicyPath(); + await fs.mkdir(path.dirname(policyFile), { recursive: true }); // Read existing file let existingData: { rule?: TomlRule[] } = {}; diff --git a/packages/core/src/policy/persistence.test.ts b/packages/core/src/policy/persistence.test.ts index 43f52a956d..c5a71fdd93 100644 --- a/packages/core/src/policy/persistence.test.ts +++ b/packages/core/src/policy/persistence.test.ts @@ -48,14 +48,8 @@ describe('createPolicyUpdater', () => { it('should persist policy 
when persist flag is true', async () => { createPolicyUpdater(policyEngine, messageBus, mockStorage); - const workspacePoliciesDir = '/mock/project/.gemini/policies'; - const policyFile = path.join( - workspacePoliciesDir, - AUTO_SAVED_POLICY_FILENAME, - ); - vi.spyOn(mockStorage, 'getWorkspacePoliciesDir').mockReturnValue( - workspacePoliciesDir, - ); + const userPoliciesDir = '/mock/user/.gemini/policies'; + const policyFile = path.join(userPoliciesDir, AUTO_SAVED_POLICY_FILENAME); vi.spyOn(mockStorage, 'getAutoSavedPolicyPath').mockReturnValue(policyFile); (fs.mkdir as unknown as Mock).mockResolvedValue(undefined); (fs.readFile as unknown as Mock).mockRejectedValue( @@ -79,8 +73,7 @@ describe('createPolicyUpdater', () => { // Wait for async operations (microtasks) await new Promise((resolve) => setTimeout(resolve, 0)); - expect(mockStorage.getWorkspacePoliciesDir).toHaveBeenCalled(); - expect(fs.mkdir).toHaveBeenCalledWith(workspacePoliciesDir, { + expect(fs.mkdir).toHaveBeenCalledWith(userPoliciesDir, { recursive: true, }); @@ -115,14 +108,8 @@ describe('createPolicyUpdater', () => { it('should persist policy with commandPrefix when provided', async () => { createPolicyUpdater(policyEngine, messageBus, mockStorage); - const workspacePoliciesDir = '/mock/project/.gemini/policies'; - const policyFile = path.join( - workspacePoliciesDir, - AUTO_SAVED_POLICY_FILENAME, - ); - vi.spyOn(mockStorage, 'getWorkspacePoliciesDir').mockReturnValue( - workspacePoliciesDir, - ); + const userPoliciesDir = '/mock/user/.gemini/policies'; + const policyFile = path.join(userPoliciesDir, AUTO_SAVED_POLICY_FILENAME); vi.spyOn(mockStorage, 'getAutoSavedPolicyPath').mockReturnValue(policyFile); (fs.mkdir as unknown as Mock).mockResolvedValue(undefined); (fs.readFile as unknown as Mock).mockRejectedValue( @@ -168,14 +155,8 @@ describe('createPolicyUpdater', () => { it('should persist policy with mcpName and toolName when provided', async () => { createPolicyUpdater(policyEngine, 
messageBus, mockStorage); - const workspacePoliciesDir = '/mock/project/.gemini/policies'; - const policyFile = path.join( - workspacePoliciesDir, - AUTO_SAVED_POLICY_FILENAME, - ); - vi.spyOn(mockStorage, 'getWorkspacePoliciesDir').mockReturnValue( - workspacePoliciesDir, - ); + const userPoliciesDir = '/mock/user/.gemini/policies'; + const policyFile = path.join(userPoliciesDir, AUTO_SAVED_POLICY_FILENAME); vi.spyOn(mockStorage, 'getAutoSavedPolicyPath').mockReturnValue(policyFile); (fs.mkdir as unknown as Mock).mockResolvedValue(undefined); (fs.readFile as unknown as Mock).mockRejectedValue( @@ -214,14 +195,8 @@ describe('createPolicyUpdater', () => { it('should escape special characters in toolName and mcpName', async () => { createPolicyUpdater(policyEngine, messageBus, mockStorage); - const workspacePoliciesDir = '/mock/project/.gemini/policies'; - const policyFile = path.join( - workspacePoliciesDir, - AUTO_SAVED_POLICY_FILENAME, - ); - vi.spyOn(mockStorage, 'getWorkspacePoliciesDir').mockReturnValue( - workspacePoliciesDir, - ); + const userPoliciesDir = '/mock/user/.gemini/policies'; + const policyFile = path.join(userPoliciesDir, AUTO_SAVED_POLICY_FILENAME); vi.spyOn(mockStorage, 'getAutoSavedPolicyPath').mockReturnValue(policyFile); (fs.mkdir as unknown as Mock).mockResolvedValue(undefined); (fs.readFile as unknown as Mock).mockRejectedValue( diff --git a/packages/core/src/policy/policy-updater.test.ts b/packages/core/src/policy/policy-updater.test.ts index 40780a1850..3037667949 100644 --- a/packages/core/src/policy/policy-updater.test.ts +++ b/packages/core/src/policy/policy-updater.test.ts @@ -50,8 +50,8 @@ describe('createPolicyUpdater', () => { messageBus = new MessageBus(policyEngine); mockStorage = new Storage('/mock/project'); - vi.spyOn(mockStorage, 'getWorkspacePoliciesDir').mockReturnValue( - '/mock/project/.gemini/policies', + vi.spyOn(mockStorage, 'getAutoSavedPolicyPath').mockReturnValue( + '/mock/user/.gemini/policies/auto-saved.toml', ); 
}); From 514d43104954d8d3fd5b87210d9c3869a0131808 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Fri, 27 Feb 2026 16:48:46 +0000 Subject: [PATCH 31/41] Demote unreliable test. (#20571) --- evals/validation_fidelity.eval.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evals/validation_fidelity.eval.ts b/evals/validation_fidelity.eval.ts index d8f571773d..8cfb4f6626 100644 --- a/evals/validation_fidelity.eval.ts +++ b/evals/validation_fidelity.eval.ts @@ -8,7 +8,7 @@ import { describe, expect } from 'vitest'; import { evalTest } from './test-helper.js'; describe('validation_fidelity', () => { - evalTest('ALWAYS_PASSES', { + evalTest('USUALLY_PASSES', { name: 'should perform exhaustive validation autonomously when guided by system instructions', files: { 'src/types.ts': ` From e709789067e833823ce3528160806c30d40a8338 Mon Sep 17 00:00:00 2001 From: Sehoon Shon Date: Fri, 27 Feb 2026 11:52:37 -0500 Subject: [PATCH 32/41] fix(core): handle optional response fields from code assist API (#20345) --- .../cli/src/ui/hooks/usePrivacySettings.ts | 15 +++++++++-- .../core/src/code_assist/converter.test.ts | 8 +++--- packages/core/src/code_assist/converter.ts | 14 ++++++++--- packages/core/src/code_assist/setup.ts | 25 ++++++++++++++++--- packages/core/src/code_assist/types.ts | 20 +++++++-------- 5 files changed, 58 insertions(+), 24 deletions(-) diff --git a/packages/cli/src/ui/hooks/usePrivacySettings.ts b/packages/cli/src/ui/hooks/usePrivacySettings.ts index 64a9673812..7bf5a5ff1b 100644 --- a/packages/cli/src/ui/hooks/usePrivacySettings.ts +++ b/packages/cli/src/ui/hooks/usePrivacySettings.ts @@ -10,6 +10,7 @@ import { type CodeAssistServer, UserTierId, getCodeAssistServer, + debugLogger, } from '@google/gemini-cli-core'; export interface PrivacyState { @@ -103,7 +104,12 @@ async function getRemoteDataCollectionOptIn( ): Promise { try { const resp = await server.getCodeAssistGlobalUserSetting(); - return resp.freeTierDataCollectionOptin; + if 
(resp.freeTierDataCollectionOptin === undefined) { + debugLogger.warn( + 'Warning: Code Assist API did not return freeTierDataCollectionOptin. Defaulting to true.', + ); + } + return resp.freeTierDataCollectionOptin ?? true; } catch (error: unknown) { if (error && typeof error === 'object' && 'response' in error) { // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion @@ -128,5 +134,10 @@ async function setRemoteDataCollectionOptIn( cloudaicompanionProject: server.projectId, freeTierDataCollectionOptin: optIn, }); - return resp.freeTierDataCollectionOptin; + if (resp.freeTierDataCollectionOptin === undefined) { + debugLogger.warn( + `Warning: Code Assist API did not return freeTierDataCollectionOptin. Defaulting to ${optIn}.`, + ); + } + return resp.freeTierDataCollectionOptin ?? optIn; } diff --git a/packages/core/src/code_assist/converter.test.ts b/packages/core/src/code_assist/converter.test.ts index 21fecec547..674bbaf70e 100644 --- a/packages/core/src/code_assist/converter.test.ts +++ b/packages/core/src/code_assist/converter.test.ts @@ -246,7 +246,7 @@ describe('converter', () => { }; const genaiRes = fromGenerateContentResponse(codeAssistRes); expect(genaiRes).toBeInstanceOf(GenerateContentResponse); - expect(genaiRes.candidates).toEqual(codeAssistRes.response.candidates); + expect(genaiRes.candidates).toEqual(codeAssistRes.response!.candidates); }); it('should handle prompt feedback and usage metadata', () => { @@ -266,10 +266,10 @@ describe('converter', () => { }; const genaiRes = fromGenerateContentResponse(codeAssistRes); expect(genaiRes.promptFeedback).toEqual( - codeAssistRes.response.promptFeedback, + codeAssistRes.response!.promptFeedback, ); expect(genaiRes.usageMetadata).toEqual( - codeAssistRes.response.usageMetadata, + codeAssistRes.response!.usageMetadata, ); }); @@ -296,7 +296,7 @@ describe('converter', () => { }; const genaiRes = fromGenerateContentResponse(codeAssistRes); 
expect(genaiRes.automaticFunctionCallingHistory).toEqual( - codeAssistRes.response.automaticFunctionCallingHistory, + codeAssistRes.response!.automaticFunctionCallingHistory, ); }); diff --git a/packages/core/src/code_assist/converter.ts b/packages/core/src/code_assist/converter.ts index 1d41101f31..fc163a8f02 100644 --- a/packages/core/src/code_assist/converter.ts +++ b/packages/core/src/code_assist/converter.ts @@ -27,6 +27,7 @@ import type { ToolConfig, } from '@google/genai'; import { GenerateContentResponse } from '@google/genai'; +import { debugLogger } from '../utils/debugLogger.js'; export interface CAGenerateContentRequest { model: string; @@ -72,12 +73,12 @@ interface VertexGenerationConfig { } export interface CaGenerateContentResponse { - response: VertexGenerateContentResponse; + response?: VertexGenerateContentResponse; traceId?: string; } interface VertexGenerateContentResponse { - candidates: Candidate[]; + candidates?: Candidate[]; automaticFunctionCallingHistory?: Content[]; promptFeedback?: GenerateContentResponsePromptFeedback; usageMetadata?: GenerateContentResponseUsageMetadata; @@ -94,7 +95,7 @@ interface VertexCountTokenRequest { } export interface CaCountTokenResponse { - totalTokens: number; + totalTokens?: number; } export function toCountTokenRequest( @@ -111,8 +112,13 @@ export function toCountTokenRequest( export function fromCountTokenResponse( res: CaCountTokenResponse, ): CountTokensResponse { + if (res.totalTokens === undefined) { + debugLogger.warn( + 'Warning: Code Assist API did not return totalTokens. Defaulting to 0.', + ); + } return { - totalTokens: res.totalTokens, + totalTokens: res.totalTokens ?? 
0, }; } diff --git a/packages/core/src/code_assist/setup.ts b/packages/core/src/code_assist/setup.ts index 895fabb6bc..dce96b9cdd 100644 --- a/packages/core/src/code_assist/setup.ts +++ b/packages/core/src/code_assist/setup.ts @@ -18,6 +18,7 @@ import type { AuthClient } from 'google-auth-library'; import type { ValidationHandler } from '../fallback/types.js'; import { ChangeAuthRequestedError } from '../utils/errors.js'; import { ValidationRequiredError } from '../utils/googleQuotaErrors.js'; +import { debugLogger } from '../utils/debugLogger.js'; export class ProjectIdRequiredError extends Error { constructor() { @@ -130,11 +131,20 @@ export async function setupUser( } if (loadRes.currentTier) { + if (!loadRes.paidTier?.id && !loadRes.currentTier.id) { + debugLogger.warn( + 'Warning: Code Assist API did not return a user tier ID. Defaulting to STANDARD tier.', + ); + } + if (!loadRes.cloudaicompanionProject) { if (projectId) { return { projectId, - userTier: loadRes.paidTier?.id ?? loadRes.currentTier.id, + userTier: + loadRes.paidTier?.id ?? + loadRes.currentTier.id ?? + UserTierId.STANDARD, userTierName: loadRes.paidTier?.name ?? loadRes.currentTier.name, }; } @@ -144,13 +154,20 @@ export async function setupUser( } return { projectId: loadRes.cloudaicompanionProject, - userTier: loadRes.paidTier?.id ?? loadRes.currentTier.id, + userTier: + loadRes.paidTier?.id ?? loadRes.currentTier.id ?? UserTierId.STANDARD, userTierName: loadRes.paidTier?.name ?? loadRes.currentTier.name, }; } const tier = getOnboardTier(loadRes); + if (!tier.id) { + debugLogger.warn( + 'Warning: Code Assist API did not return an onboarding tier ID. Defaulting to STANDARD tier.', + ); + } + let onboardReq: OnboardUserRequest; if (tier.id === UserTierId.FREE) { // The free tier uses a managed google cloud project. Setting a project in the `onboardUser` request causes a `Precondition Failed` error. 
@@ -183,7 +200,7 @@ export async function setupUser( if (projectId) { return { projectId, - userTier: tier.id, + userTier: tier.id ?? UserTierId.STANDARD, userTierName: tier.name, }; } @@ -193,7 +210,7 @@ export async function setupUser( return { projectId: lroRes.response.cloudaicompanionProject.id, - userTier: tier.id, + userTier: tier.id ?? UserTierId.STANDARD, userTierName: tier.name, }; } diff --git a/packages/core/src/code_assist/types.ts b/packages/core/src/code_assist/types.ts index 0e2f353aa3..79932efc02 100644 --- a/packages/core/src/code_assist/types.ts +++ b/packages/core/src/code_assist/types.ts @@ -60,7 +60,7 @@ export interface LoadCodeAssistResponse { * GeminiUserTier reflects the structure received from the CodeAssist when calling LoadCodeAssist. */ export interface GeminiUserTier { - id: UserTierId; + id?: UserTierId; name?: string; description?: string; // This value is used to declare whether a given tier requires the user to configure the project setting on the IDE settings or not. @@ -79,10 +79,10 @@ export interface GeminiUserTier { * @param tierName name of the tier. */ export interface IneligibleTier { - reasonCode: IneligibleTierReasonCode; - reasonMessage: string; - tierId: UserTierId; - tierName: string; + reasonCode?: IneligibleTierReasonCode; + reasonMessage?: string; + tierId?: UserTierId; + tierName?: string; validationErrorMessage?: string; validationUrl?: string; validationUrlLinkText?: string; @@ -127,7 +127,7 @@ export type UserTierId = (typeof UserTierId)[keyof typeof UserTierId] | string; * privacy notice. 
*/ export interface PrivacyNotice { - showNotice: boolean; + showNotice?: boolean; noticeText?: string; } @@ -145,7 +145,7 @@ export interface OnboardUserRequest { * http://google3/google/longrunning/operations.proto;rcl=698857719;l=107 */ export interface LongRunningOperationResponse { - name: string; + name?: string; done?: boolean; response?: OnboardUserResponse; } @@ -157,8 +157,8 @@ export interface LongRunningOperationResponse { export interface OnboardUserResponse { // tslint:disable-next-line:enforce-name-casing This is the name of the field in the proto. cloudaicompanionProject?: { - id: string; - name: string; + id?: string; + name?: string; }; } @@ -195,7 +195,7 @@ export interface SetCodeAssistGlobalUserSettingRequest { export interface CodeAssistGlobalUserSettingResponse { cloudaicompanionProject?: string; - freeTierDataCollectionOptin: boolean; + freeTierDataCollectionOptin?: boolean; } /** From 3b2632fe40c763ee1904bb7281c41ead230563e1 Mon Sep 17 00:00:00 2001 From: Dmitry Lyalin Date: Fri, 27 Feb 2026 09:11:13 -0800 Subject: [PATCH 33/41] fix(cli): keep thought summary when loading phrases are off (#20497) Co-authored-by: Jacob Richman --- packages/cli/src/ui/components/Composer.test.tsx | 4 ++-- packages/cli/src/ui/components/Composer.tsx | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/packages/cli/src/ui/components/Composer.test.tsx b/packages/cli/src/ui/components/Composer.test.tsx index 330e615cfa..999b1531f9 100644 --- a/packages/cli/src/ui/components/Composer.test.tsx +++ b/packages/cli/src/ui/components/Composer.test.tsx @@ -402,7 +402,7 @@ describe('Composer', () => { expect(output).not.toContain('ShortcutsHint'); }); - it('renders LoadingIndicator without thought when loadingPhrases is off', async () => { + it('renders LoadingIndicator with thought when loadingPhrases is off', async () => { const uiState = createMockUIState({ streamingState: StreamingState.Responding, thought: { subject: 'Hidden', description: 
'Should not show' }, @@ -415,7 +415,7 @@ describe('Composer', () => { const output = lastFrame(); expect(output).toContain('LoadingIndicator'); - expect(output).not.toContain('Should not show'); + expect(output).toContain('LoadingIndicator: Hidden'); }); it('does not render LoadingIndicator when waiting for confirmation', async () => { diff --git a/packages/cli/src/ui/components/Composer.tsx b/packages/cli/src/ui/components/Composer.tsx index 98a465fa39..51c879e772 100644 --- a/packages/cli/src/ui/components/Composer.tsx +++ b/packages/cli/src/ui/components/Composer.tsx @@ -229,8 +229,7 @@ export const Composer = ({ isFocused = true }: { isFocused?: boolean }) => { inline thought={ uiState.streamingState === - StreamingState.WaitingForConfirmation || - settings.merged.ui.loadingPhrases === 'off' + StreamingState.WaitingForConfirmation ? undefined : uiState.thought } @@ -273,8 +272,7 @@ export const Composer = ({ isFocused = true }: { isFocused?: boolean }) => { inline thought={ uiState.streamingState === - StreamingState.WaitingForConfirmation || - settings.merged.ui.loadingPhrases === 'off' + StreamingState.WaitingForConfirmation ? 
undefined : uiState.thought } From 59c0e73718476608e701bd9fff9370d4fdcad722 Mon Sep 17 00:00:00 2001 From: Abhijit Balaji Date: Fri, 27 Feb 2026 09:25:16 -0800 Subject: [PATCH 34/41] feat(cli): add temporary flag to disable workspace policies (#20523) --- packages/cli/src/config/policy.test.ts | 33 ++++++++++++++++++- packages/cli/src/config/policy.ts | 16 ++++++++- .../src/config/workspace-policy-cli.test.ts | 1 + 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/packages/cli/src/config/policy.test.ts b/packages/cli/src/config/policy.test.ts index 10d53e56ef..9baccd3359 100644 --- a/packages/cli/src/config/policy.test.ts +++ b/packages/cli/src/config/policy.test.ts @@ -12,6 +12,8 @@ import { resolveWorkspacePolicyState, autoAcceptWorkspacePolicies, setAutoAcceptWorkspacePolicies, + disableWorkspacePolicies, + setDisableWorkspacePolicies, } from './policy.js'; import { writeToStderr } from '@google/gemini-cli-core'; @@ -45,6 +47,9 @@ describe('resolveWorkspacePolicyState', () => { fs.mkdirSync(workspaceDir); policiesDir = path.join(workspaceDir, '.gemini', 'policies'); + // Enable policies for these tests to verify loading logic + setDisableWorkspacePolicies(false); + vi.clearAllMocks(); }); @@ -67,6 +72,13 @@ describe('resolveWorkspacePolicyState', () => { }); }); + it('should have disableWorkspacePolicies set to true by default', () => { + // We explicitly set it to false in beforeEach for other tests, + // so here we test that setting it to true works. 
+ setDisableWorkspacePolicies(true); + expect(disableWorkspacePolicies).toBe(true); + }); + it('should return policy directory if integrity matches', async () => { // Set up policies directory with a file fs.mkdirSync(policiesDir, { recursive: true }); @@ -188,7 +200,26 @@ describe('resolveWorkspacePolicyState', () => { expect(result.policyUpdateConfirmationRequest).toBeUndefined(); }); - it('should not return workspace policies if cwd is a symlink to the home directory', async () => { + it('should return empty state if disableWorkspacePolicies is true even if folder is trusted', async () => { + setDisableWorkspacePolicies(true); + + // Set up policies directory with a file + fs.mkdirSync(policiesDir, { recursive: true }); + fs.writeFileSync(path.join(policiesDir, 'policy.toml'), 'rules = []'); + + const result = await resolveWorkspacePolicyState({ + cwd: workspaceDir, + trustedFolder: true, + interactive: true, + }); + + expect(result).toEqual({ + workspacePoliciesDir: undefined, + policyUpdateConfirmationRequest: undefined, + }); + }); + + it('should return empty state if cwd is a symlink to the home directory', async () => { const policiesDir = path.join(tempDir, '.gemini', 'policies'); fs.mkdirSync(policiesDir, { recursive: true }); fs.writeFileSync(path.join(policiesDir, 'policy.toml'), 'rules = []'); diff --git a/packages/cli/src/config/policy.ts b/packages/cli/src/config/policy.ts index 6ce44020f5..bc22c928f8 100644 --- a/packages/cli/src/config/policy.ts +++ b/packages/cli/src/config/policy.ts @@ -35,6 +35,20 @@ export function setAutoAcceptWorkspacePolicies(value: boolean) { autoAcceptWorkspacePolicies = value; } +/** + * Temporary flag to disable workspace level policies altogether. + * Exported as 'let' to allow monkey patching in tests via the setter. + */ +export let disableWorkspacePolicies = true; + +/** + * Sets the disableWorkspacePolicies flag. + * Used primarily for testing purposes. 
+ */ +export function setDisableWorkspacePolicies(value: boolean) { + disableWorkspacePolicies = value; +} + export async function createPolicyEngineConfig( settings: Settings, approvalMode: ApprovalMode, @@ -81,7 +95,7 @@ export async function resolveWorkspacePolicyState(options: { | PolicyUpdateConfirmationRequest | undefined; - if (trustedFolder) { + if (trustedFolder && !disableWorkspacePolicies) { const storage = new Storage(cwd); // If we are in the home directory (or rather, our target Gemini dir is the global one), diff --git a/packages/cli/src/config/workspace-policy-cli.test.ts b/packages/cli/src/config/workspace-policy-cli.test.ts index a7ab9d69b1..d0d98a5a31 100644 --- a/packages/cli/src/config/workspace-policy-cli.test.ts +++ b/packages/cli/src/config/workspace-policy-cli.test.ts @@ -54,6 +54,7 @@ describe('Workspace-Level Policy CLI Integration', () => { beforeEach(() => { vi.clearAllMocks(); + Policy.setDisableWorkspacePolicies(false); // Default to MATCH for existing tests mockCheckIntegrity.mockResolvedValue({ status: 'match', From 7a1f2f3288fb1180fbf7e7ccce9ef4153fb9102f Mon Sep 17 00:00:00 2001 From: DeWitt Clinton Date: Fri, 27 Feb 2026 09:40:09 -0800 Subject: [PATCH 35/41] Disable expensive and scheduled workflows on personal forks (#20449) --- .github/workflows/chained_e2e.yml | 23 ++++++++++--------- .github/workflows/ci.yml | 16 +++++++------ .github/workflows/deflake.yml | 3 +++ .github/workflows/docs-page-action.yml | 4 ++-- .github/workflows/docs-rebuild.yml | 1 + .github/workflows/evals-nightly.yml | 3 ++- .../gemini-scheduled-stale-issue-closer.yml | 1 + .../workflows/label-backlog-child-issues.yml | 4 ++-- .github/workflows/label-workstream-rollup.yml | 1 + .github/workflows/release-change-tags.yml | 1 + .github/workflows/release-manual.yml | 1 + .github/workflows/release-notes.yml | 1 + .github/workflows/release-rollback.yml | 1 + .github/workflows/release-sandbox.yml | 1 + .github/workflows/smoke-test.yml | 1 + 
.github/workflows/trigger_e2e.yml | 2 ++ .github/workflows/verify-release.yml | 1 + 17 files changed, 42 insertions(+), 23 deletions(-) diff --git a/.github/workflows/chained_e2e.yml b/.github/workflows/chained_e2e.yml index 05b1fb0f1d..7d13a23938 100644 --- a/.github/workflows/chained_e2e.yml +++ b/.github/workflows/chained_e2e.yml @@ -31,6 +31,7 @@ jobs: name: 'Merge Queue Skipper' permissions: 'read-all' runs-on: 'gemini-cli-ubuntu-16-core' + if: "github.repository == 'google-gemini/gemini-cli'" outputs: skip: '${{ steps.merge-queue-e2e-skipper.outputs.skip-check }}' steps: @@ -42,7 +43,7 @@ jobs: download_repo_name: runs-on: 'gemini-cli-ubuntu-16-core' - if: "${{github.event_name == 'workflow_dispatch' || github.event_name == 'workflow_run'}}" + if: "github.repository == 'google-gemini/gemini-cli' && (github.event_name == 'workflow_dispatch' || github.event_name == 'workflow_run')" outputs: repo_name: '${{ steps.output-repo-name.outputs.repo_name }}' head_sha: '${{ steps.output-repo-name.outputs.head_sha }}' @@ -91,7 +92,7 @@ jobs: name: 'Parse run context' runs-on: 'gemini-cli-ubuntu-16-core' needs: 'download_repo_name' - if: 'always()' + if: "github.repository == 'google-gemini/gemini-cli' && always()" outputs: repository: '${{ steps.set_context.outputs.REPO }}' sha: '${{ steps.set_context.outputs.SHA }}' @@ -111,11 +112,11 @@ jobs: permissions: 'write-all' needs: - 'parse_run_context' - if: 'always()' + if: "github.repository == 'google-gemini/gemini-cli' && always()" steps: - name: 'Set pending status' uses: 'myrotvorets/set-commit-status-action@16037e056d73b2d3c88e37e393ff369047f70886' # ratchet:myrotvorets/set-commit-status-action@master - if: 'always()' + if: "github.repository == 'google-gemini/gemini-cli' && always()" with: allowForks: 'true' repo: '${{ github.repository }}' @@ -131,7 +132,7 @@ jobs: - 'parse_run_context' runs-on: 'gemini-cli-ubuntu-16-core' if: | - always() && (needs.merge_queue_skipper.result !='success' || 
needs.merge_queue_skipper.outputs.skip != 'true') + github.repository == 'google-gemini/gemini-cli' && always() && (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip != 'true') strategy: fail-fast: false matrix: @@ -184,7 +185,7 @@ jobs: - 'parse_run_context' runs-on: 'macos-latest' if: | - always() && (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip != 'true') + github.repository == 'google-gemini/gemini-cli' && always() && (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip != 'true') steps: - name: 'Checkout' uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v5 @@ -222,7 +223,7 @@ jobs: - 'merge_queue_skipper' - 'parse_run_context' if: | - always() && (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip != 'true') + github.repository == 'google-gemini/gemini-cli' && always() && (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip != 'true') runs-on: 'gemini-cli-windows-16-core' steps: - name: 'Checkout' @@ -282,7 +283,7 @@ jobs: - 'parse_run_context' runs-on: 'gemini-cli-ubuntu-16-core' if: | - always() && (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip != 'true') + github.repository == 'google-gemini/gemini-cli' && always() && (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip != 'true') steps: - name: 'Checkout' uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v5 @@ -309,7 +310,7 @@ jobs: e2e: name: 'E2E' if: | - always() && (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip != 'true') + github.repository == 'google-gemini/gemini-cli' && always() && (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip != 'true') needs: - 'e2e_linux' - 'e2e_mac' @@ 
-337,14 +338,14 @@ jobs: set_workflow_status: runs-on: 'gemini-cli-ubuntu-16-core' permissions: 'write-all' - if: 'always()' + if: "github.repository == 'google-gemini/gemini-cli' && always()" needs: - 'parse_run_context' - 'e2e' steps: - name: 'Set workflow status' uses: 'myrotvorets/set-commit-status-action@16037e056d73b2d3c88e37e393ff369047f70886' # ratchet:myrotvorets/set-commit-status-action@master - if: 'always()' + if: "github.repository == 'google-gemini/gemini-cli' && always()" with: allowForks: 'true' repo: '${{ github.repository }}' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 999eb778c4..a358ad8b07 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,6 +37,7 @@ jobs: permissions: 'read-all' name: 'Merge Queue Skipper' runs-on: 'gemini-cli-ubuntu-16-core' + if: "github.repository == 'google-gemini/gemini-cli'" outputs: skip: '${{ steps.merge-queue-ci-skipper.outputs.skip-check }}' steps: @@ -49,7 +50,7 @@ jobs: name: 'Lint' runs-on: 'gemini-cli-ubuntu-16-core' needs: 'merge_queue_skipper' - if: "${{needs.merge_queue_skipper.outputs.skip == 'false'}}" + if: "github.repository == 'google-gemini/gemini-cli' && needs.merge_queue_skipper.outputs.skip == 'false'" env: GEMINI_LINT_TEMP_DIR: '${{ github.workspace }}/.gemini-linters' steps: @@ -116,6 +117,7 @@ jobs: link_checker: name: 'Link Checker' runs-on: 'ubuntu-latest' + if: "github.repository == 'google-gemini/gemini-cli'" steps: - name: 'Checkout' uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5 @@ -129,7 +131,7 @@ jobs: runs-on: 'gemini-cli-ubuntu-16-core' needs: - 'merge_queue_skipper' - if: "${{needs.merge_queue_skipper.outputs.skip == 'false'}}" + if: "github.repository == 'google-gemini/gemini-cli' && needs.merge_queue_skipper.outputs.skip == 'false'" permissions: contents: 'read' checks: 'write' @@ -216,7 +218,7 @@ jobs: runs-on: 'macos-latest' needs: - 'merge_queue_skipper' - if: 
"${{needs.merge_queue_skipper.outputs.skip == 'false'}}" + if: "github.repository == 'google-gemini/gemini-cli' && needs.merge_queue_skipper.outputs.skip == 'false'" permissions: contents: 'read' checks: 'write' @@ -311,7 +313,7 @@ jobs: name: 'CodeQL' runs-on: 'gemini-cli-ubuntu-16-core' needs: 'merge_queue_skipper' - if: "${{needs.merge_queue_skipper.outputs.skip == 'false'}}" + if: "github.repository == 'google-gemini/gemini-cli' && needs.merge_queue_skipper.outputs.skip == 'false'" permissions: actions: 'read' contents: 'read' @@ -334,7 +336,7 @@ jobs: bundle_size: name: 'Check Bundle Size' needs: 'merge_queue_skipper' - if: "${{github.event_name == 'pull_request' && needs.merge_queue_skipper.outputs.skip == 'false'}}" + if: "github.repository == 'google-gemini/gemini-cli' && github.event_name == 'pull_request' && needs.merge_queue_skipper.outputs.skip == 'false'" runs-on: 'gemini-cli-ubuntu-16-core' permissions: contents: 'read' # For checkout @@ -359,7 +361,7 @@ jobs: name: 'Slow Test - Win - ${{ matrix.shard }}' runs-on: 'gemini-cli-windows-16-core' needs: 'merge_queue_skipper' - if: "${{needs.merge_queue_skipper.outputs.skip == 'false'}}" + if: "github.repository == 'google-gemini/gemini-cli' && needs.merge_queue_skipper.outputs.skip == 'false'" timeout-minutes: 60 strategy: matrix: @@ -451,7 +453,7 @@ jobs: ci: name: 'CI' - if: 'always()' + if: "github.repository == 'google-gemini/gemini-cli' && always()" needs: - 'lint' - 'link_checker' diff --git a/.github/workflows/deflake.yml b/.github/workflows/deflake.yml index a0eb51a7f4..fbb3e2d8d7 100644 --- a/.github/workflows/deflake.yml +++ b/.github/workflows/deflake.yml @@ -27,6 +27,7 @@ jobs: deflake_e2e_linux: name: 'E2E Test (Linux) - ${{ matrix.sandbox }}' runs-on: 'gemini-cli-ubuntu-16-core' + if: "github.repository == 'google-gemini/gemini-cli'" strategy: fail-fast: false matrix: @@ -77,6 +78,7 @@ jobs: deflake_e2e_mac: name: 'E2E Test (macOS)' runs-on: 'macos-latest' + if: "github.repository == 
'google-gemini/gemini-cli'" steps: - name: 'Checkout' uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v5 @@ -114,6 +116,7 @@ jobs: deflake_e2e_windows: name: 'Slow E2E - Win' runs-on: 'gemini-cli-windows-16-core' + if: "github.repository == 'google-gemini/gemini-cli'" steps: - name: 'Checkout' diff --git a/.github/workflows/docs-page-action.yml b/.github/workflows/docs-page-action.yml index 2d485278ce..be807c7c36 100644 --- a/.github/workflows/docs-page-action.yml +++ b/.github/workflows/docs-page-action.yml @@ -19,8 +19,7 @@ concurrency: jobs: build: - if: |- - ${{ !contains(github.ref_name, 'nightly') }} + if: "github.repository == 'google-gemini/gemini-cli' && !contains(github.ref_name, 'nightly')" runs-on: 'ubuntu-latest' steps: - name: 'Checkout' @@ -39,6 +38,7 @@ jobs: uses: 'actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa' # ratchet:actions/upload-pages-artifact@v3 deploy: + if: "github.repository == 'google-gemini/gemini-cli'" environment: name: 'github-pages' url: '${{ steps.deployment.outputs.page_url }}' diff --git a/.github/workflows/docs-rebuild.yml b/.github/workflows/docs-rebuild.yml index ac41819f02..a4e2c65973 100644 --- a/.github/workflows/docs-rebuild.yml +++ b/.github/workflows/docs-rebuild.yml @@ -7,6 +7,7 @@ on: - 'docs/**' jobs: trigger-rebuild: + if: "github.repository == 'google-gemini/gemini-cli'" runs-on: 'ubuntu-latest' steps: - name: 'Trigger rebuild' diff --git a/.github/workflows/evals-nightly.yml b/.github/workflows/evals-nightly.yml index 1ed9448c03..c5b3709c75 100644 --- a/.github/workflows/evals-nightly.yml +++ b/.github/workflows/evals-nightly.yml @@ -23,6 +23,7 @@ jobs: evals: name: 'Evals (USUALLY_PASSING) nightly run' runs-on: 'gemini-cli-ubuntu-16-core' + if: "github.repository == 'google-gemini/gemini-cli'" strategy: fail-fast: false matrix: @@ -85,7 +86,7 @@ jobs: aggregate-results: name: 'Aggregate Results' needs: ['evals'] - if: 'always()' + if: 
"github.repository == 'google-gemini/gemini-cli' && always()" runs-on: 'gemini-cli-ubuntu-16-core' steps: - name: 'Checkout' diff --git a/.github/workflows/gemini-scheduled-stale-issue-closer.yml b/.github/workflows/gemini-scheduled-stale-issue-closer.yml index c7aef65a73..2b7b163d88 100644 --- a/.github/workflows/gemini-scheduled-stale-issue-closer.yml +++ b/.github/workflows/gemini-scheduled-stale-issue-closer.yml @@ -21,6 +21,7 @@ defaults: jobs: close-stale-issues: + if: "github.repository == 'google-gemini/gemini-cli'" runs-on: 'ubuntu-latest' permissions: issues: 'write' diff --git a/.github/workflows/label-backlog-child-issues.yml b/.github/workflows/label-backlog-child-issues.yml index b11f509f80..a819bf4e71 100644 --- a/.github/workflows/label-backlog-child-issues.yml +++ b/.github/workflows/label-backlog-child-issues.yml @@ -14,7 +14,7 @@ permissions: jobs: # Event-based: Quick reaction to new/edited issues in THIS repo labeler: - if: "github.event_name == 'issues'" + if: "github.repository == 'google-gemini/gemini-cli' && github.event_name == 'issues'" runs-on: 'ubuntu-latest' steps: - name: 'Checkout' @@ -36,7 +36,7 @@ jobs: # Scheduled/Manual: Recursive sync across multiple repos sync-maintainer-labels: - if: "github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'" + if: "github.repository == 'google-gemini/gemini-cli' && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')" runs-on: 'ubuntu-latest' steps: - name: 'Checkout' diff --git a/.github/workflows/label-workstream-rollup.yml b/.github/workflows/label-workstream-rollup.yml index 35840cfe6f..97d699d09b 100644 --- a/.github/workflows/label-workstream-rollup.yml +++ b/.github/workflows/label-workstream-rollup.yml @@ -9,6 +9,7 @@ on: jobs: labeler: + if: "github.repository == 'google-gemini/gemini-cli'" runs-on: 'ubuntu-latest' permissions: issues: 'write' diff --git a/.github/workflows/release-change-tags.yml b/.github/workflows/release-change-tags.yml 
index 6184850677..c7c3f3f2d2 100644 --- a/.github/workflows/release-change-tags.yml +++ b/.github/workflows/release-change-tags.yml @@ -32,6 +32,7 @@ on: jobs: change-tags: + if: "github.repository == 'google-gemini/gemini-cli'" runs-on: 'ubuntu-latest' environment: "${{ github.event.inputs.environment || 'prod' }}" permissions: diff --git a/.github/workflows/release-manual.yml b/.github/workflows/release-manual.yml index c9d2290a1c..f03bd52127 100644 --- a/.github/workflows/release-manual.yml +++ b/.github/workflows/release-manual.yml @@ -47,6 +47,7 @@ on: jobs: release: + if: "github.repository == 'google-gemini/gemini-cli'" runs-on: 'ubuntu-latest' environment: "${{ github.event.inputs.environment || 'prod' }}" permissions: diff --git a/.github/workflows/release-notes.yml b/.github/workflows/release-notes.yml index 8a681dadf6..f746e65c2e 100644 --- a/.github/workflows/release-notes.yml +++ b/.github/workflows/release-notes.yml @@ -22,6 +22,7 @@ on: jobs: generate-release-notes: + if: "github.repository == 'google-gemini/gemini-cli'" runs-on: 'ubuntu-latest' permissions: contents: 'write' diff --git a/.github/workflows/release-rollback.yml b/.github/workflows/release-rollback.yml index 8840b65721..db91457b1a 100644 --- a/.github/workflows/release-rollback.yml +++ b/.github/workflows/release-rollback.yml @@ -42,6 +42,7 @@ on: jobs: change-tags: + if: "github.repository == 'google-gemini/gemini-cli'" environment: "${{ github.event.inputs.environment || 'prod' }}" runs-on: 'ubuntu-latest' permissions: diff --git a/.github/workflows/release-sandbox.yml b/.github/workflows/release-sandbox.yml index f1deb0380c..2c7de7a0f5 100644 --- a/.github/workflows/release-sandbox.yml +++ b/.github/workflows/release-sandbox.yml @@ -16,6 +16,7 @@ on: jobs: build: + if: "github.repository == 'google-gemini/gemini-cli'" runs-on: 'ubuntu-latest' permissions: contents: 'read' diff --git a/.github/workflows/smoke-test.yml b/.github/workflows/smoke-test.yml index caeb0bebe0..29903dfbe8 
100644 --- a/.github/workflows/smoke-test.yml +++ b/.github/workflows/smoke-test.yml @@ -20,6 +20,7 @@ on: jobs: smoke-test: + if: "github.repository == 'google-gemini/gemini-cli'" runs-on: 'ubuntu-latest' permissions: contents: 'write' diff --git a/.github/workflows/trigger_e2e.yml b/.github/workflows/trigger_e2e.yml index babe08e4e3..56da2727c5 100644 --- a/.github/workflows/trigger_e2e.yml +++ b/.github/workflows/trigger_e2e.yml @@ -15,6 +15,7 @@ on: jobs: save_repo_name: + if: "github.repository == 'google-gemini/gemini-cli'" runs-on: 'gemini-cli-ubuntu-16-core' steps: - name: 'Save Repo name' @@ -31,6 +32,7 @@ jobs: path: 'pr/' trigger_e2e: name: 'Trigger e2e' + if: "github.repository == 'google-gemini/gemini-cli'" runs-on: 'gemini-cli-ubuntu-16-core' steps: - id: 'trigger-e2e' diff --git a/.github/workflows/verify-release.yml b/.github/workflows/verify-release.yml index edf0995ddd..20a9f51b8a 100644 --- a/.github/workflows/verify-release.yml +++ b/.github/workflows/verify-release.yml @@ -28,6 +28,7 @@ on: jobs: verify-release: + if: "github.repository == 'google-gemini/gemini-cli'" environment: "${{ github.event.inputs.environment || 'prod' }}" strategy: fail-fast: false From ec39aa17c22745921ada8a0b2ae6594d1ca1140a Mon Sep 17 00:00:00 2001 From: Dev Randalpura Date: Fri, 27 Feb 2026 09:43:18 -0800 Subject: [PATCH 36/41] Moved markdown parsing logic to a separate util file (#20526) --- .../src/ui/utils/InlineMarkdownRenderer.tsx | 211 +---------------- packages/cli/src/ui/utils/TableRenderer.tsx | 2 +- .../src/ui/utils/markdownParsingUtils.test.ts | 2 +- .../cli/src/ui/utils/markdownParsingUtils.ts | 216 ++++++++++++++++++ 4 files changed, 219 insertions(+), 212 deletions(-) create mode 100644 packages/cli/src/ui/utils/markdownParsingUtils.ts diff --git a/packages/cli/src/ui/utils/InlineMarkdownRenderer.tsx b/packages/cli/src/ui/utils/InlineMarkdownRenderer.tsx index 02a34842f4..19d4b3cac8 100644 --- a/packages/cli/src/ui/utils/InlineMarkdownRenderer.tsx +++ 
b/packages/cli/src/ui/utils/InlineMarkdownRenderer.tsx @@ -6,223 +6,14 @@ import React from 'react'; import { Text } from 'ink'; -import chalk from 'chalk'; -import { - resolveColor, - INK_SUPPORTED_NAMES, - INK_NAME_TO_HEX_MAP, -} from '../themes/color-utils.js'; -import { theme } from '../semantic-colors.js'; -import { debugLogger } from '@google/gemini-cli-core'; +import { parseMarkdownToANSI } from './markdownParsingUtils.js'; import { stripUnsafeCharacters } from './textUtils.js'; -// Constants for Markdown parsing -const BOLD_MARKER_LENGTH = 2; // For "**" -const ITALIC_MARKER_LENGTH = 1; // For "*" or "_" -const STRIKETHROUGH_MARKER_LENGTH = 2; // For "~~") -const INLINE_CODE_MARKER_LENGTH = 1; // For "`" -const UNDERLINE_TAG_START_LENGTH = 3; // For "" -const UNDERLINE_TAG_END_LENGTH = 4; // For "" - interface RenderInlineProps { text: string; defaultColor?: string; } -/** - * Helper to apply color to a string using ANSI escape codes, - * consistent with how Ink's colorize works. - */ -const ansiColorize = (str: string, color: string | undefined): string => { - if (!color) return str; - const resolved = resolveColor(color); - if (!resolved) return str; - - if (resolved.startsWith('#')) { - return chalk.hex(resolved)(str); - } - - const mappedHex = INK_NAME_TO_HEX_MAP[resolved]; - if (mappedHex) { - return chalk.hex(mappedHex)(str); - } - - if (INK_SUPPORTED_NAMES.has(resolved)) { - switch (resolved) { - case 'black': - return chalk.black(str); - case 'red': - return chalk.red(str); - case 'green': - return chalk.green(str); - case 'yellow': - return chalk.yellow(str); - case 'blue': - return chalk.blue(str); - case 'magenta': - return chalk.magenta(str); - case 'cyan': - return chalk.cyan(str); - case 'white': - return chalk.white(str); - case 'gray': - case 'grey': - return chalk.gray(str); - default: - return str; - } - } - - return str; -}; - -/** - * Converts markdown text into a string with ANSI escape codes. 
- * This mirrors the parsing logic in InlineMarkdownRenderer.tsx - */ -export const parseMarkdownToANSI = ( - text: string, - defaultColor?: string, -): string => { - const baseColor = defaultColor ?? theme.text.primary; - // Early return for plain text without markdown or URLs - if (!/[*_~`<[https?:]/.test(text)) { - return ansiColorize(text, baseColor); - } - - let result = ''; - const inlineRegex = - /(\*\*\*.*?\*\*\*|\*\*.*?\*\*|\*.*?\*|_.*?_|~~.*?~~|\[.*?\]\(.*?\)|`+.+?`+|.*?<\/u>|https?:\/\/\S+)/g; - let lastIndex = 0; - let match; - - while ((match = inlineRegex.exec(text)) !== null) { - if (match.index > lastIndex) { - result += ansiColorize(text.slice(lastIndex, match.index), baseColor); - } - - const fullMatch = match[0]; - let styledPart = ''; - - try { - if ( - fullMatch.endsWith('***') && - fullMatch.startsWith('***') && - fullMatch.length > (BOLD_MARKER_LENGTH + ITALIC_MARKER_LENGTH) * 2 - ) { - styledPart = chalk.bold( - chalk.italic( - parseMarkdownToANSI( - fullMatch.slice( - BOLD_MARKER_LENGTH + ITALIC_MARKER_LENGTH, - -BOLD_MARKER_LENGTH - ITALIC_MARKER_LENGTH, - ), - baseColor, - ), - ), - ); - } else if ( - fullMatch.endsWith('**') && - fullMatch.startsWith('**') && - fullMatch.length > BOLD_MARKER_LENGTH * 2 - ) { - styledPart = chalk.bold( - parseMarkdownToANSI( - fullMatch.slice(BOLD_MARKER_LENGTH, -BOLD_MARKER_LENGTH), - baseColor, - ), - ); - } else if ( - fullMatch.length > ITALIC_MARKER_LENGTH * 2 && - ((fullMatch.startsWith('*') && fullMatch.endsWith('*')) || - (fullMatch.startsWith('_') && fullMatch.endsWith('_'))) && - !/\w/.test(text.substring(match.index - 1, match.index)) && - !/\w/.test( - text.substring(inlineRegex.lastIndex, inlineRegex.lastIndex + 1), - ) && - !/\S[./\\]/.test(text.substring(match.index - 2, match.index)) && - !/[./\\]\S/.test( - text.substring(inlineRegex.lastIndex, inlineRegex.lastIndex + 2), - ) - ) { - styledPart = chalk.italic( - parseMarkdownToANSI( - fullMatch.slice(ITALIC_MARKER_LENGTH, 
-ITALIC_MARKER_LENGTH), - baseColor, - ), - ); - } else if ( - fullMatch.startsWith('~~') && - fullMatch.endsWith('~~') && - fullMatch.length > STRIKETHROUGH_MARKER_LENGTH * 2 - ) { - styledPart = chalk.strikethrough( - parseMarkdownToANSI( - fullMatch.slice( - STRIKETHROUGH_MARKER_LENGTH, - -STRIKETHROUGH_MARKER_LENGTH, - ), - baseColor, - ), - ); - } else if ( - fullMatch.startsWith('`') && - fullMatch.endsWith('`') && - fullMatch.length > INLINE_CODE_MARKER_LENGTH - ) { - const codeMatch = fullMatch.match(/^(`+)(.+?)\1$/s); - if (codeMatch && codeMatch[2]) { - styledPart = ansiColorize(codeMatch[2], theme.text.accent); - } - } else if ( - fullMatch.startsWith('[') && - fullMatch.includes('](') && - fullMatch.endsWith(')') - ) { - const linkMatch = fullMatch.match(/\[(.*?)\]\((.*?)\)/); - if (linkMatch) { - const linkText = linkMatch[1]; - const url = linkMatch[2]; - styledPart = - parseMarkdownToANSI(linkText, baseColor) + - ansiColorize(' (', baseColor) + - ansiColorize(url, theme.text.link) + - ansiColorize(')', baseColor); - } - } else if ( - fullMatch.startsWith('') && - fullMatch.endsWith('') && - fullMatch.length > - UNDERLINE_TAG_START_LENGTH + UNDERLINE_TAG_END_LENGTH - 1 - ) { - styledPart = chalk.underline( - parseMarkdownToANSI( - fullMatch.slice( - UNDERLINE_TAG_START_LENGTH, - -UNDERLINE_TAG_END_LENGTH, - ), - baseColor, - ), - ); - } else if (fullMatch.match(/^https?:\/\//)) { - styledPart = ansiColorize(fullMatch, theme.text.link); - } - } catch (e) { - debugLogger.warn('Error parsing inline markdown part:', fullMatch, e); - styledPart = ''; - } - - result += styledPart || ansiColorize(fullMatch, baseColor); - lastIndex = inlineRegex.lastIndex; - } - - if (lastIndex < text.length) { - result += ansiColorize(text.slice(lastIndex), baseColor); - } - - return result; -}; - const RenderInlineInternal: React.FC = ({ text: rawText, defaultColor, diff --git a/packages/cli/src/ui/utils/TableRenderer.tsx b/packages/cli/src/ui/utils/TableRenderer.tsx index 
143b1fe015..6143571f6a 100644 --- a/packages/cli/src/ui/utils/TableRenderer.tsx +++ b/packages/cli/src/ui/utils/TableRenderer.tsx @@ -17,7 +17,7 @@ import { widestLineFromStyledChars, } from 'ink'; import { theme } from '../semantic-colors.js'; -import { parseMarkdownToANSI } from './InlineMarkdownRenderer.js'; +import { parseMarkdownToANSI } from './markdownParsingUtils.js'; import { stripUnsafeCharacters } from './textUtils.js'; interface TableRendererProps { diff --git a/packages/cli/src/ui/utils/markdownParsingUtils.test.ts b/packages/cli/src/ui/utils/markdownParsingUtils.test.ts index 05f19f09f7..a9ff96401f 100644 --- a/packages/cli/src/ui/utils/markdownParsingUtils.test.ts +++ b/packages/cli/src/ui/utils/markdownParsingUtils.test.ts @@ -6,7 +6,7 @@ import { describe, it, expect, beforeAll, vi } from 'vitest'; import chalk from 'chalk'; -import { parseMarkdownToANSI } from './InlineMarkdownRenderer.js'; +import { parseMarkdownToANSI } from './markdownParsingUtils.js'; // Mock the theme to use explicit colors instead of empty strings from the default theme. // This ensures that ansiColorize actually applies ANSI codes that we can verify. 
diff --git a/packages/cli/src/ui/utils/markdownParsingUtils.ts b/packages/cli/src/ui/utils/markdownParsingUtils.ts new file mode 100644 index 0000000000..10f7cb7a40 --- /dev/null +++ b/packages/cli/src/ui/utils/markdownParsingUtils.ts @@ -0,0 +1,216 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import chalk from 'chalk'; +import { + resolveColor, + INK_SUPPORTED_NAMES, + INK_NAME_TO_HEX_MAP, +} from '../themes/color-utils.js'; +import { theme } from '../semantic-colors.js'; +import { debugLogger } from '@google/gemini-cli-core'; + +// Constants for Markdown parsing +const BOLD_MARKER_LENGTH = 2; // For "**" +const ITALIC_MARKER_LENGTH = 1; // For "*" or "_" +const STRIKETHROUGH_MARKER_LENGTH = 2; // For "~~" +const INLINE_CODE_MARKER_LENGTH = 1; // For "`" +const UNDERLINE_TAG_START_LENGTH = 3; // For "<u>" +const UNDERLINE_TAG_END_LENGTH = 4; // For "</u>" + +/** + * Helper to apply color to a string using ANSI escape codes, + * consistent with how Ink's colorize works. + */ +const ansiColorize = (str: string, color: string | undefined): string => { + if (!color) return str; + const resolved = resolveColor(color); + if (!resolved) return str; + + if (resolved.startsWith('#')) { + return chalk.hex(resolved)(str); + } + + const mappedHex = INK_NAME_TO_HEX_MAP[resolved]; + if (mappedHex) { + return chalk.hex(mappedHex)(str); + } + + if (INK_SUPPORTED_NAMES.has(resolved)) { + switch (resolved) { + case 'black': + return chalk.black(str); + case 'red': + return chalk.red(str); + case 'green': + return chalk.green(str); + case 'yellow': + return chalk.yellow(str); + case 'blue': + return chalk.blue(str); + case 'magenta': + return chalk.magenta(str); + case 'cyan': + return chalk.cyan(str); + case 'white': + return chalk.white(str); + case 'gray': + case 'grey': + return chalk.gray(str); + default: + return str; + } + } + + return str; +}; + +/** + * Converts markdown text into a string with ANSI escape codes. 
+ * This mirrors the parsing logic in InlineMarkdownRenderer.tsx + */ +export const parseMarkdownToANSI = ( + text: string, + defaultColor?: string, +): string => { + const baseColor = defaultColor ?? theme.text.primary; + // Early return for plain text without markdown or URLs + if (!/[*_~`<[https?:]/.test(text)) { + return ansiColorize(text, baseColor); + } + + let result = ''; + const inlineRegex = + /(\*\*\*.*?\*\*\*|\*\*.*?\*\*|\*.*?\*|_.*?_|~~.*?~~|\[.*?\]\(.*?\)|`+.+?`+|.*?<\/u>|https?:\/\/\S+)/g; + let lastIndex = 0; + let match; + + while ((match = inlineRegex.exec(text)) !== null) { + if (match.index > lastIndex) { + result += ansiColorize(text.slice(lastIndex, match.index), baseColor); + } + + const fullMatch = match[0]; + let styledPart = ''; + + try { + if ( + fullMatch.endsWith('***') && + fullMatch.startsWith('***') && + fullMatch.length > (BOLD_MARKER_LENGTH + ITALIC_MARKER_LENGTH) * 2 + ) { + styledPart = chalk.bold( + chalk.italic( + parseMarkdownToANSI( + fullMatch.slice( + BOLD_MARKER_LENGTH + ITALIC_MARKER_LENGTH, + -BOLD_MARKER_LENGTH - ITALIC_MARKER_LENGTH, + ), + baseColor, + ), + ), + ); + } else if ( + fullMatch.endsWith('**') && + fullMatch.startsWith('**') && + fullMatch.length > BOLD_MARKER_LENGTH * 2 + ) { + styledPart = chalk.bold( + parseMarkdownToANSI( + fullMatch.slice(BOLD_MARKER_LENGTH, -BOLD_MARKER_LENGTH), + baseColor, + ), + ); + } else if ( + fullMatch.length > ITALIC_MARKER_LENGTH * 2 && + ((fullMatch.startsWith('*') && fullMatch.endsWith('*')) || + (fullMatch.startsWith('_') && fullMatch.endsWith('_'))) && + !/\w/.test(text.substring(match.index - 1, match.index)) && + !/\w/.test( + text.substring(inlineRegex.lastIndex, inlineRegex.lastIndex + 1), + ) && + !/\S[./\\]/.test(text.substring(match.index - 2, match.index)) && + !/[./\\]\S/.test( + text.substring(inlineRegex.lastIndex, inlineRegex.lastIndex + 2), + ) + ) { + styledPart = chalk.italic( + parseMarkdownToANSI( + fullMatch.slice(ITALIC_MARKER_LENGTH, 
-ITALIC_MARKER_LENGTH), + baseColor, + ), + ); + } else if ( + fullMatch.startsWith('~~') && + fullMatch.endsWith('~~') && + fullMatch.length > STRIKETHROUGH_MARKER_LENGTH * 2 + ) { + styledPart = chalk.strikethrough( + parseMarkdownToANSI( + fullMatch.slice( + STRIKETHROUGH_MARKER_LENGTH, + -STRIKETHROUGH_MARKER_LENGTH, + ), + baseColor, + ), + ); + } else if ( + fullMatch.startsWith('`') && + fullMatch.endsWith('`') && + fullMatch.length > INLINE_CODE_MARKER_LENGTH + ) { + const codeMatch = fullMatch.match(/^(`+)(.+?)\1$/s); + if (codeMatch && codeMatch[2]) { + styledPart = ansiColorize(codeMatch[2], theme.text.accent); + } + } else if ( + fullMatch.startsWith('[') && + fullMatch.includes('](') && + fullMatch.endsWith(')') + ) { + const linkMatch = fullMatch.match(/\[(.*?)\]\((.*?)\)/); + if (linkMatch) { + const linkText = linkMatch[1]; + const url = linkMatch[2]; + styledPart = + parseMarkdownToANSI(linkText, baseColor) + + ansiColorize(' (', baseColor) + + ansiColorize(url, theme.text.link) + + ansiColorize(')', baseColor); + } + } else if ( + fullMatch.startsWith('') && + fullMatch.endsWith('') && + fullMatch.length > + UNDERLINE_TAG_START_LENGTH + UNDERLINE_TAG_END_LENGTH - 1 + ) { + styledPart = chalk.underline( + parseMarkdownToANSI( + fullMatch.slice( + UNDERLINE_TAG_START_LENGTH, + -UNDERLINE_TAG_END_LENGTH, + ), + baseColor, + ), + ); + } else if (fullMatch.match(/^https?:\/\//)) { + styledPart = ansiColorize(fullMatch, theme.text.link); + } + } catch (e) { + debugLogger.warn('Error parsing inline markdown part:', fullMatch, e); + styledPart = ''; + } + + result += styledPart || ansiColorize(fullMatch, baseColor); + lastIndex = inlineRegex.lastIndex; + } + + if (lastIndex < text.length) { + result += ansiColorize(text.slice(lastIndex), baseColor); + } + + return result; +}; From 23905bcd770a2b76803fe3eee5e904c53560b7c9 Mon Sep 17 00:00:00 2001 From: Adib234 <30782825+Adib234@users.noreply.github.com> Date: Fri, 27 Feb 2026 12:51:47 -0500 Subject: [PATCH 
37/41] fix(plan): prevent agent from using ask_user for shell command confirmation (#20504) --- .../core/__snapshots__/prompts.test.ts.snap | 36 +++++++++---------- packages/core/src/prompts/snippets.ts | 2 +- .../tools/__snapshots__/shell.test.ts.snap | 8 +++++ .../coreToolsModelSnapshots.test.ts.snap | 6 +++- .../dynamic-declaration-helpers.ts | 8 +++-- .../definitions/model-family-sets/gemini-3.ts | 2 +- 6 files changed, 39 insertions(+), 23 deletions(-) diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index 30603b5443..438251ed1f 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -152,7 +152,7 @@ Use the \`exit_plan_mode\` tool to present the plan and formally request approva - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use \`ask_user\` to ask for permission to run a command. 
- **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage @@ -326,7 +326,7 @@ An approved plan is available for this task at \`/tmp/plans/feature-x.md\`. - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use \`ask_user\` to ask for permission to run a command. - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage @@ -607,7 +607,7 @@ Use the \`exit_plan_mode\` tool to present the plan and formally request approva - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. 
## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use \`ask_user\` to ask for permission to run a command. - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage @@ -758,7 +758,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). 
+- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use \`ask_user\` to ask for permission to run a command. - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage @@ -897,7 +897,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use \`ask_user\` to ask for permission to run a command. - **Security First:** Always apply security best practices. 
Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage @@ -1019,7 +1019,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use \`ask_user\` to ask for permission to run a command. - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage @@ -1656,7 +1656,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. 
## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use \`ask_user\` to ask for permission to run a command. - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage @@ -1807,7 +1807,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). 
+- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use \`ask_user\` to ask for permission to run a command. - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage @@ -1962,7 +1962,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use \`ask_user\` to ask for permission to run a command. - **Security First:** Always apply security best practices. 
Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage @@ -2117,7 +2117,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use \`ask_user\` to ask for permission to run a command. - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage @@ -2268,7 +2268,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. 
## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use \`ask_user\` to ask for permission to run a command. - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage @@ -2411,7 +2411,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). 
+- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use \`ask_user\` to ask for permission to run a command. - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage @@ -2561,7 +2561,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use \`ask_user\` to ask for permission to run a command. - **Security First:** Always apply security best practices. 
Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage @@ -2712,7 +2712,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use \`ask_user\` to ask for permission to run a command. - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage @@ -3104,7 +3104,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. 
## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use \`ask_user\` to ask for permission to run a command. - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage @@ -3255,7 +3255,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). 
+- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use \`ask_user\` to ask for permission to run a command. - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage @@ -3518,7 +3518,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use \`ask_user\` to ask for permission to run a command. - **Security First:** Always apply security best practices. 
Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage @@ -3669,7 +3669,7 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with \`run_shell_command\` that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use \`ask_user\` to ask for permission to run a command. - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 982a366c3b..0de9b11e25 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -331,7 +331,7 @@ export function renderOperationalGuidelines( - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly without excessive justification. Offer alternatives if appropriate. 
## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with ${formatToolName(SHELL_TOOL_NAME)} that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with ${formatToolName(SHELL_TOOL_NAME)} that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). You MUST NOT use ${formatToolName(ASK_USER_TOOL_NAME)} to ask for permission to run a command. - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage diff --git a/packages/core/src/tools/__snapshots__/shell.test.ts.snap b/packages/core/src/tools/__snapshots__/shell.test.ts.snap index 471ce45f6e..b7101cb6b6 100644 --- a/packages/core/src/tools/__snapshots__/shell.test.ts.snap +++ b/packages/core/src/tools/__snapshots__/shell.test.ts.snap @@ -3,6 +3,8 @@ exports[`ShellTool > getDescription > should return the non-windows description when not on windows 1`] = ` "This tool executes a given shell command as \`bash -c \`. Command can start background processes using \`&\`. Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`. 
+ The CLI will automatically prompt the user for confirmation before executing any command provided by this tool, so you MUST NOT ask for permission or confirmation separately (e.g., using ask_user). + Efficiency Guidelines: - Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. - Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). @@ -20,6 +22,8 @@ exports[`ShellTool > getDescription > should return the non-windows description exports[`ShellTool > getDescription > should return the windows description when on windows 1`] = ` "This tool executes a given shell command as \`powershell.exe -NoProfile -Command \`. Command can start background processes using PowerShell constructs such as \`Start-Process -NoNewWindow\` or \`Start-Job\`. + The CLI will automatically prompt the user for confirmation before executing any command provided by this tool, so you MUST NOT ask for permission or confirmation separately (e.g., using ask_user). + Efficiency Guidelines: - Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. - Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). @@ -37,6 +41,8 @@ exports[`ShellTool > getDescription > should return the windows description when exports[`ShellTool > getSchema > should return the base schema when no modelId is provided 1`] = ` "This tool executes a given shell command as \`bash -c \`. Command can start background processes using \`&\`. Command is executed as a subprocess that leads its own process group. 
Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`. + The CLI will automatically prompt the user for confirmation before executing any command provided by this tool, so you MUST NOT ask for permission or confirmation separately (e.g., using ask_user). + Efficiency Guidelines: - Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. - Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). @@ -54,6 +60,8 @@ exports[`ShellTool > getSchema > should return the base schema when no modelId i exports[`ShellTool > getSchema > should return the schema from the resolver when modelId is provided 1`] = ` "This tool executes a given shell command as \`bash -c \`. Command can start background processes using \`&\`. Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`. + The CLI will automatically prompt the user for confirmation before executing any command provided by this tool, so you MUST NOT ask for permission or confirmation separately (e.g., using ask_user). + Efficiency Guidelines: - Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. - Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). 
diff --git a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap index 70cf828d86..4700865d06 100644 --- a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap +++ b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap @@ -568,6 +568,8 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps { "description": "This tool executes a given shell command as \`bash -c \`. To run a command in the background, set the \`is_background\` parameter to true. Do NOT use \`&\` to background commands. Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`. + The CLI will automatically prompt the user for confirmation before executing any command provided by this tool, so you MUST NOT ask for permission or confirmation separately (e.g., using ask_user). + Efficiency Guidelines: - Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. - Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). @@ -859,7 +861,7 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: ask_user 1`] = ` { - "description": "Ask the user one or more questions to gather preferences, clarify requirements, or make decisions. 
When using this tool, prefer providing multiple-choice options with detailed descriptions and enable multi-select where appropriate to provide maximum flexibility.", + "description": "Ask the user one or more questions to gather preferences, clarify requirements, or make decisions. DO NOT use this tool to ask for permission to run shell commands; the run_shell_command tool has built-in confirmation. When using this tool, prefer providing multiple-choice options with detailed descriptions and enable multi-select where appropriate to provide maximum flexibility.", "name": "ask_user", "parametersJsonSchema": { "properties": { @@ -1331,6 +1333,8 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > { "description": "This tool executes a given shell command as \`bash -c \`. To run a command in the background, set the \`is_background\` parameter to true. Do NOT use \`&\` to background commands. Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`. + The CLI will automatically prompt the user for confirmation before executing any command provided by this tool, so you MUST NOT ask for permission or confirmation separately (e.g., using ask_user). + Efficiency Guidelines: - Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information. - Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`). 
diff --git a/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts b/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts index 83ed680ce7..562320e57b 100644 --- a/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts +++ b/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts @@ -45,16 +45,20 @@ export function getShellToolDescription( Background PIDs: Only included if background processes were started. Process Group PGID: Only included if available.`; + const confirmationNote = ` + + The CLI will automatically prompt the user for confirmation before executing any command provided by this tool, so you MUST NOT ask for permission or confirmation separately (e.g., using ask_user).`; + if (os.platform() === 'win32') { const backgroundInstructions = enableInteractiveShell ? 'To run a command in the background, set the `is_background` parameter to true. Do NOT use PowerShell background constructs.' : 'Command can start background processes using PowerShell constructs such as `Start-Process -NoNewWindow` or `Start-Job`.'; - return `This tool executes a given shell command as \`powershell.exe -NoProfile -Command \`. ${backgroundInstructions}${efficiencyGuidelines}${returnedInfo}`; + return `This tool executes a given shell command as \`powershell.exe -NoProfile -Command \`. ${backgroundInstructions}${confirmationNote}${efficiencyGuidelines}${returnedInfo}`; } else { const backgroundInstructions = enableInteractiveShell ? 'To run a command in the background, set the `is_background` parameter to true. Do NOT use `&` to background commands.' : 'Command can start background processes using `&`.'; - return `This tool executes a given shell command as \`bash -c \`. ${backgroundInstructions} Command is executed as a subprocess that leads its own process group. 
Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`.${efficiencyGuidelines}${returnedInfo}`; + return `This tool executes a given shell command as \`bash -c \`. ${backgroundInstructions} Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`.${confirmationNote}${efficiencyGuidelines}${returnedInfo}`; } } diff --git a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts index 7c4fddc9f6..7c1f171366 100644 --- a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts +++ b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts @@ -558,7 +558,7 @@ The agent did not use the todo list because this task could be completed by a ti ask_user: { name: ASK_USER_TOOL_NAME, description: - 'Ask the user one or more questions to gather preferences, clarify requirements, or make decisions. When using this tool, prefer providing multiple-choice options with detailed descriptions and enable multi-select where appropriate to provide maximum flexibility.', + 'Ask the user one or more questions to gather preferences, clarify requirements, or make decisions. DO NOT use this tool to ask for permission to run shell commands; the run_shell_command tool has built-in confirmation. 
When using this tool, prefer providing multiple-choice options with detailed descriptions and enable multi-select where appropriate to provide maximum flexibility.', parametersJsonSchema: { type: 'object', required: ['questions'], From fdd844b405941dcbed4f28cfc0eade51657c22c4 Mon Sep 17 00:00:00 2001 From: Sehoon Shon Date: Fri, 27 Feb 2026 13:04:43 -0500 Subject: [PATCH 38/41] fix(core): disable retries for code assist streaming requests (#20561) --- packages/core/src/code_assist/server.test.ts | 11 ++--------- packages/core/src/code_assist/server.ts | 14 +++++--------- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/packages/core/src/code_assist/server.test.ts b/packages/core/src/code_assist/server.test.ts index aa5cbae7fe..63566c4662 100644 --- a/packages/core/src/code_assist/server.test.ts +++ b/packages/core/src/code_assist/server.test.ts @@ -84,6 +84,7 @@ describe('CodeAssistServer', () => { body: expect.any(String), signal: undefined, retryConfig: { + retryDelay: 1000, retry: 3, noResponseRetries: 3, statusCodesToRetry: [ @@ -410,15 +411,7 @@ describe('CodeAssistServer', () => { 'Content-Type': 'application/json', }, signal: undefined, - retryConfig: { - retry: 3, - noResponseRetries: 3, - statusCodesToRetry: [ - [429, 429], - [499, 499], - [500, 599], - ], - }, + retry: false, }); expect(results).toHaveLength(2); diff --git a/packages/core/src/code_assist/server.ts b/packages/core/src/code_assist/server.ts index dfcc693ca7..2c726b3c1e 100644 --- a/packages/core/src/code_assist/server.ts +++ b/packages/core/src/code_assist/server.ts @@ -62,6 +62,7 @@ export interface HttpOptions { export const CODE_ASSIST_ENDPOINT = 'https://cloudcode-pa.googleapis.com'; export const CODE_ASSIST_API_VERSION = 'v1internal'; +const GENERATE_CONTENT_RETRY_DELAY_IN_MILLISECONDS = 1000; export class CodeAssistServer implements ContentGenerator { constructor( @@ -141,6 +142,7 @@ export class CodeAssistServer implements ContentGenerator { this.sessionId, ), 
req.config?.abortSignal, + GENERATE_CONTENT_RETRY_DELAY_IN_MILLISECONDS, ); const duration = formatProtoJsonDuration(Date.now() - start); const streamingLatency: StreamingLatency = { @@ -294,6 +296,7 @@ export class CodeAssistServer implements ContentGenerator { method: string, req: object, signal?: AbortSignal, + retryDelay: number = 100, ): Promise { const res = await this.client.request({ url: this.getMethodUrl(method), @@ -306,6 +309,7 @@ export class CodeAssistServer implements ContentGenerator { body: JSON.stringify(req), signal, retryConfig: { + retryDelay, retry: 3, noResponseRetries: 3, statusCodesToRetry: [ @@ -361,15 +365,7 @@ export class CodeAssistServer implements ContentGenerator { responseType: 'stream', body: JSON.stringify(req), signal, - retryConfig: { - retry: 3, - noResponseRetries: 3, - statusCodesToRetry: [ - [429, 429], - [499, 499], - [500, 599], - ], - }, + retry: false, }); return (async function* (): AsyncGenerator { From b2d6844f9b844a8e6281cd2a8afd9bf45170cfeb Mon Sep 17 00:00:00 2001 From: Gaurav <39389231+gsquared94@users.noreply.github.com> Date: Fri, 27 Feb 2026 10:15:06 -0800 Subject: [PATCH 39/41] feat(billing): implement G1 AI credits overage flow with billing telemetry (#18590) --- docs/cli/settings.md | 6 + docs/reference/configuration.md | 9 + packages/cli/src/config/settingsSchema.ts | 30 ++ packages/cli/src/test-utils/render.tsx | 2 + packages/cli/src/ui/AppContainer.tsx | 34 ++ .../cli/src/ui/commands/statsCommand.test.ts | 16 +- packages/cli/src/ui/commands/statsCommand.ts | 18 +- .../src/ui/components/DialogManager.test.tsx | 4 + .../cli/src/ui/components/DialogManager.tsx | 24 ++ .../ui/components/EmptyWalletDialog.test.tsx | 218 +++++++++++++ .../src/ui/components/EmptyWalletDialog.tsx | 110 +++++++ .../cli/src/ui/components/Footer.test.tsx | 6 + .../src/ui/components/HistoryItemDisplay.tsx | 1 + .../ui/components/OverageMenuDialog.test.tsx | 228 +++++++++++++ .../src/ui/components/OverageMenuDialog.tsx | 113 +++++++ 
.../cli/src/ui/components/StatsDisplay.tsx | 13 + .../EmptyWalletDialog.test.tsx.snap | 49 +++ .../OverageMenuDialog.test.tsx.snap | 47 +++ .../cli/src/ui/contexts/UIActionsContext.tsx | 3 + .../cli/src/ui/contexts/UIStateContext.tsx | 31 ++ .../src/ui/hooks/creditsFlowHandler.test.ts | 240 ++++++++++++++ .../cli/src/ui/hooks/creditsFlowHandler.ts | 290 +++++++++++++++++ .../src/ui/hooks/useQuotaAndFallback.test.ts | 307 +++++++++++++++++- .../cli/src/ui/hooks/useQuotaAndFallback.ts | 109 ++++++- packages/cli/src/ui/types.ts | 1 + packages/core/src/billing/billing.test.ts | 254 +++++++++++++++ packages/core/src/billing/billing.ts | 186 +++++++++++ packages/core/src/billing/index.ts | 7 + .../core/src/code_assist/codeAssist.test.ts | 4 + packages/core/src/code_assist/codeAssist.ts | 2 + packages/core/src/code_assist/converter.ts | 19 ++ packages/core/src/code_assist/server.ts | 102 +++++- packages/core/src/code_assist/setup.ts | 3 + packages/core/src/code_assist/types.ts | 32 ++ packages/core/src/config/config.ts | 43 +++ packages/core/src/core/contentGenerator.ts | 4 +- .../core/src/core/fakeContentGenerator.ts | 3 +- .../core/src/core/loggingContentGenerator.ts | 6 +- packages/core/src/fallback/handler.ts | 3 + packages/core/src/fallback/types.ts | 1 + packages/core/src/index.ts | 5 + .../core/src/telemetry/billingEvents.test.ts | 206 ++++++++++++ packages/core/src/telemetry/billingEvents.ts | 255 +++++++++++++++ .../core/src/telemetry/conseca-logger.test.ts | 1 + packages/core/src/telemetry/index.ts | 4 + packages/core/src/telemetry/loggers.test.ts | 24 +- packages/core/src/telemetry/loggers.ts | 15 + packages/core/src/telemetry/metrics.ts | 52 +++ packages/core/src/telemetry/sanitize.test.ts | 1 + packages/core/src/telemetry/sdk.test.ts | 1 + .../core/src/telemetry/telemetryAttributes.ts | 2 + packages/core/src/utils/googleErrors.ts | 4 +- .../core/src/utils/googleQuotaErrors.test.ts | 20 ++ packages/core/src/utils/googleQuotaErrors.ts | 19 ++ 
schemas/settings.schema.json | 18 + 55 files changed, 3182 insertions(+), 23 deletions(-) create mode 100644 packages/cli/src/ui/components/EmptyWalletDialog.test.tsx create mode 100644 packages/cli/src/ui/components/EmptyWalletDialog.tsx create mode 100644 packages/cli/src/ui/components/OverageMenuDialog.test.tsx create mode 100644 packages/cli/src/ui/components/OverageMenuDialog.tsx create mode 100644 packages/cli/src/ui/components/__snapshots__/EmptyWalletDialog.test.tsx.snap create mode 100644 packages/cli/src/ui/components/__snapshots__/OverageMenuDialog.test.tsx.snap create mode 100644 packages/cli/src/ui/hooks/creditsFlowHandler.test.ts create mode 100644 packages/cli/src/ui/hooks/creditsFlowHandler.ts create mode 100644 packages/core/src/billing/billing.test.ts create mode 100644 packages/core/src/billing/billing.ts create mode 100644 packages/core/src/billing/index.ts create mode 100644 packages/core/src/telemetry/billingEvents.test.ts create mode 100644 packages/core/src/telemetry/billingEvents.ts diff --git a/docs/cli/settings.md b/docs/cli/settings.md index b0c12116d6..ea5ea1ef93 100644 --- a/docs/cli/settings.md +++ b/docs/cli/settings.md @@ -80,6 +80,12 @@ they appear in the UI. | -------- | ------------- | ---------------------------- | ------- | | IDE Mode | `ide.enabled` | Enable IDE integration mode. | `false` | +### Billing + +| UI Label | Setting | Description | Default | +| ---------------- | ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | +| Overage Strategy | `billing.overageStrategy` | How to handle quota exhaustion when AI credits are available. 'ask' prompts each time, 'always' automatically uses credits, 'never' disables credit usage. 
| `"ask"` | + ### Model | UI Label | Setting | Description | Default | diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index c1c67803b0..5e7e7abacb 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -357,6 +357,15 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `true` - **Requires restart:** Yes +#### `billing` + +- **`billing.overageStrategy`** (enum): + - **Description:** How to handle quota exhaustion when AI credits are + available. 'ask' prompts each time, 'always' automatically uses credits, + 'never' disables credit usage. + - **Default:** `"ask"` + - **Values:** `"ask"`, `"always"`, `"never"` + #### `model` - **`model.name`** (string): diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 48a7641766..ca538c6a5a 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -828,6 +828,36 @@ const SETTINGS_SCHEMA = { ref: 'TelemetrySettings', }, + billing: { + type: 'object', + label: 'Billing', + category: 'Advanced', + requiresRestart: false, + default: {}, + description: 'Billing and AI credits settings.', + showInDialog: false, + properties: { + overageStrategy: { + type: 'enum', + label: 'Overage Strategy', + category: 'Advanced', + requiresRestart: false, + default: 'ask', + description: oneLine` + How to handle quota exhaustion when AI credits are available. + 'ask' prompts each time, 'always' automatically uses credits, + 'never' disables credit usage. 
+ `, + showInDialog: true, + options: [ + { value: 'ask', label: 'Ask each time' }, + { value: 'always', label: 'Always use credits' }, + { value: 'never', label: 'Never use credits' }, + ], + }, + }, + }, + model: { type: 'object', label: 'Model', diff --git a/packages/cli/src/test-utils/render.tsx b/packages/cli/src/test-utils/render.tsx index 2cfb89d0f2..921bd3d7dd 100644 --- a/packages/cli/src/test-utils/render.tsx +++ b/packages/cli/src/test-utils/render.tsx @@ -591,6 +591,8 @@ const mockUIActions: UIActions = { handleClearScreen: vi.fn(), handleProQuotaChoice: vi.fn(), handleValidationChoice: vi.fn(), + handleOverageMenuChoice: vi.fn(), + handleEmptyWalletChoice: vi.fn(), setQueueErrorMessage: vi.fn(), popAllMessages: vi.fn(), handleApiKeySubmit: vi.fn(), diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index 986bcafaa1..1ddee45b0d 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -47,6 +47,7 @@ import { type IdeInfo, type IdeContext, type UserTierId, + type GeminiUserTier, type UserFeedbackPayload, type AgentDefinition, type ApprovalMode, @@ -82,6 +83,8 @@ import { CoreToolCallStatus, generateSteeringAckMessage, buildUserSteeringHintPrompt, + logBillingEvent, + ApiKeyUpdatedEvent, } from '@google/gemini-cli-core'; import { validateAuthMethod } from '../config/auth.js'; import process from 'node:process'; @@ -391,6 +394,9 @@ export const AppContainer = (props: AppContainerProps) => { ? 
{ remaining, limit, resetTime } : undefined; }); + const [paidTier, setPaidTier] = useState( + undefined, + ); const [isConfigInitialized, setConfigInitialized] = useState(false); @@ -686,10 +692,17 @@ export const AppContainer = (props: AppContainerProps) => { handleProQuotaChoice, validationRequest, handleValidationChoice, + // G1 AI Credits + overageMenuRequest, + handleOverageMenuChoice, + emptyWalletRequest, + handleEmptyWalletChoice, } = useQuotaAndFallback({ config, historyManager, userTier, + paidTier, + settings, setModelSwitchedFromQuotaError, onShowAuthSelection: () => setAuthState(AuthState.Updating), }); @@ -729,6 +742,8 @@ export const AppContainer = (props: AppContainerProps) => { const handleAuthSelect = useCallback( async (authType: AuthType | undefined, scope: LoadableSettingScope) => { if (authType) { + const previousAuthType = + config.getContentGeneratorConfig()?.authType ?? 'unknown'; if (authType === AuthType.LOGIN_WITH_GOOGLE) { setAuthContext({ requiresRestart: true }); } else { @@ -741,6 +756,10 @@ export const AppContainer = (props: AppContainerProps) => { config.setRemoteAdminSettings(undefined); await config.refreshAuth(authType); setAuthState(AuthState.Authenticated); + logBillingEvent( + config, + new ApiKeyUpdatedEvent(previousAuthType, authType), + ); } catch (e) { if (e instanceof ChangeAuthRequestedError) { return; @@ -803,6 +822,7 @@ Logging in with Google... Restarting Gemini CLI to continue. // Only sync when not currently authenticating if (authState === AuthState.Authenticated) { setUserTier(config.getUserTier()); + setPaidTier(config.getUserPaidTier()); } }, [config, authState]); @@ -2006,6 +2026,8 @@ Logging in with Google... Restarting Gemini CLI to continue. showIdeRestartPrompt || !!proQuotaRequest || !!validationRequest || + !!overageMenuRequest || + !!emptyWalletRequest || isSessionBrowserOpen || authState === AuthState.AwaitingApiKeyInput || !!newAgents; @@ -2033,6 +2055,8 @@ Logging in with Google... 
Restarting Gemini CLI to continue. hasLoopDetectionConfirmationRequest || !!proQuotaRequest || !!validationRequest || + !!overageMenuRequest || + !!emptyWalletRequest || !!customDialog; const allowPlanMode = @@ -2243,6 +2267,9 @@ Logging in with Google... Restarting Gemini CLI to continue. stats: quotaStats, proQuotaRequest, validationRequest, + // G1 AI Credits dialog state + overageMenuRequest, + emptyWalletRequest, }, contextFileNames, errorCount, @@ -2367,6 +2394,8 @@ Logging in with Google... Restarting Gemini CLI to continue. quotaStats, proQuotaRequest, validationRequest, + overageMenuRequest, + emptyWalletRequest, contextFileNames, errorCount, availableTerminalHeight, @@ -2448,6 +2477,9 @@ Logging in with Google... Restarting Gemini CLI to continue. handleClearScreen, handleProQuotaChoice, handleValidationChoice, + // G1 AI Credits handlers + handleOverageMenuChoice, + handleEmptyWalletChoice, openSessionBrowser, closeSessionBrowser, handleResumeSession, @@ -2534,6 +2566,8 @@ Logging in with Google... Restarting Gemini CLI to continue. 
handleClearScreen, handleProQuotaChoice, handleValidationChoice, + handleOverageMenuChoice, + handleEmptyWalletChoice, openSessionBrowser, closeSessionBrowser, handleResumeSession, diff --git a/packages/cli/src/ui/commands/statsCommand.test.ts b/packages/cli/src/ui/commands/statsCommand.test.ts index 63fe3eb9e5..2f36c333b9 100644 --- a/packages/cli/src/ui/commands/statsCommand.test.ts +++ b/packages/cli/src/ui/commands/statsCommand.test.ts @@ -39,11 +39,18 @@ describe('statsCommand', () => { mockContext.session.stats.sessionStartTime = startTime; }); - it('should display general session stats when run with no subcommand', () => { + it('should display general session stats when run with no subcommand', async () => { if (!statsCommand.action) throw new Error('Command has no action'); - // eslint-disable-next-line @typescript-eslint/no-floating-promises - statsCommand.action(mockContext, ''); + mockContext.services.config = { + refreshUserQuota: vi.fn(), + refreshAvailableCredits: vi.fn(), + getUserTierName: vi.fn(), + getUserPaidTier: vi.fn(), + getModel: vi.fn(), + } as unknown as Config; + + await statsCommand.action(mockContext, ''); const expectedDuration = formatDuration( endTime.getTime() - startTime.getTime(), @@ -55,6 +62,7 @@ describe('statsCommand', () => { tier: undefined, userEmail: 'mock@example.com', currentModel: undefined, + creditBalance: undefined, }); }); @@ -78,6 +86,8 @@ describe('statsCommand', () => { getQuotaRemaining: mockGetQuotaRemaining, getQuotaLimit: mockGetQuotaLimit, getQuotaResetTime: mockGetQuotaResetTime, + getUserPaidTier: vi.fn(), + refreshAvailableCredits: vi.fn(), } as unknown as Config; await statsCommand.action(mockContext, ''); diff --git a/packages/cli/src/ui/commands/statsCommand.ts b/packages/cli/src/ui/commands/statsCommand.ts index b90e7309e1..1ded006618 100644 --- a/packages/cli/src/ui/commands/statsCommand.ts +++ b/packages/cli/src/ui/commands/statsCommand.ts @@ -11,7 +11,10 @@ import type { } from '../types.js'; 
import { MessageType } from '../types.js'; import { formatDuration } from '../utils/formatters.js'; -import { UserAccountManager } from '@google/gemini-cli-core'; +import { + UserAccountManager, + getG1CreditBalance, +} from '@google/gemini-cli-core'; import { type CommandContext, type SlashCommand, @@ -27,8 +30,10 @@ function getUserIdentity(context: CommandContext) { const userEmail = cachedAccount ?? undefined; const tier = context.services.config?.getUserTierName(); + const paidTier = context.services.config?.getUserPaidTier(); + const creditBalance = getG1CreditBalance(paidTier) ?? undefined; - return { selectedAuthType, userEmail, tier }; + return { selectedAuthType, userEmail, tier, creditBalance }; } async function defaultSessionView(context: CommandContext) { @@ -43,7 +48,8 @@ async function defaultSessionView(context: CommandContext) { } const wallDuration = now.getTime() - sessionStartTime.getTime(); - const { selectedAuthType, userEmail, tier } = getUserIdentity(context); + const { selectedAuthType, userEmail, tier, creditBalance } = + getUserIdentity(context); const currentModel = context.services.config?.getModel(); const statsItem: HistoryItemStats = { @@ -53,10 +59,14 @@ async function defaultSessionView(context: CommandContext) { userEmail, tier, currentModel, + creditBalance, }; if (context.services.config) { - const quota = await context.services.config.refreshUserQuota(); + const [quota] = await Promise.all([ + context.services.config.refreshUserQuota(), + context.services.config.refreshAvailableCredits(), + ]); if (quota) { statsItem.quotas = quota; statsItem.pooledRemaining = context.services.config.getQuotaRemaining(); diff --git a/packages/cli/src/ui/components/DialogManager.test.tsx b/packages/cli/src/ui/components/DialogManager.test.tsx index 2dbdd5019b..6329ca89a1 100644 --- a/packages/cli/src/ui/components/DialogManager.test.tsx +++ b/packages/cli/src/ui/components/DialogManager.test.tsx @@ -80,6 +80,8 @@ describe('DialogManager', () => 
{ stats: undefined, proQuotaRequest: null, validationRequest: null, + overageMenuRequest: null, + emptyWalletRequest: null, }, shouldShowIdePrompt: false, isFolderTrustDialogOpen: false, @@ -132,6 +134,8 @@ describe('DialogManager', () => { resolve: vi.fn(), }, validationRequest: null, + overageMenuRequest: null, + emptyWalletRequest: null, }, }, 'ProQuotaDialog', diff --git a/packages/cli/src/ui/components/DialogManager.tsx b/packages/cli/src/ui/components/DialogManager.tsx index c90194052a..32edbc9d3f 100644 --- a/packages/cli/src/ui/components/DialogManager.tsx +++ b/packages/cli/src/ui/components/DialogManager.tsx @@ -18,6 +18,8 @@ import { EditorSettingsDialog } from './EditorSettingsDialog.js'; import { PrivacyNotice } from '../privacy/PrivacyNotice.js'; import { ProQuotaDialog } from './ProQuotaDialog.js'; import { ValidationDialog } from './ValidationDialog.js'; +import { OverageMenuDialog } from './OverageMenuDialog.js'; +import { EmptyWalletDialog } from './EmptyWalletDialog.js'; import { runExitCleanup } from '../../utils/cleanup.js'; import { RELAUNCH_EXIT_CODE } from '../../utils/processUtils.js'; import { SessionBrowser } from './SessionBrowser.js'; @@ -152,6 +154,28 @@ export const DialogManager = ({ /> ); } + if (uiState.quota.overageMenuRequest) { + return ( + + ); + } + if (uiState.quota.emptyWalletRequest) { + return ( + + ); + } if (uiState.shouldShowIdePrompt) { return ( void }, key: string) => { + act(() => { + stdin.write(key); + }); +}; + +describe('EmptyWalletDialog', () => { + const mockOnChoice = vi.fn(); + const mockOnGetCredits = vi.fn(); + + beforeEach(() => { + vi.clearAllMocks(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + describe('rendering', () => { + it('should match snapshot with fallback available', async () => { + const { lastFrame, unmount, waitUntilReady } = renderWithProviders( + , + ); + await waitUntilReady(); + + expect(lastFrame()).toMatchSnapshot(); + unmount(); + }); + + it('should match snapshot 
without fallback', async () => { + const { lastFrame, unmount, waitUntilReady } = renderWithProviders( + , + ); + await waitUntilReady(); + + expect(lastFrame()).toMatchSnapshot(); + unmount(); + }); + + it('should display the model name and usage limit message', async () => { + const { lastFrame, unmount, waitUntilReady } = renderWithProviders( + , + ); + await waitUntilReady(); + + const output = lastFrame() ?? ''; + expect(output).toContain('gemini-2.5-pro'); + expect(output).toContain('Usage limit reached'); + unmount(); + }); + + it('should display purchase prompt and credits update notice', async () => { + const { lastFrame, unmount, waitUntilReady } = renderWithProviders( + , + ); + await waitUntilReady(); + + const output = lastFrame() ?? ''; + expect(output).toContain('purchase more AI Credits'); + expect(output).toContain( + 'Newly purchased AI credits may take a few minutes to update', + ); + unmount(); + }); + + it('should display reset time when provided', async () => { + const { lastFrame, unmount, waitUntilReady } = renderWithProviders( + , + ); + await waitUntilReady(); + + const output = lastFrame() ?? ''; + expect(output).toContain('3:45 PM'); + expect(output).toContain('Access resets at'); + unmount(); + }); + + it('should not display reset time when not provided', async () => { + const { lastFrame, unmount, waitUntilReady } = renderWithProviders( + , + ); + await waitUntilReady(); + + const output = lastFrame() ?? ''; + expect(output).not.toContain('Access resets at'); + unmount(); + }); + + it('should display slash command hints', async () => { + const { lastFrame, unmount, waitUntilReady } = renderWithProviders( + , + ); + await waitUntilReady(); + + const output = lastFrame() ?? 
''; + expect(output).toContain('/stats'); + expect(output).toContain('/model'); + expect(output).toContain('/auth'); + unmount(); + }); + }); + + describe('onChoice handling', () => { + it('should call onGetCredits and onChoice when get_credits is selected', async () => { + // get_credits is the first item, so just press Enter + const { unmount, stdin, waitUntilReady } = renderWithProviders( + , + ); + await waitUntilReady(); + + writeKey(stdin, '\r'); + + await waitFor(() => { + expect(mockOnGetCredits).toHaveBeenCalled(); + expect(mockOnChoice).toHaveBeenCalledWith('get_credits'); + }); + unmount(); + }); + + it('should call onChoice without onGetCredits when onGetCredits is not provided', async () => { + const { unmount, stdin, waitUntilReady } = renderWithProviders( + , + ); + await waitUntilReady(); + + writeKey(stdin, '\r'); + + await waitFor(() => { + expect(mockOnChoice).toHaveBeenCalledWith('get_credits'); + }); + unmount(); + }); + + it('should call onChoice with use_fallback when selected', async () => { + // With fallback: items are [get_credits, use_fallback, stop] + // use_fallback is the second item: Down + Enter + const { unmount, stdin, waitUntilReady } = renderWithProviders( + , + ); + await waitUntilReady(); + + writeKey(stdin, '\x1b[B'); // Down arrow + writeKey(stdin, '\r'); + + await waitFor(() => { + expect(mockOnChoice).toHaveBeenCalledWith('use_fallback'); + }); + unmount(); + }); + + it('should call onChoice with stop when selected', async () => { + // Without fallback: items are [get_credits, stop] + // stop is the second item: Down + Enter + const { unmount, stdin, waitUntilReady } = renderWithProviders( + , + ); + await waitUntilReady(); + + writeKey(stdin, '\x1b[B'); // Down arrow + writeKey(stdin, '\r'); + + await waitFor(() => { + expect(mockOnChoice).toHaveBeenCalledWith('stop'); + }); + unmount(); + }); + }); +}); diff --git a/packages/cli/src/ui/components/EmptyWalletDialog.tsx 
b/packages/cli/src/ui/components/EmptyWalletDialog.tsx new file mode 100644 index 0000000000..25d85829d3 --- /dev/null +++ b/packages/cli/src/ui/components/EmptyWalletDialog.tsx @@ -0,0 +1,110 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type React from 'react'; +import { Box, Text } from 'ink'; +import { RadioButtonSelect } from './shared/RadioButtonSelect.js'; +import { theme } from '../semantic-colors.js'; + +/** Available choices in the empty wallet dialog */ +export type EmptyWalletChoice = 'get_credits' | 'use_fallback' | 'stop'; + +interface EmptyWalletDialogProps { + /** The model that hit the quota limit */ + failedModel: string; + /** The fallback model to offer (omit if none available) */ + fallbackModel?: string; + /** Time when access resets (human-readable) */ + resetTime?: string; + /** Callback to log click and open the browser for purchasing credits */ + onGetCredits?: () => void; + /** Callback when user makes a selection */ + onChoice: (choice: EmptyWalletChoice) => void; +} + +export function EmptyWalletDialog({ + failedModel, + fallbackModel, + resetTime, + onGetCredits, + onChoice, +}: EmptyWalletDialogProps): React.JSX.Element { + const items: Array<{ + label: string; + value: EmptyWalletChoice; + key: string; + }> = [ + { + label: 'Get AI Credits - Open browser to purchase credits', + value: 'get_credits', + key: 'get_credits', + }, + ]; + + if (fallbackModel) { + items.push({ + label: `Switch to ${fallbackModel}`, + value: 'use_fallback', + key: 'use_fallback', + }); + } + + items.push({ + label: 'Stop - Abort request', + value: 'stop', + key: 'stop', + }); + + const handleSelect = (choice: EmptyWalletChoice) => { + if (choice === 'get_credits') { + onGetCredits?.(); + } + onChoice(choice); + }; + + return ( + + + + Usage limit reached for {failedModel}. 
+ + {resetTime && Access resets at {resetTime}.} + + + /stats + {' '} + model for usage details + + + + /model + {' '} + to switch models. + + + + /auth + {' '} + to switch to API key. + + + + To continue using this model now, purchase more AI Credits. + + + + Newly purchased AI credits may take a few minutes to update. + + + + How would you like to proceed? + + + + + + ); +} diff --git a/packages/cli/src/ui/components/Footer.test.tsx b/packages/cli/src/ui/components/Footer.test.tsx index 143e8319a3..2d8662cd5d 100644 --- a/packages/cli/src/ui/components/Footer.test.tsx +++ b/packages/cli/src/ui/components/Footer.test.tsx @@ -177,6 +177,8 @@ describe('