From 44cdb3e3768b505f4cb36d5211d701accb2c57c2 Mon Sep 17 00:00:00 2001 From: Tommaso Sciortino Date: Mon, 30 Mar 2026 13:15:10 -0700 Subject: [PATCH 001/146] fix(cli): resolve missing F12 logs via global console store (#24235) --- packages/cli/src/gemini.tsx | 2 + .../DetailedMessagesDisplay.test.tsx | 25 +- .../ui/components/DetailedMessagesDisplay.tsx | 2 +- .../src/ui/hooks/useConsoleMessages.test.tsx | 200 ++++++---- .../cli/src/ui/hooks/useConsoleMessages.ts | 357 ++++++++---------- 5 files changed, 285 insertions(+), 301 deletions(-) diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index fa22f59267..4872e1b3d1 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -93,6 +93,7 @@ import { setupTerminalAndTheme } from './utils/terminalTheme.js'; import { runDeferredCommand } from './deferred.js'; import { cleanupBackgroundLogs } from './utils/logCleanup.js'; import { SlashCommandConflictHandler } from './services/SlashCommandConflictHandler.js'; +import { initializeConsoleStore } from './ui/hooks/useConsoleMessages.js'; export function validateDnsResolutionOrder( order: string | undefined, @@ -294,6 +295,7 @@ export async function main() { process.exit(ExitCodes.FATAL_INPUT_ERROR); } + initializeConsoleStore(); const isDebugMode = cliConfig.isDebugMode(argv); const consolePatcher = new ConsolePatcher({ stderr: true, diff --git a/packages/cli/src/ui/components/DetailedMessagesDisplay.test.tsx b/packages/cli/src/ui/components/DetailedMessagesDisplay.test.tsx index 30f98a6eda..6cb61ea95c 100644 --- a/packages/cli/src/ui/components/DetailedMessagesDisplay.test.tsx +++ b/packages/cli/src/ui/components/DetailedMessagesDisplay.test.tsx @@ -35,10 +35,7 @@ vi.mock('./shared/ScrollableList.js', () => ({ describe('DetailedMessagesDisplay', () => { beforeEach(() => { - vi.mocked(useConsoleMessages).mockReturnValue({ - consoleMessages: [], - clearConsoleMessages: vi.fn(), - }); + vi.mocked(useConsoleMessages).mockReturnValue([]); }); it('renders nothing when messages are empty', async () => { const { lastFrame, unmount } = await renderWithProviders( @@ -58,10 +55,7 @@ describe('DetailedMessagesDisplay', () => { { type: 'error', content: 'Error message', count: 1 }, { type: 'debug', content: 'Debug message', count: 1 }, ]; - vi.mocked(useConsoleMessages).mockReturnValue({ - consoleMessages: messages, - clearConsoleMessages: vi.fn(), - }); + vi.mocked(useConsoleMessages).mockReturnValue(messages); const { lastFrame, unmount } = await renderWithProviders( , @@ -79,10 +73,7 @@ describe('DetailedMessagesDisplay', () => { const messages: ConsoleMessageItem[] = [ { type: 'error', content: 'Error message', count: 1 }, ]; - vi.mocked(useConsoleMessages).mockReturnValue({ - consoleMessages: messages, - clearConsoleMessages: vi.fn(), - }); + vi.mocked(useConsoleMessages).mockReturnValue(messages); const { lastFrame, unmount } = await renderWithProviders( , @@ -98,10 +89,7 @@ describe('DetailedMessagesDisplay', () => { const messages: ConsoleMessageItem[] = [ { type: 'error', content: 'Error message', count: 1 }, ]; - vi.mocked(useConsoleMessages).mockReturnValue({ - consoleMessages: messages, - clearConsoleMessages: vi.fn(), - }); + vi.mocked(useConsoleMessages).mockReturnValue(messages); const { lastFrame, unmount } = await renderWithProviders( , @@ -117,10 +105,7 @@ describe('DetailedMessagesDisplay', () => { const messages: ConsoleMessageItem[] = [ { type: 'log', content: 'Repeated message', count: 5 }, ]; - vi.mocked(useConsoleMessages).mockReturnValue({ - consoleMessages: messages, - clearConsoleMessages: vi.fn(), - }); + vi.mocked(useConsoleMessages).mockReturnValue(messages); const { lastFrame, unmount } = await renderWithProviders( , diff --git a/packages/cli/src/ui/components/DetailedMessagesDisplay.tsx b/packages/cli/src/ui/components/DetailedMessagesDisplay.tsx index 2daa1c39e3..97e456eb99 100644 --- a/packages/cli/src/ui/components/DetailedMessagesDisplay.tsx +++ b/packages/cli/src/ui/components/DetailedMessagesDisplay.tsx @@ -29,7 +29,7 @@ export const DetailedMessagesDisplay: React.FC< > = ({ maxHeight, width, hasFocus }) => { const scrollableListRef = useRef>(null); - const { consoleMessages } = useConsoleMessages(); + const consoleMessages = useConsoleMessages(); const config = useConfig(); const messages = useMemo(() => { diff --git a/packages/cli/src/ui/hooks/useConsoleMessages.test.tsx b/packages/cli/src/ui/hooks/useConsoleMessages.test.tsx index af78f73447..c062c4bc50 100644 --- a/packages/cli/src/ui/hooks/useConsoleMessages.test.tsx +++ b/packages/cli/src/ui/hooks/useConsoleMessages.test.tsx @@ -7,76 +7,93 @@ import { act, useCallback } from 'react'; import { vi } from 'vitest'; import { render } from '../../test-utils/render.js'; -import { useConsoleMessages } from './useConsoleMessages.js'; -import { CoreEvent, type ConsoleLogPayload } from '@google/gemini-cli-core'; - -// Mock coreEvents -let consoleLogHandler: ((payload: ConsoleLogPayload) => void) | undefined; +import { + useConsoleMessages, + useErrorCount, + initializeConsoleStore, +} from './useConsoleMessages.js'; +import { coreEvents } from '@google/gemini-cli-core'; vi.mock('@google/gemini-cli-core', async (importOriginal) => { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const actual = (await importOriginal()) as any; + const actual = await importOriginal(); + const handlers = new Map void>(); + return { - ...actual, + ...(actual as Record), coreEvents: { - on: vi.fn((event, handler) => { - if (event === CoreEvent.ConsoleLog) { - consoleLogHandler = handler; - } + ...((actual as Record)['coreEvents'] as Record< + string, + unknown + >), + on: vi.fn((event: string, handler: (payload: unknown) => void) => { + handlers.set(event, handler); }), - off: vi.fn((event) => { - if (event === CoreEvent.ConsoleLog) { - consoleLogHandler = undefined; - } + off: vi.fn((event: string) => { + handlers.delete(event); }), - emitConsoleLog: vi.fn(), + // Helper for testing to trigger the handlers + _trigger: (event: string, payload: unknown) => { + handlers.get(event)?.(payload); + }, }, }; }); describe('useConsoleMessages', () => { + let unmounts: Array<() => void> = []; + beforeEach(() => { vi.useFakeTimers(); - consoleLogHandler = undefined; + initializeConsoleStore(); }); afterEach(() => { + for (const unmount of unmounts) { + try { + unmount(); + } catch (_e) { + // Ignore unmount errors + } + } + unmounts = []; vi.runOnlyPendingTimers(); vi.useRealTimers(); vi.restoreAllMocks(); }); const useTestableConsoleMessages = () => { - const { ...rest } = useConsoleMessages(); + const consoleMessages = useConsoleMessages(); const log = useCallback((content: string) => { - if (consoleLogHandler) { - consoleLogHandler({ type: 'log', content }); - } + // @ts-expect-error - internal testing helper + coreEvents._trigger('console-log', { type: 'log', content }); }, []); const error = useCallback((content: string) => { - if (consoleLogHandler) { - consoleLogHandler({ type: 'error', content }); - } + // @ts-expect-error - internal testing helper + coreEvents._trigger('console-log', { type: 'error', content }); + }, []); + const clearConsoleMessages = useCallback(() => { + initializeConsoleStore(); }, []); return { - ...rest, + consoleMessages, log, error, - clearConsoleMessages: rest.clearConsoleMessages, + clearConsoleMessages, }; }; const renderConsoleMessagesHook = async () => { - let hookResult: ReturnType; + let hookResult: ReturnType | undefined; function TestComponent() { hookResult = useTestableConsoleMessages(); return null; } const { unmount } = await render(); + unmounts.push(unmount); return { result: { get current() { - return hookResult; + return hookResult!; }, }, unmount, @@ -93,10 +110,7 @@ describe('useConsoleMessages', () => { act(() => { result.current.log('Test message'); - }); - - await act(async () => { - await vi.advanceTimersByTimeAsync(60); + vi.runAllTimers(); }); expect(result.current.consoleMessages).toEqual([ @@ -111,10 +125,7 @@ describe('useConsoleMessages', () => { result.current.log('Test message'); result.current.log('Test message'); result.current.log('Test message'); - }); - - await act(async () => { - await vi.advanceTimersByTimeAsync(60); + vi.runAllTimers(); }); expect(result.current.consoleMessages).toEqual([ @@ -128,10 +139,7 @@ describe('useConsoleMessages', () => { act(() => { result.current.log('First message'); result.current.error('Second message'); - }); - - await act(async () => { - await vi.advanceTimersByTimeAsync(60); + vi.runAllTimers(); }); expect(result.current.consoleMessages).toEqual([ @@ -139,53 +147,85 @@ describe('useConsoleMessages', () => { { type: 'error', content: 'Second message', count: 1 }, ]); }); +}); - it('should clear all messages when clearConsoleMessages is called', async () => { - const { result } = await renderConsoleMessagesHook(); +describe('useErrorCount', () => { + let unmounts: Array<() => void> = []; - act(() => { - result.current.log('A message'); - }); - - await act(async () => { - await vi.advanceTimersByTimeAsync(60); - }); - - expect(result.current.consoleMessages).toHaveLength(1); - - act(() => { - result.current.clearConsoleMessages(); - }); - - expect(result.current.consoleMessages).toHaveLength(0); + beforeEach(() => { + vi.useFakeTimers(); + initializeConsoleStore(); }); - it('should clear the pending timeout when clearConsoleMessages is called', async () => { - const { result } = await renderConsoleMessagesHook(); - const clearTimeoutSpy = vi.spyOn(global, 'clearTimeout'); - - act(() => { - result.current.log('A message'); - }); - - act(() => { - result.current.clearConsoleMessages(); - }); - - expect(clearTimeoutSpy).toHaveBeenCalled(); - // clearTimeoutSpy.mockRestore() is handled by afterEach restoreAllMocks + afterEach(() => { + for (const unmount of unmounts) { + try { + unmount(); + } catch (_e) { + // Ignore unmount errors + } + } + unmounts = []; + vi.runOnlyPendingTimers(); + vi.useRealTimers(); + vi.restoreAllMocks(); }); - it('should clean up the timeout on unmount', async () => { - const { result, unmount } = await renderConsoleMessagesHook(); - const clearTimeoutSpy = vi.spyOn(global, 'clearTimeout'); + const renderErrorCountHook = async () => { + let hookResult: ReturnType; + function TestComponent() { + hookResult = useErrorCount(); + return null; + } + const { unmount } = await render(); + unmounts.push(unmount); + return { + result: { + get current() { + return hookResult; + }, + }, + unmount, + }; + }; + + it('should initialize with an error count of 0', async () => { + const { result } = await renderErrorCountHook(); + expect(result.current.errorCount).toBe(0); + }); + + it('should increment error count when an error is logged', async () => { + const { result } = await renderErrorCountHook(); + act(() => { + // @ts-expect-error - internal testing helper + coreEvents._trigger('console-log', { type: 'error', content: 'error' }); + vi.runAllTimers(); + }); + expect(result.current.errorCount).toBe(1); + }); + + it('should not increment error count for non-error logs', async () => { + const { result } = await renderErrorCountHook(); + act(() => { + // @ts-expect-error - internal testing helper + coreEvents._trigger('console-log', { type: 'log', content: 'log' }); + vi.runAllTimers(); + }); + expect(result.current.errorCount).toBe(0); + }); + + it('should clear the error count', async () => { + const { result } = await renderErrorCountHook(); + act(() => { + // @ts-expect-error - internal testing helper + coreEvents._trigger('console-log', { type: 'error', content: 'error' }); + vi.runAllTimers(); + }); + expect(result.current.errorCount).toBe(1); act(() => { - result.current.log('A message'); + result.current.clearErrorCount(); }); - - unmount(); - - expect(clearTimeoutSpy).toHaveBeenCalled(); + expect(result.current.errorCount).toBe(0); }); }); diff --git a/packages/cli/src/ui/hooks/useConsoleMessages.ts b/packages/cli/src/ui/hooks/useConsoleMessages.ts index da000a9da1..7cfa0a6ce3 100644 --- a/packages/cli/src/ui/hooks/useConsoleMessages.ts +++ b/packages/cli/src/ui/hooks/useConsoleMessages.ts @@ -4,13 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { - useCallback, - useEffect, - useReducer, - useRef, - startTransition, -} from 'react'; +import { useCallback, useSyncExternalStore } from 'react'; import type { ConsoleMessageItem } from '../types.js'; import { coreEvents, @@ -18,207 +12,170 @@ import { type ConsoleLogPayload, } from '@google/gemini-cli-core'; -export interface UseConsoleMessagesReturn { - consoleMessages: ConsoleMessageItem[]; - clearConsoleMessages: () => void; -} - -type Action = - | { type: 'ADD_MESSAGES'; payload: ConsoleMessageItem[] } - | { type: 'CLEAR' }; - -function consoleMessagesReducer( - state: ConsoleMessageItem[], - action: Action, -): ConsoleMessageItem[] { - const MAX_CONSOLE_MESSAGES = 1000; - switch (action.type) { - case 'ADD_MESSAGES': { - const newMessages = [...state]; - for (const queuedMessage of action.payload) { - const lastMessage = newMessages[newMessages.length - 1]; - if ( - lastMessage && - lastMessage.type === queuedMessage.type && - lastMessage.content === queuedMessage.content - ) { - // Create a new object for the last message to ensure React detects - // the change, preventing mutation of the existing state object. - newMessages[newMessages.length - 1] = { - ...lastMessage, - count: lastMessage.count + 1, - }; - } else { - newMessages.push({ ...queuedMessage, count: 1 }); - } - } - - // Limit the number of messages to prevent memory issues - if (newMessages.length > MAX_CONSOLE_MESSAGES) { - return newMessages.slice(newMessages.length - MAX_CONSOLE_MESSAGES); - } - - return newMessages; - } - case 'CLEAR': - return []; - default: - return state; - } -} - -export function useConsoleMessages(): UseConsoleMessagesReturn { - const [consoleMessages, dispatch] = useReducer(consoleMessagesReducer, []); - const messageQueueRef = useRef([]); - const timeoutRef = useRef(null); - const isProcessingRef = useRef(false); - - const processQueue = useCallback(() => { - if (messageQueueRef.current.length > 0) { - isProcessingRef.current = true; - const messagesToProcess = messageQueueRef.current; - messageQueueRef.current = []; - startTransition(() => { - dispatch({ type: 'ADD_MESSAGES', payload: messagesToProcess }); - }); - } - timeoutRef.current = null; - }, []); - - const handleNewMessage = useCallback( - (message: ConsoleMessageItem) => { - messageQueueRef.current.push(message); - if (!isProcessingRef.current && !timeoutRef.current) { - // Batch updates using a timeout. 50ms is a reasonable delay to batch - // rapid-fire messages without noticeable lag while avoiding React update - // queue flooding. - timeoutRef.current = setTimeout(processQueue, 50); - } - }, - [processQueue], - ); - - // Once the updated consoleMessages have been committed to the screen, - // we can safely process the next batch of queued messages if any exist. - // This completely eliminates overlapping concurrent updates to this state. - useEffect(() => { - isProcessingRef.current = false; - if (messageQueueRef.current.length > 0 && !timeoutRef.current) { - timeoutRef.current = setTimeout(processQueue, 50); - } - }, [consoleMessages, processQueue]); - - useEffect(() => { - const handleConsoleLog = (payload: ConsoleLogPayload) => { - let content = payload.content; - const MAX_CONSOLE_MSG_LENGTH = 10000; - if (content.length > MAX_CONSOLE_MSG_LENGTH) { - content = - content.slice(0, MAX_CONSOLE_MSG_LENGTH) + - `... [Truncated ${content.length - MAX_CONSOLE_MSG_LENGTH} characters]`; - } - - handleNewMessage({ - type: payload.type, - content, - count: 1, - }); - }; - - const handleOutput = (payload: { - isStderr: boolean; - chunk: Uint8Array | string; - }) => { - let content = - typeof payload.chunk === 'string' - ? payload.chunk - : new TextDecoder().decode(payload.chunk); - - const MAX_OUTPUT_CHUNK_LENGTH = 10000; - if (content.length > MAX_OUTPUT_CHUNK_LENGTH) { - content = - content.slice(0, MAX_OUTPUT_CHUNK_LENGTH) + - `... [Truncated ${content.length - MAX_OUTPUT_CHUNK_LENGTH} characters]`; - } - - // It would be nice if we could show stderr as 'warn' but unfortunately - // we log non warning info to stderr before the app starts so that would - // be misleading. - handleNewMessage({ type: 'log', content, count: 1 }); - }; - - coreEvents.on(CoreEvent.ConsoleLog, handleConsoleLog); - coreEvents.on(CoreEvent.Output, handleOutput); - return () => { - coreEvents.off(CoreEvent.ConsoleLog, handleConsoleLog); - coreEvents.off(CoreEvent.Output, handleOutput); - }; - }, [handleNewMessage]); - - const clearConsoleMessages = useCallback(() => { - if (timeoutRef.current) { - clearTimeout(timeoutRef.current); - timeoutRef.current = null; - } - messageQueueRef.current = []; - isProcessingRef.current = true; - startTransition(() => { - dispatch({ type: 'CLEAR' }); - }); - }, []); - - // Cleanup on unmount - useEffect( - () => () => { - if (timeoutRef.current) { - clearTimeout(timeoutRef.current); - } - }, - [], - ); - - return { consoleMessages, clearConsoleMessages }; -} - export interface UseErrorCountReturn { errorCount: number; clearErrorCount: () => void; } +// --- Global Console Store --- + +const MAX_CONSOLE_MESSAGES = 1000; +let globalConsoleMessages: ConsoleMessageItem[] = []; +let globalErrorCount = 0; +const listeners = new Set<() => void>(); + +let messageQueue: ConsoleMessageItem[] = []; +let timeoutId: NodeJS.Timeout | null = null; + +/** + * Initializes the global console store and subscribes to coreEvents. + * Acts as a safe reset function, making it idempotent and useful for test isolation. + * Must be called during application startup. + */ +export function initializeConsoleStore() { + if (timeoutId) { + clearTimeout(timeoutId); + timeoutId = null; + } + messageQueue = []; + globalConsoleMessages = []; + globalErrorCount = 0; + notifyListeners(); + + // Safely detach first to ensure idempotency and prevent listener leaks + coreEvents.off(CoreEvent.ConsoleLog, handleConsoleLog); + coreEvents.off(CoreEvent.Output, handleOutput); + + coreEvents.on(CoreEvent.ConsoleLog, handleConsoleLog); + coreEvents.on(CoreEvent.Output, handleOutput); +} + +function notifyListeners() { + for (const listener of listeners) { + listener(); + } +} + +function processQueue() { + if (messageQueue.length === 0) return; + + // Create a new array to trigger React updates + const newMessages = [...globalConsoleMessages]; + + for (const queuedMessage of messageQueue) { + if (queuedMessage.type === 'error') { + globalErrorCount++; + } + + // Coalesce consecutive identical messages + const prev = newMessages[newMessages.length - 1]; + if ( + prev && + prev.type === queuedMessage.type && + prev.content === queuedMessage.content + ) { + newMessages[newMessages.length - 1] = { + ...prev, + count: prev.count + 1, + }; + } else { + newMessages.push({ ...queuedMessage, count: 1 }); + } + } + + globalConsoleMessages = + newMessages.length > MAX_CONSOLE_MESSAGES + ? newMessages.slice(-MAX_CONSOLE_MESSAGES) + : newMessages; + + messageQueue = []; + timeoutId = null; + notifyListeners(); +} + +function handleNewMessage(message: ConsoleMessageItem) { + messageQueue.push(message); + if (!timeoutId) { + // Batch updates using a timeout. 50ms is a reasonable delay to batch + // rapid-fire messages without noticeable lag while avoiding React update + // queue flooding. + timeoutId = setTimeout(processQueue, 50); + } +} + +// --- Subscription API for useSyncExternalStore --- + +function subscribe(listener: () => void) { + listeners.add(listener); + return () => { + listeners.delete(listener); + }; +} + +function getConsoleMessagesSnapshot() { + return globalConsoleMessages; +} + +function getErrorCountSnapshot() { + return globalErrorCount; +} + +// --- Core Event Listeners (Always active at module level) --- + +const handleConsoleLog = (payload: ConsoleLogPayload) => { + let content = payload.content; + const MAX_CONSOLE_MSG_LENGTH = 10000; + if (content.length > MAX_CONSOLE_MSG_LENGTH) { + content = + content.slice(0, MAX_CONSOLE_MSG_LENGTH) + + `... [Truncated ${content.length - MAX_CONSOLE_MSG_LENGTH} characters]`; + } + + handleNewMessage({ + type: payload.type, + content, + count: 1, + }); +}; + +const handleOutput = (payload: { + isStderr: boolean; + chunk: Uint8Array | string; +}) => { + let content = + typeof payload.chunk === 'string' + ? payload.chunk + : new TextDecoder().decode(payload.chunk); + + const MAX_OUTPUT_CHUNK_LENGTH = 10000; + if (content.length > MAX_OUTPUT_CHUNK_LENGTH) { + content = + content.slice(0, MAX_OUTPUT_CHUNK_LENGTH) + + `... [Truncated ${content.length - MAX_OUTPUT_CHUNK_LENGTH} characters]`; + } + + handleNewMessage({ type: 'log', content, count: 1 }); +}; + +/** + * Hook to access the global console message history. + * Decoupled from any component lifecycle to ensure history is preserved even + * when the UI is unmounted. + */ +export function useConsoleMessages(): ConsoleMessageItem[] { + return useSyncExternalStore(subscribe, getConsoleMessagesSnapshot); +} + +/** + * Hook to access the global error count. + * Uses the same external store as useConsoleMessages for consistency. + */ export function useErrorCount(): UseErrorCountReturn { - const [errorCount, dispatch] = useReducer( - (state: number, action: 'INCREMENT' | 'CLEAR') => { - switch (action) { - case 'INCREMENT': - return state + 1; - case 'CLEAR': - return 0; - default: - return state; - } - }, - 0, - ); - - useEffect(() => { - const handleConsoleLog = (payload: ConsoleLogPayload) => { - if (payload.type === 'error') { - startTransition(() => { - dispatch('INCREMENT'); - }); - } - }; - - coreEvents.on(CoreEvent.ConsoleLog, handleConsoleLog); - return () => { - coreEvents.off(CoreEvent.ConsoleLog, handleConsoleLog); - }; - }, []); + const errorCount = useSyncExternalStore(subscribe, getErrorCountSnapshot); const clearErrorCount = useCallback(() => { - startTransition(() => { - dispatch('CLEAR'); - }); + globalErrorCount = 0; + notifyListeners(); }, []); return { errorCount, clearErrorCount }; From 4b20d93e1d2943f04e46c712041a0717d6cc7438 Mon Sep 17 00:00:00 2001 From: Tommaso Sciortino Date: Mon, 30 Mar 2026 22:25:56 +0000 Subject: [PATCH 002/146] fix broken tests (#24279) --- packages/cli/src/gemini.tsx | 2 -- packages/cli/src/interactiveCli.tsx | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index 4872e1b3d1..fa22f59267 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -93,7 +93,6 @@ import { setupTerminalAndTheme } from './utils/terminalTheme.js'; import { runDeferredCommand } from './deferred.js'; import { cleanupBackgroundLogs } from './utils/logCleanup.js'; import { SlashCommandConflictHandler } from './services/SlashCommandConflictHandler.js'; -import { initializeConsoleStore } from './ui/hooks/useConsoleMessages.js'; export function validateDnsResolutionOrder( order: string | undefined, @@ -295,7 +294,6 @@ export async function main() { process.exit(ExitCodes.FATAL_INPUT_ERROR); } - initializeConsoleStore(); const isDebugMode = cliConfig.isDebugMode(argv); const consolePatcher = new ConsolePatcher({ stderr: true, diff --git a/packages/cli/src/interactiveCli.tsx b/packages/cli/src/interactiveCli.tsx index a6337ef29c..2e0cd25619 100644 --- a/packages/cli/src/interactiveCli.tsx +++ b/packages/cli/src/interactiveCli.tsx @@ -46,6 +46,7 @@ import { TerminalProvider } from './ui/contexts/TerminalContext.js'; import { isAlternateBufferEnabled } from './ui/hooks/useAlternateBuffer.js'; import { OverflowProvider } from './ui/contexts/OverflowContext.js'; import { profiler } from './ui/components/DebugProfiler.js'; +import { initializeConsoleStore } from './ui/hooks/useConsoleMessages.js'; const SLOW_RENDER_MS = 200; @@ -57,6 +58,7 @@ export async function startInteractiveUI( resumedSessionData: ResumedSessionData | undefined, initializationResult: InitializationResult, ) { + initializeConsoleStore(); // Never enter Ink alternate buffer mode when screen reader mode is enabled // as there is no benefit of alternate buffer mode when using a screen reader // and the Ink alternate buffer mode requires line wrapping harmful to From 117a2d384465394f7883e60977fa56772e150584 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Mon, 30 Mar 2026 22:02:53 +0000 Subject: [PATCH 003/146] fix(evals): add update_topic behavioral eval (#24223) --- evals/update_topic.eval.ts | 116 +++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 evals/update_topic.eval.ts diff --git a/evals/update_topic.eval.ts b/evals/update_topic.eval.ts new file mode 100644 index 0000000000..1836e7f61b --- /dev/null +++ b/evals/update_topic.eval.ts @@ -0,0 +1,116 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import { evalTest } from './test-helper.js'; + +describe('update_topic_behavior', () => { + // Constants for tool names and params for robustness + const UPDATE_TOPIC_TOOL_NAME = 'update_topic'; + + /** + * Verifies the desired behavior of the update_topic tool. update_topic is used by the + * agent to share periodic, concise updates about what the agent is working on, independent + * of the regular model output and/or thoughts. This tool is expected to be called at least + * at the start and end of the session, and typically at least once in the middle, but no + * more than 1/4 turns. + */ + evalTest('USUALLY_PASSES', { + name: 'update_topic should be used at start, end and middle for complex tasks', + prompt: `Create a simple users REST API using Express. +1. Initialize a new npm project and install express. +2. Create src/app.ts as the main entry point. +3. Create src/routes/userRoutes.ts for user routes. +4. Create src/controllers/userController.ts for user logic. +5. Implement GET /users, POST /users, and GET /users/:id using an in-memory array. +6. Add a 'start' script to package.json. +7. Finally, run a quick grep to verify the routes are in src/app.ts.`, + files: { + 'package.json': JSON.stringify( + { + name: 'users-api', + version: '1.0.0', + private: true, + }, + null, + 2, + ), + '.gemini/settings.json': JSON.stringify({ + experimental: { + topicUpdateNarration: true, + }, + }), + }, + assert: async (rig, result) => { + const toolLogs = rig.readToolLogs(); + const topicCalls = toolLogs.filter( + (l) => l.toolRequest.name === UPDATE_TOPIC_TOOL_NAME, + ); + + // 1. Assert that update_topic is called at least 3 times (start, middle, end) + expect( + topicCalls.length, + `Expected at least 3 update_topic calls, but found ${topicCalls.length}`, + ).toBeGreaterThanOrEqual(3); + + // 2. Assert update_topic is called at the very beginning (first tool call) + expect( + toolLogs[0].toolRequest.name, + 'First tool call should be update_topic', + ).toBe(UPDATE_TOPIC_TOOL_NAME); + + // 3. Assert update_topic is called near the end + const lastTopicCallIndex = toolLogs + .map((l) => l.toolRequest.name) + .lastIndexOf(UPDATE_TOPIC_TOOL_NAME); + expect( + lastTopicCallIndex, + 'Expected update_topic to be used near the end of the task', + ).toBeGreaterThanOrEqual(toolLogs.length * 0.7); + + // 4. Assert there is at least one update_topic call in the middle (between start and end phases) + const middleTopicCalls = topicCalls.slice(1, -1); + + expect( + middleTopicCalls.length, + 'Expected at least one update_topic call in the middle of the task', + ).toBeGreaterThanOrEqual(1); + + // 5. Turn Ratio Assertion: update_topic should be <= 1/2 of total turns. + // We only enforce this for tasks that take more than 5 turns, as shorter tasks + // naturally have a higher ratio when following the "start, middle, end" rule. + const uniquePromptIds = new Set( + toolLogs + .map((l) => l.toolRequest.prompt_id) + .filter((id) => id !== undefined), + ); + const totalTurns = uniquePromptIds.size; + + if (totalTurns > 5) { + const topicTurns = new Set( + topicCalls + .map((l) => l.toolRequest.prompt_id) + .filter((id) => id !== undefined), + ); + const topicTurnCount = topicTurns.size; + + const ratio = topicTurnCount / totalTurns; + + expect( + ratio, + `update_topic was used in ${topicTurnCount} out of ${totalTurns} turns (${(ratio * 100).toFixed(1)}%). Expected <= 50%.`, + ).toBeLessThanOrEqual(0.5); + + // Ideal ratio is closer to 1/5 (20%). We log high usage as a warning. + if (ratio > 0.25) { + console.warn( + `[Efficiency Warning] update_topic usage is high: ${(ratio * 100).toFixed(1)}% (Goal: ~20%)`, + ); + } + } + }, + }); +}); From dfba0e91e2f2ec8a2c66ec4476ae2c66a82fcd41 Mon Sep 17 00:00:00 2001 From: joshualitt Date: Mon, 30 Mar 2026 15:29:59 -0700 Subject: [PATCH 004/146] feat(core): Unified Context Management and Tool Distillation. (#24157) --- docs/cli/settings.md | 27 +- docs/reference/configuration.md | 64 ++- .../a2a-server/src/utils/testing_utils.ts | 8 +- packages/cli/src/config/config.ts | 12 +- packages/cli/src/config/settingsSchema.ts | 149 ++++-- packages/core/src/config/config.ts | 96 +++- packages/core/src/core/client.test.ts | 17 +- packages/core/src/core/client.ts | 12 +- .../src/scheduler/scheduler_hooks.test.ts | 1 + .../core/src/scheduler/tool-executor.test.ts | 1 + packages/core/src/scheduler/tool-executor.ts | 10 + .../agentHistoryProvider.test.ts.snap | 17 - .../src/services/agentHistoryProvider.test.ts | 468 ++++++++++++++++-- .../core/src/services/agentHistoryProvider.ts | 337 +++++++++++-- .../services/toolDistillationService.test.ts | 101 ++++ .../src/services/toolDistillationService.ts | 293 +++++++++++ packages/core/src/services/types.ts | 15 + packages/core/src/tools/web-fetch.test.ts | 36 ++ packages/core/src/tools/web-fetch.ts | 106 ++-- packages/core/src/utils/tokenCalculation.ts | 4 +- packages/core/src/utils/truncation.ts | 142 ++++++ schemas/settings.schema.json | 115 ++++- 22 files changed, 1717 insertions(+), 314 deletions(-) delete mode 100644 packages/core/src/services/__snapshots__/agentHistoryProvider.test.ts.snap create mode 100644 packages/core/src/services/toolDistillationService.test.ts create mode 100644 packages/core/src/services/toolDistillationService.ts create mode 100644 packages/core/src/services/types.ts create mode 100644 packages/core/src/utils/truncation.ts diff --git a/docs/cli/settings.md b/docs/cli/settings.md index ac1fdc98fc..7e06221b91 100644 --- a/docs/cli/settings.md +++ b/docs/cli/settings.md @@ -155,21 +155,18 @@ they appear in the UI. ### Experimental -| UI Label | Setting | Description | Default | -| ---------------------------------- | ---------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | -| Enable Tool Output Masking | `experimental.toolOutputMasking.enabled` | Enables tool output masking to save tokens. | `true` | -| Enable Git Worktrees | `experimental.worktrees` | Enable automated Git worktree management for parallel work. | `false` | -| Use OSC 52 Paste | `experimental.useOSC52Paste` | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | -| Use OSC 52 Copy | `experimental.useOSC52Copy` | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | -| Plan | `experimental.plan` | Enable Plan Mode. | `true` | -| Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` | -| Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` | -| Memory Manager Agent | `experimental.memoryManager` | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories. | `false` | -| Agent History Truncation | `experimental.agentHistoryTruncation` | Enable truncation window logic for the Agent History Provider. | `false` | -| Agent History Truncation Threshold | `experimental.agentHistoryTruncationThreshold` | The maximum number of messages before history is truncated. | `30` | -| Agent History Retained Messages | `experimental.agentHistoryRetainedMessages` | The number of recent messages to retain after truncation. | `15` | -| Agent History Summarization | `experimental.agentHistorySummarization` | Enable summarization of truncated content via a small model for the Agent History Provider. | `false` | -| Topic & Update Narration | `experimental.topicUpdateNarration` | Enable the experimental Topic & Update communication model for reduced chattiness and structured progress reporting. | `false` | +| UI Label | Setting | Description | Default | +| -------------------------- | ---------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | +| Enable Tool Output Masking | `experimental.toolOutputMasking.enabled` | Enables tool output masking to save tokens. | `true` | +| Enable Git Worktrees | `experimental.worktrees` | Enable automated Git worktree management for parallel work. | `false` | +| Use OSC 52 Paste | `experimental.useOSC52Paste` | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | +| Use OSC 52 Copy | `experimental.useOSC52Copy` | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | +| Plan | `experimental.plan` | Enable Plan Mode. | `true` | +| Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` | +| Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` | +| Memory Manager Agent | `experimental.memoryManager` | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories. | `false` | +| Enable Context Management | `experimental.contextManagement` | Enable logic for context management. | `false` | +| Topic & Update Narration | `experimental.topicUpdateNarration` | Enable the experimental Topic & Update communication model for reduced chattiness and structured progress reporting. | `false` | ### Skills diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index acfb272754..2d57206d47 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -1702,25 +1702,8 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `false` - **Requires restart:** Yes -- **`experimental.agentHistoryTruncation`** (boolean): - - **Description:** Enable truncation window logic for the Agent History - Provider. - - **Default:** `false` - - **Requires restart:** Yes - -- **`experimental.agentHistoryTruncationThreshold`** (number): - - **Description:** The maximum number of messages before history is truncated. - - **Default:** `30` - - **Requires restart:** Yes - -- **`experimental.agentHistoryRetainedMessages`** (number): - - **Description:** The number of recent messages to retain after truncation. - - **Default:** `15` - - **Requires restart:** Yes - -- **`experimental.agentHistorySummarization`** (boolean): - - **Description:** Enable summarization of truncated content via a small model - for the Agent History Provider. +- **`experimental.contextManagement`** (boolean): + - **Description:** Enable logic for context management. - **Default:** `false` - **Requires restart:** Yes @@ -1815,6 +1798,49 @@ their corresponding top-level category object in your `settings.json` file. prioritize available tools dynamically. - **Default:** `[]` +#### `contextManagement` + +- **`contextManagement.historyWindow.maxTokens`** (number): + - **Description:** The number of tokens to allow before triggering + compression. + - **Default:** `150000` + - **Requires restart:** Yes + +- **`contextManagement.historyWindow.retainedTokens`** (number): + - **Description:** The number of tokens to always retain. + - **Default:** `40000` + - **Requires restart:** Yes + +- **`contextManagement.messageLimits.normalMaxTokens`** (number): + - **Description:** The target number of tokens to budget for a normal + conversation turn. + - **Default:** `2500` + - **Requires restart:** Yes + +- **`contextManagement.messageLimits.retainedMaxTokens`** (number): + - **Description:** The maximum number of tokens a single conversation turn can + consume before truncation. + - **Default:** `12000` + - **Requires restart:** Yes + +- **`contextManagement.messageLimits.normalizationHeadRatio`** (number): + - **Description:** The ratio of tokens to retain from the beginning of a + truncated message (0.0 to 1.0). + - **Default:** `0.25` + - **Requires restart:** Yes + +- **`contextManagement.toolDistillation.maxOutputTokens`** (number): + - **Description:** Maximum tokens to show when truncating large tool outputs. + - **Default:** `10000` + - **Requires restart:** Yes + +- **`contextManagement.toolDistillation.summarizationThresholdTokens`** + (number): + - **Description:** Threshold above which truncated tool outputs will be + summarized by an LLM. + - **Default:** `20000` + - **Requires restart:** Yes + #### `admin` - **`admin.secureModeEnabled`** (boolean): diff --git a/packages/a2a-server/src/utils/testing_utils.ts b/packages/a2a-server/src/utils/testing_utils.ts index f7f1645f8c..38b914e840 100644 --- a/packages/a2a-server/src/utils/testing_utils.ts +++ b/packages/a2a-server/src/utils/testing_utils.ts @@ -109,12 +109,8 @@ export function createMockConfig( enableEnvironmentVariableRedaction: false, }, }), - isExperimentalAgentHistoryTruncationEnabled: vi.fn().mockReturnValue(false), - getExperimentalAgentHistoryTruncationThreshold: vi.fn().mockReturnValue(50), - getExperimentalAgentHistoryRetainedMessages: vi.fn().mockReturnValue(30), - isExperimentalAgentHistorySummarizationEnabled: vi - .fn() - .mockReturnValue(false), + isAutoDistillationEnabled: vi.fn().mockReturnValue(false), + getContextManagementConfig: vi.fn().mockReturnValue({ enabled: false }), ...overrides, } as unknown as Config; diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 25419a2d6c..6919c0d805 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -977,14 +977,10 @@ export async function loadCliConfig( disabledSkills: settings.skills?.disabled, experimentalJitContext: settings.experimental?.jitContext, experimentalMemoryManager: settings.experimental?.memoryManager, - experimentalAgentHistoryTruncation: - settings.experimental?.agentHistoryTruncation, - experimentalAgentHistoryTruncationThreshold: - settings.experimental?.agentHistoryTruncationThreshold, - experimentalAgentHistoryRetainedMessages: - settings.experimental?.agentHistoryRetainedMessages, - experimentalAgentHistorySummarization: - settings.experimental?.agentHistorySummarization, + contextManagement: { + enabled: settings.experimental?.contextManagement, + ...settings?.contextManagement, + }, modelSteering: settings.experimental?.modelSteering, topicUpdateNarration: settings.experimental?.topicUpdateNarration, toolOutputMasking: settings.experimental?.toolOutputMasking, diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index c40e87db18..a3adf2dea3 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -2169,44 +2169,13 @@ const SETTINGS_SCHEMA = { 'Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories.', showInDialog: true, }, - agentHistoryTruncation: { + contextManagement: { type: 'boolean', - label: 'Agent History Truncation', + label: 'Enable Context Management', category: 'Experimental', requiresRestart: true, default: false, - description: - 'Enable truncation window logic for the Agent History Provider.', - showInDialog: true, - }, - agentHistoryTruncationThreshold: { - type: 'number', - label: 'Agent History Truncation Threshold', - category: 'Experimental', - requiresRestart: true, - default: 30, - description: - 'The maximum number of messages before history is truncated.', - showInDialog: true, - }, - agentHistoryRetainedMessages: { - type: 'number', - label: 'Agent History Retained Messages', - category: 'Experimental', - requiresRestart: true, - default: 15, - description: - 'The number of recent messages to retain after truncation.', - showInDialog: true, - }, - agentHistorySummarization: { - type: 'boolean', - label: 'Agent History Summarization', - category: 'Experimental', - requiresRestart: true, - default: false, - description: - 'Enable summarization of truncated content via a small model for the Agent History Provider.', + description: 'Enable logic for context management.', showInDialog: true, }, topicUpdateNarration: { @@ -2485,6 +2454,118 @@ const SETTINGS_SCHEMA = { }, }, + contextManagement: { + type: 'object', + label: 'Context Management', + category: 'Experimental', + requiresRestart: true, + default: {}, + description: + 'Settings for agent history and tool distillation context management.', + showInDialog: false, + properties: { + historyWindow: { + type: 'object', + label: 'History Window Settings', + category: 'Context Management', + requiresRestart: true, + default: {}, + showInDialog: false, + properties: { + maxTokens: { + type: 'number', + label: 'Max Tokens', + category: 'Context Management', + requiresRestart: true, + default: 150_000, + description: + 'The number of tokens to allow before triggering compression.', + showInDialog: false, + }, + retainedTokens: { + type: 'number', + label: 'Retained Tokens', + category: 'Context Management', + requiresRestart: true, + default: 40_000, + description: 'The number of tokens to always retain.', + showInDialog: false, + }, + }, + }, + messageLimits: { + type: 'object', + label: 'Message Limits', + category: 'Context Management', + requiresRestart: true, + default: {}, + showInDialog: false, + properties: { + normalMaxTokens: { + type: 'number', + label: 'Normal Maximum Tokens', + category: 'Context Management', + requiresRestart: true, + default: 2500, + description: + 'The target number of tokens to budget for a normal conversation turn.', + showInDialog: false, + }, + retainedMaxTokens: { + type: 'number', + label: 'Retained Maximum Tokens', + category: 'Context Management', + requiresRestart: true, + default: 12000, + description: + 'The maximum number of tokens a single conversation turn can consume before truncation.', + showInDialog: false, + }, + normalizationHeadRatio: { + type: 'number', + label: 'Normalization Head Ratio', + category: 'Context Management', + requiresRestart: true, + default: 0.25, + description: + 'The ratio of tokens to retain from the beginning of a truncated message (0.0 to 1.0).', + showInDialog: false, + }, + }, + }, + toolDistillation: { + type: 'object', + label: 'Tool Distillation', + category: 'Context Management', + requiresRestart: true, + default: {}, + showInDialog: false, + properties: { + maxOutputTokens: { + type: 'number', + label: 'Max Output Tokens', + category: 'Context Management', + requiresRestart: true, + default: 10_000, + description: + 'Maximum tokens to show when truncating large tool outputs.', + showInDialog: false, + }, + summarizationThresholdTokens: { + type: 'number', + label: 'Tool Summarization Threshold', + category: 'Context Management', + requiresRestart: true, + default: 20_000, + description: + 'Threshold above which truncated tool outputs will be summarized by an LLM.', + showInDialog: false, + }, + }, + }, + }, + }, + admin: { type: 'object', label: 'Admin', diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 075c5439ad..ec39016933 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -11,6 +11,7 @@ import { inspect } from 'node:util'; import process from 'node:process'; import { z } from 'zod'; import type { ConversationRecord } from '../services/chatRecordingService.js'; +import type { AgentHistoryProviderConfig } from '../services/types.js'; export type { ConversationRecord }; import { AuthType, @@ -204,6 +205,23 @@ export interface OutputSettings { format?: OutputFormat; } +export interface ContextManagementConfig { + enabled: boolean; + historyWindow: { + maxTokens: number; + retainedTokens: number; + }; + messageLimits: { + normalMaxTokens: number; + retainedMaxTokens: number; + normalizationHeadRatio: number; + }; + toolDistillation: { + maxOutputTokens: number; + summarizationThresholdTokens: number; + }; +} + export interface ToolOutputMaskingConfig { enabled: boolean; toolProtectionThreshold: number; @@ -674,6 +692,7 @@ export interface ConfigParameters { enableHooks?: boolean; enableHooksUI?: boolean; experiments?: Experiments; + contextManagement?: Partial; hooks?: { [K in HookEventName]?: HookDefinition[] }; disabledHooks?: string[]; projectHooks?: { [K in HookEventName]?: HookDefinition[] }; @@ -683,6 +702,7 @@ export interface ConfigParameters { disabledSkills?: string[]; adminSkillsEnabled?: boolean; experimentalJitContext?: boolean; + autoDistillation?: boolean; experimentalMemoryManager?: boolean; experimentalAgentHistoryTruncation?: boolean; experimentalAgentHistoryTruncationThreshold?: number; @@ -919,13 +939,8 @@ export class Config implements McpContext, AgentLoopContext { private readonly skillsSupport: boolean; private disabledSkills: string[]; private readonly adminSkillsEnabled: boolean; - private readonly experimentalJitContext: boolean; private readonly experimentalMemoryManager: boolean; - private readonly experimentalAgentHistoryTruncation: boolean; - private readonly experimentalAgentHistoryTruncationThreshold: number; - private readonly experimentalAgentHistoryRetainedMessages: number; - private readonly experimentalAgentHistorySummarization: boolean; private readonly memoryBoundaryMarkers: readonly string[]; private readonly topicUpdateNarration: boolean; private readonly disableLLMCorrection: boolean; @@ -934,6 +949,7 @@ export class Config implements McpContext, AgentLoopContext { private readonly planModeRoutingEnabled: boolean; private readonly modelSteering: boolean; private contextManager?: ContextManager; + private readonly contextManagement: ContextManagementConfig; private terminalBackground: string | undefined = undefined; private remoteAdminSettings: AdminControlsSettings | undefined; private latestApiRequest: GenerateContentParameters | undefined; @@ -1136,15 +1152,31 @@ export class Config implements McpContext, AgentLoopContext { this.experimentalJitContext = params.experimentalJitContext ?? true; this.experimentalMemoryManager = params.experimentalMemoryManager ?? false; - this.experimentalAgentHistoryTruncation = - params.experimentalAgentHistoryTruncation ?? false; - this.experimentalAgentHistoryTruncationThreshold = - params.experimentalAgentHistoryTruncationThreshold ?? 30; - this.experimentalAgentHistoryRetainedMessages = - params.experimentalAgentHistoryRetainedMessages ?? 15; - this.experimentalAgentHistorySummarization = - params.experimentalAgentHistorySummarization ?? false; this.memoryBoundaryMarkers = params.memoryBoundaryMarkers ?? ['.git']; + this.contextManagement = { + enabled: params.contextManagement?.enabled ?? false, + historyWindow: { + maxTokens: params.contextManagement?.historyWindow?.maxTokens ?? 150000, + retainedTokens: + params.contextManagement?.historyWindow?.retainedTokens ?? 40000, + }, + messageLimits: { + normalMaxTokens: + params.contextManagement?.messageLimits?.normalMaxTokens ?? 2500, + retainedMaxTokens: + params.contextManagement?.messageLimits?.retainedMaxTokens ?? 12000, + normalizationHeadRatio: + params.contextManagement?.messageLimits?.normalizationHeadRatio ?? + 0.25, + }, + toolDistillation: { + maxOutputTokens: + params.contextManagement?.toolDistillation?.maxOutputTokens ?? 10000, + summarizationThresholdTokens: + params.contextManagement?.toolDistillation + ?.summarizationThresholdTokens ?? 20000, + }, + }; this.topicUpdateNarration = params.topicUpdateNarration ?? false; this.modelSteering = params.modelSteering ?? false; this.injectionService = new InjectionService(() => @@ -2330,6 +2362,10 @@ export class Config implements McpContext, AgentLoopContext { return this.experimentalJitContext; } + isAutoDistillationEnabled(): boolean { + return this.contextManagement.enabled; + } + getMemoryBoundaryMarkers(): readonly string[] { return this.memoryBoundaryMarkers; } @@ -2338,20 +2374,22 @@ export class Config implements McpContext, AgentLoopContext { return this.experimentalMemoryManager; } - isExperimentalAgentHistoryTruncationEnabled(): boolean { - return this.experimentalAgentHistoryTruncation; + getContextManagementConfig(): ContextManagementConfig { + return this.contextManagement; } - getExperimentalAgentHistoryTruncationThreshold(): number { - return this.experimentalAgentHistoryTruncationThreshold; - } - - getExperimentalAgentHistoryRetainedMessages(): number { - return this.experimentalAgentHistoryRetainedMessages; - } - - isExperimentalAgentHistorySummarizationEnabled(): boolean { - return this.experimentalAgentHistorySummarization; + get agentHistoryProviderConfig(): AgentHistoryProviderConfig { + return { + isTruncationEnabled: this.contextManagement.enabled, + isSummarizationEnabled: this.contextManagement.enabled, + maxTokens: this.contextManagement.historyWindow.maxTokens, + retainedTokens: this.contextManagement.historyWindow.retainedTokens, + normalMessageTokens: this.contextManagement.messageLimits.normalMaxTokens, + maximumMessageTokens: + this.contextManagement.messageLimits.retainedMaxTokens, + normalizationHeadRatio: + this.contextManagement.messageLimits.normalizationHeadRatio, + }; } isTopicUpdateNarrationEnabled(): boolean { @@ -3241,6 +3279,14 @@ export class Config implements McpContext, AgentLoopContext { ); } + getToolMaxOutputTokens(): number { + return this.contextManagement.toolDistillation.maxOutputTokens; + } + + getToolSummarizationThresholdTokens(): number { + return this.contextManagement.toolDistillation.summarizationThresholdTokens; + } + getNextCompressionTruncationId(): number { return ++this.compressionTruncationCounter; } diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index e741092ce9..033b674cb5 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -279,16 +279,9 @@ describe('Gemini Client (client.ts)', () => { getActiveModel: vi.fn().mockReturnValue('test-model'), setActiveModel: vi.fn(), resetTurn: vi.fn(), - isExperimentalAgentHistoryTruncationEnabled: vi - .fn() - .mockReturnValue(false), - getExperimentalAgentHistoryTruncationThreshold: vi - .fn() - .mockReturnValue(30), - getExperimentalAgentHistoryRetainedMessages: vi.fn().mockReturnValue(15), - isExperimentalAgentHistorySummarizationEnabled: vi - .fn() - .mockReturnValue(false), + + isAutoDistillationEnabled: vi.fn().mockReturnValue(false), + getContextManagementConfig: vi.fn().mockReturnValue({ enabled: false }), getModelAvailabilityService: vi .fn() .mockReturnValue(createAvailabilityServiceMock()), @@ -716,9 +709,9 @@ describe('Gemini Client (client.ts)', () => { describe('sendMessageStream', () => { it('calls AgentHistoryProvider.manageHistory when history truncation is enabled', async () => { // Arrange - mockConfig.isExperimentalAgentHistoryTruncationEnabled = vi + mockConfig.getContextManagementConfig = vi .fn() - .mockReturnValue(true); + .mockReturnValue({ enabled: true }); const manageHistorySpy = vi .spyOn( // eslint-disable-next-line @typescript-eslint/no-explicit-any diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 42adab3a05..765ea6df45 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -115,12 +115,10 @@ export class GeminiClient { constructor(private readonly context: AgentLoopContext) { this.loopDetector = new LoopDetectionService(this.config); this.compressionService = new ChatCompressionService(); - this.agentHistoryProvider = new AgentHistoryProvider(this.config, { - truncationThreshold: - this.config.getExperimentalAgentHistoryTruncationThreshold(), - retainedMessages: - this.config.getExperimentalAgentHistoryRetainedMessages(), - }); + this.agentHistoryProvider = new AgentHistoryProvider( + this.config.agentHistoryProviderConfig, + this.config, + ); this.toolOutputMaskingService = new ToolOutputMaskingService(); this.lastPromptId = this.config.getSessionId(); @@ -621,7 +619,7 @@ export class GeminiClient { // Check for context window overflow const modelForLimitCheck = this._getActiveModelForCurrentTurn(); - if (this.config.isExperimentalAgentHistoryTruncationEnabled()) { + if (this.config.getContextManagementConfig().enabled) { const newHistory = await this.agentHistoryProvider.manageHistory( this.getHistory(), signal, diff --git a/packages/core/src/scheduler/scheduler_hooks.test.ts b/packages/core/src/scheduler/scheduler_hooks.test.ts index 9f7796ffe9..a447d72f1f 100644 --- a/packages/core/src/scheduler/scheduler_hooks.test.ts +++ b/packages/core/src/scheduler/scheduler_hooks.test.ts @@ -75,6 +75,7 @@ function createMockConfig(overrides: Partial = {}): Config { ({ check: async () => ({ decision: 'allow' }), }) as unknown as PolicyEngine, + isAutoDistillationEnabled: () => false, } as unknown as Config; const mockConfig = Object.assign({}, baseConfig, overrides) as Config; diff --git a/packages/core/src/scheduler/tool-executor.test.ts b/packages/core/src/scheduler/tool-executor.test.ts index d94877ef7f..e6f82e149c 100644 --- a/packages/core/src/scheduler/tool-executor.test.ts +++ b/packages/core/src/scheduler/tool-executor.test.ts @@ -75,6 +75,7 @@ describe('ToolExecutor', () => { vi.mocked(fileUtils.formatTruncatedToolOutput).mockReturnValue( 'TruncatedContent...', ); + vi.spyOn(config, 'isAutoDistillationEnabled').mockReturnValue(false); }); afterEach(() => { diff --git a/packages/core/src/scheduler/tool-executor.ts b/packages/core/src/scheduler/tool-executor.ts index a761d3896f..367d69bbfb 100644 --- a/packages/core/src/scheduler/tool-executor.ts +++ b/packages/core/src/scheduler/tool-executor.ts @@ -19,6 +19,7 @@ import { import { isAbortError } from '../utils/errors.js'; import { SHELL_TOOL_NAME } from '../tools/tool-names.js'; import { DiscoveredMCPTool } from '../tools/mcp-tool.js'; +import { ToolOutputDistillationService } from '../services/toolDistillationService.js'; import { executeToolWithHooks } from '../core/coreToolHookTriggers.js'; import { saveTruncatedToolOutput, @@ -196,6 +197,15 @@ export class ToolExecutor { call: ToolCall, content: PartListUnion, ): Promise<{ truncatedContent: PartListUnion; outputFile?: string }> { + if (this.config.isAutoDistillationEnabled()) { + const distiller = new ToolOutputDistillationService( + this.config, + this.context.geminiClient, + this.context.promptId, + ); + return distiller.distill(call.request.name, call.request.callId, content); + } + const toolName = call.request.name; const callId = call.request.callId; let outputFile: string | undefined; diff --git a/packages/core/src/services/__snapshots__/agentHistoryProvider.test.ts.snap b/packages/core/src/services/__snapshots__/agentHistoryProvider.test.ts.snap deleted file mode 100644 index af7990ad52..0000000000 --- a/packages/core/src/services/__snapshots__/agentHistoryProvider.test.ts.snap +++ /dev/null @@ -1,17 +0,0 @@ -// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html - -exports[`AgentHistoryProvider > should handle summarizer failures gracefully 1`] = ` -{ - "parts": [ - { - "text": "[System Note: Prior conversation history was truncated. The most recent user message before truncation was:] - -Message 18", - }, - { - "text": "Message 20", - }, - ], - "role": "user", -} -`; diff --git a/packages/core/src/services/agentHistoryProvider.test.ts b/packages/core/src/services/agentHistoryProvider.test.ts index 7906398bb9..59da3a722c 100644 --- a/packages/core/src/services/agentHistoryProvider.test.ts +++ b/packages/core/src/services/agentHistoryProvider.test.ts @@ -6,13 +6,28 @@ import { describe, it, expect, vi, beforeEach } from 'vitest'; import { AgentHistoryProvider } from './agentHistoryProvider.js'; -import type { Content, GenerateContentResponse } from '@google/genai'; -import type { Config } from '../config/config.js'; +import { estimateTokenCountSync } from '../utils/tokenCalculation.js'; + +vi.mock('../utils/tokenCalculation.js', () => ({ + estimateTokenCountSync: vi.fn(), + ASCII_TOKENS_PER_CHAR: 0.25, + NON_ASCII_TOKENS_PER_CHAR: 1.3, +})); + +import type { Content, GenerateContentResponse, Part } from '@google/genai'; +import type { Config, ContextManagementConfig } from '../config/config.js'; import type { BaseLlmClient } from '../core/baseLlmClient.js'; +import type { AgentHistoryProviderConfig } from './types.js'; +import { + TEXT_TRUNCATION_PREFIX, + TOOL_TRUNCATION_PREFIX, + truncateProportionally, +} from 'src/utils/truncation.js'; describe('AgentHistoryProvider', () => { let config: Config; let provider: AgentHistoryProvider; + let providerConfig: AgentHistoryProviderConfig; let generateContentMock: ReturnType; beforeEach(() => { @@ -20,12 +35,14 @@ describe('AgentHistoryProvider', () => { isExperimentalAgentHistoryTruncationEnabled: vi .fn() .mockReturnValue(false), - isExperimentalAgentHistorySummarizationEnabled: vi - .fn() - .mockReturnValue(false), + getContextManagementConfig: vi.fn().mockReturnValue(false), getBaseLlmClient: vi.fn(), } as unknown as Config; + // By default, messages are small + vi.mocked(estimateTokenCountSync).mockImplementation( + (parts: Part[]) => parts.length * 100, + ); generateContentMock = vi.fn().mockResolvedValue({ candidates: [{ content: { parts: [{ text: 'Mock intent summary' }] } }], } as unknown as GenerateContentResponse); @@ -33,11 +50,16 @@ describe('AgentHistoryProvider', () => { config.getBaseLlmClient = vi.fn().mockReturnValue({ generateContent: generateContentMock, } as unknown as BaseLlmClient); - - provider = new AgentHistoryProvider(config, { - truncationThreshold: 30, - retainedMessages: 15, - }); + providerConfig = { + maxTokens: 60000, + retainedTokens: 40000, + normalMessageTokens: 2500, + maximumMessageTokens: 10000, + normalizationHeadRatio: 0.2, + isSummarizationEnabled: false, + isTruncationEnabled: false, + }; + provider = new AgentHistoryProvider(providerConfig, config); }); const createMockHistory = (count: number): Content[] => @@ -47,10 +69,7 @@ describe('AgentHistoryProvider', () => { })); it('should return history unchanged if truncation is disabled', async () => { - vi.spyOn( - config, - 'isExperimentalAgentHistoryTruncationEnabled', - ).mockReturnValue(false); + providerConfig.isTruncationEnabled = false; const history = createMockHistory(40); const result = await provider.manageHistory(history); @@ -60,10 +79,7 @@ describe('AgentHistoryProvider', () => { }); it('should return history unchanged if length is under threshold', async () => { - vi.spyOn( - config, - 'isExperimentalAgentHistoryTruncationEnabled', - ).mockReturnValue(true); + providerConfig.isTruncationEnabled = true; const history = createMockHistory(20); // Threshold is 30 const result = await provider.manageHistory(history); @@ -72,60 +88,72 @@ describe('AgentHistoryProvider', () => { expect(result.length).toBe(20); }); - it('should truncate mechanically to RETAINED_MESSAGES without summarization when sum flag is off', async () => { - vi.spyOn( - config, - 'isExperimentalAgentHistoryTruncationEnabled', - ).mockReturnValue(true); - vi.spyOn( - config, - 'isExperimentalAgentHistorySummarizationEnabled', - ).mockReturnValue(false); + it('should truncate when total tokens exceed budget, preserving structural integrity', async () => { + providerConfig.isTruncationEnabled = true; + providerConfig.maxTokens = 60000; + providerConfig.retainedTokens = 60000; + vi.spyOn(config, 'getContextManagementConfig').mockReturnValue({ + enabled: false, + } as unknown as ContextManagementConfig); - const history = createMockHistory(35); // Above 30 threshold, should truncate to 15 + // Make each message cost 4000 tokens + vi.mocked(estimateTokenCountSync).mockImplementation( + (parts: Part[]) => parts.length * 4000, + ); + const history = createMockHistory(35); // 35 * 4000 = 140,000 total tokens > maxTokens const result = await provider.manageHistory(history); + // Budget = 60000. Each message costs 4000. 60000 / 4000 = 15. + // However, some messages get normalized. + // The grace period is 15 messages. Their target is MAXIMUM_MESSAGE_TOKENS (10000). + // So the 15 newest messages remain at 4000 tokens each. + // That's 15 * 4000 = 60000 tokens EXACTLY! + // The next older message will push it over budget. + // So EXACTLY 15 messages will be retained. + // If the 15th newest message is a user message with a functionResponse, it might pull in the model call. + // In our createMockHistory, we don't use functionResponses. expect(result.length).toBe(15); expect(generateContentMock).not.toHaveBeenCalled(); - // Check fallback message logic - // Messages 20 to 34 are retained. Message 20 is 'user'. expect(result[0].role).toBe('user'); expect(result[0].parts![0].text).toContain( - 'System Note: Prior conversation history was truncated', + '### [System Note: Conversation History Truncated]', ); }); it('should call summarizer and prepend summary when summarization is enabled', async () => { - vi.spyOn( - config, - 'isExperimentalAgentHistoryTruncationEnabled', - ).mockReturnValue(true); - vi.spyOn( - config, - 'isExperimentalAgentHistorySummarizationEnabled', - ).mockReturnValue(true); + providerConfig.isTruncationEnabled = true; + providerConfig.isSummarizationEnabled = true; + providerConfig.maxTokens = 60000; + providerConfig.retainedTokens = 60000; + vi.spyOn(config, 'getContextManagementConfig').mockReturnValue({ + enabled: true, + } as unknown as ContextManagementConfig); + vi.mocked(estimateTokenCountSync).mockImplementation( + (parts: Part[]) => parts.length * 4000, + ); const history = createMockHistory(35); const result = await provider.manageHistory(history); expect(generateContentMock).toHaveBeenCalled(); - expect(result.length).toBe(15); // retained messages + expect(result.length).toBe(15); expect(result[0].role).toBe('user'); expect(result[0].parts![0].text).toContain(''); expect(result[0].parts![0].text).toContain('Mock intent summary'); }); it('should handle summarizer failures gracefully', async () => { - vi.spyOn( - config, - 'isExperimentalAgentHistoryTruncationEnabled', - ).mockReturnValue(true); - vi.spyOn( - config, - 'isExperimentalAgentHistorySummarizationEnabled', - ).mockReturnValue(true); - + providerConfig.isTruncationEnabled = true; + providerConfig.isSummarizationEnabled = true; + providerConfig.maxTokens = 60000; + providerConfig.retainedTokens = 60000; + vi.spyOn(config, 'getContextManagementConfig').mockReturnValue({ + enabled: true, + } as unknown as ContextManagementConfig); + vi.mocked(estimateTokenCountSync).mockImplementation( + (parts: Part[]) => parts.length * 4000, + ); generateContentMock.mockRejectedValue(new Error('API Error')); const history = createMockHistory(35); @@ -133,6 +161,346 @@ describe('AgentHistoryProvider', () => { expect(generateContentMock).toHaveBeenCalled(); expect(result.length).toBe(15); - expect(result[0]).toMatchSnapshot(); + // Should fallback to fallback text + expect(result[0].parts![0].text).toContain( + '[System Note: Conversation History Truncated]', + ); + }); + + it('should pass the contextual bridge to the summarizer', async () => { + providerConfig.isTruncationEnabled = true; + providerConfig.isSummarizationEnabled = true; + vi.spyOn(config, 'getContextManagementConfig').mockReturnValue({ + enabled: true, + } as unknown as ContextManagementConfig); + + // Max tokens 30 means if total tokens > 30, it WILL truncate. + providerConfig.maxTokens = 30; + // budget 20 tokens means it will keep 2 messages if they are 10 each. + providerConfig.retainedTokens = 20; + + vi.mocked(estimateTokenCountSync).mockImplementation( + (parts: Part[]) => parts.length * 10, + ); + const history: Content[] = [ + { role: 'user', parts: [{ text: 'Old Message' }] }, + { role: 'model', parts: [{ text: 'Old Response' }] }, + { role: 'user', parts: [{ text: 'Keep 1' }] }, + { role: 'user', parts: [{ text: 'Keep 2' }] }, + ]; + + await provider.manageHistory(history); + + expect(generateContentMock).toHaveBeenCalled(); + const callArgs = generateContentMock.mock.calls[0][0]; + const prompt = callArgs.contents[0].parts[0].text; + + expect(prompt).toContain('ACTIVE BRIDGE (LOOKAHEAD):'); + expect(prompt).toContain('Keep 1'); + expect(prompt).toContain('Keep 2'); + }); + + it('should detect a previous summary in the truncated head', async () => { + providerConfig.isTruncationEnabled = true; + providerConfig.isSummarizationEnabled = true; + vi.spyOn(config, 'getContextManagementConfig').mockReturnValue({ + enabled: true, + } as unknown as ContextManagementConfig); + + providerConfig.maxTokens = 20; + providerConfig.retainedTokens = 10; + + vi.mocked(estimateTokenCountSync).mockImplementation( + (parts: Part[]) => parts.length * 10, + ); + const history: Content[] = [ + { + role: 'user', + parts: [{ text: 'Previous Mandate' }], + }, + { role: 'model', parts: [{ text: 'Work' }] }, + { role: 'user', parts: [{ text: 'New Work' }] }, + ]; + + await provider.manageHistory(history); + + expect(generateContentMock).toHaveBeenCalled(); + const callArgs = generateContentMock.mock.calls[0][0]; + const prompt = callArgs.contents[0].parts[0].text; + + expect(prompt).toContain('1. **Previous Summary:**'); + expect(prompt).toContain('PREVIOUS SUMMARY AND TRUNCATED HISTORY:'); + }); + + it('should include the Action Path (necklace of function names) in the prompt', async () => { + providerConfig.isTruncationEnabled = true; + providerConfig.isSummarizationEnabled = true; + vi.spyOn(config, 'getContextManagementConfig').mockReturnValue({ + enabled: true, + } as unknown as ContextManagementConfig); + + providerConfig.maxTokens = 20; + providerConfig.retainedTokens = 10; + + vi.mocked(estimateTokenCountSync).mockImplementation( + (parts: Part[]) => parts.length * 10, + ); + const history: Content[] = [ + { + role: 'model', + parts: [ + { functionCall: { name: 'tool_a', args: {} } }, + { functionCall: { name: 'tool_b', args: {} } }, + ], + }, + { role: 'user', parts: [{ text: 'Keep' }] }, + ]; + + await provider.manageHistory(history); + + expect(generateContentMock).toHaveBeenCalled(); + const callArgs = generateContentMock.mock.calls[0][0]; + const prompt = callArgs.contents[0].parts[0].text; + + expect(prompt).toContain('The Action Path:'); + expect(prompt).toContain('tool_a → tool_b'); + }); + + describe('Tiered Normalization Logic', () => { + it('normalizes large messages incrementally: newest and exit-grace', async () => { + providerConfig.isTruncationEnabled = true; + providerConfig.retainedTokens = 30000; + providerConfig.maximumMessageTokens = 10000; + providerConfig.normalMessageTokens = 2500; // History of 35 messages. + // Index 34: Newest (Grace Zone) -> Target 10000 tokens (~40000 chars) + // Index 19: Exit Grace (35-1-15=19) -> Target 2500 tokens (~10000 chars) + // Index 10: Archived -> Should NOT be normalized in this turn (Incremental optimization) + const history = createMockHistory(35); + const hugeText = 'H'.repeat(100000); + + history[34] = { role: 'user', parts: [{ text: hugeText }] }; + history[19] = { role: 'model', parts: [{ text: hugeText }] }; + history[10] = { role: 'user', parts: [{ text: hugeText }] }; + + // Mock token count to trigger normalization (100k chars = 25k tokens @ 4 chars/token) + vi.mocked(estimateTokenCountSync).mockImplementation((parts: Part[]) => { + if (!parts?.[0]) return 10; + const text = parts[0].text || ''; + if (text.startsWith('H')) return 25000; + return 10; + }); + + const result = await provider.manageHistory(history); + + // 1. Newest message (index 34) normalized to ~40000 chars + const normalizedLast = result[34].parts![0].text!; + expect(normalizedLast).toContain(TEXT_TRUNCATION_PREFIX); + expect(normalizedLast.length).toBeLessThan(50000); + expect(normalizedLast.length).toBeGreaterThan(30000); + + // 2. Exit grace message (index 19) normalized to ~10000 chars + const normalizedArchived = result[19].parts![0].text!; + expect(normalizedArchived).toContain(TEXT_TRUNCATION_PREFIX); + expect(normalizedArchived.length).toBeLessThan(15000); + expect(normalizedArchived.length).toBeGreaterThan(8000); + + // 3. Archived message (index 10) IS touched and normalized to ~10000 chars + const normalizedPastArchived = result[10].parts![0].text!; + expect(normalizedPastArchived).toContain(TEXT_TRUNCATION_PREFIX); + expect(normalizedPastArchived.length).toBeLessThan(15000); + expect(normalizedPastArchived.length).toBeGreaterThan(8000); + }); + + it('normalize function responses correctly by targeting large string values', async () => { + providerConfig.isTruncationEnabled = true; + providerConfig.maximumMessageTokens = 1000; + + const hugeValue = 'O'.repeat(5000); + const history: Content[] = [ + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'test_tool', + id: '1', + response: { + stdout: hugeValue, + stderr: 'small error', + exitCode: 0, + }, + }, + }, + ], + }, + ]; + + vi.mocked(estimateTokenCountSync).mockImplementation( + (parts: readonly Part[]) => { + if (parts?.[0]?.functionResponse) return 5000; + return 10; + }, + ); + + const result = await provider.manageHistory(history); + + const fr = result[0].parts![0].functionResponse!; + const resp = fr.response as Record; + + // stdout should be truncated + expect(resp['stdout']).toContain(TOOL_TRUNCATION_PREFIX); + expect((resp['stdout'] as string).length).toBeLessThan(hugeValue.length); + + // stderr and exitCode should be PRESERVED (JSON integrity) + expect(resp['stderr']).toBe('small error'); + expect(resp['exitCode']).toBe(0); + + // Schema should be intact + expect(fr.name).toBe('test_tool'); + expect(fr.id).toBe('1'); + }); + }); + + describe('truncateProportionally', () => { + it('returns original string if under target chars', () => { + const str = 'A'.repeat(50); + expect(truncateProportionally(str, 100, TEXT_TRUNCATION_PREFIX)).toBe( + str, + ); + }); + + it('truncates proportionally with prefix and ellipsis', () => { + const str = 'A'.repeat(500) + 'B'.repeat(500); // 1000 chars + const target = 100; + const result = truncateProportionally( + str, + target, + TEXT_TRUNCATION_PREFIX, + ); + + expect(result.startsWith(TEXT_TRUNCATION_PREFIX)).toBe(true); + expect(result).toContain('\n...\n'); + + // The prefix and ellipsis take up some space + // It should keep ~20% head and ~80% tail of the *available* space + const ellipsis = '\n...\n'; + const overhead = TEXT_TRUNCATION_PREFIX.length + ellipsis.length + 1; // +1 for the newline after prefix + const availableChars = Math.max(0, target - overhead); + const expectedHeadChars = Math.floor(availableChars * 0.2); + const expectedTailChars = availableChars - expectedHeadChars; + + // Extract parts around the ellipsis + const parts = result.split(ellipsis); + expect(parts.length).toBe(2); + + // Remove prefix + newline from the first part to check head length + const actualHead = parts[0].replace(TEXT_TRUNCATION_PREFIX + '\n', ''); + const actualTail = parts[1]; + + expect(actualHead.length).toBe(expectedHeadChars); + expect(actualTail.length).toBe(expectedTailChars); + }); + + it('handles very small targets gracefully by just returning prefix', () => { + const str = 'A'.repeat(100); + const result = truncateProportionally(str, 10, TEXT_TRUNCATION_PREFIX); + expect(result).toBe(TEXT_TRUNCATION_PREFIX); + }); + }); + + describe('Multi-part Proportional Normalization', () => { + it('distributes token budget proportionally across multiple large parts', async () => { + providerConfig.isTruncationEnabled = true; + providerConfig.maximumMessageTokens = 2500; // Small limit to trigger normalization on last msg + + const history = createMockHistory(35); + + // Make newest message (index 34) have two large parts + // Part 1: 10000 chars (~2500 tokens at 4 chars/token) + // Part 2: 30000 chars (~7500 tokens at 4 chars/token) + // Total tokens = 10000. Target = 2500. Ratio = 0.25. + const part1Text = 'A'.repeat(10000); + const part2Text = 'B'.repeat(30000); + + history[34] = { + role: 'user', + parts: [{ text: part1Text }, { text: part2Text }], + }; + + vi.mocked(estimateTokenCountSync).mockImplementation( + (parts: readonly Part[]) => { + if (!parts || parts.length === 0) return 0; + let tokens = 0; + for (const p of parts) { + if (p.text?.startsWith('A')) tokens += 2500; + else if (p.text?.startsWith('B')) tokens += 7500; + else tokens += 10; + } + return tokens; + }, + ); + + const result = await provider.manageHistory(history); + + const normalizedMsg = result[34]; + expect(normalizedMsg.parts!.length).toBe(2); + + const p1 = normalizedMsg.parts![0].text!; + const p2 = normalizedMsg.parts![1].text!; + + expect(p1).toContain(TEXT_TRUNCATION_PREFIX); + expect(p2).toContain(TEXT_TRUNCATION_PREFIX); + + // Part 1: Target chars ~ 2500 * 0.25 * 4 = 2500 + // Part 2: Target chars ~ 7500 * 0.25 * 4 = 7500 + expect(p1.length).toBeLessThan(3500); + expect(p2.length).toBeLessThan(9000); + expect(p1.length).toBeLessThan(p2.length); + }); + + it('preserves small parts while truncating large parts in the same message', async () => { + providerConfig.isTruncationEnabled = true; + providerConfig.maximumMessageTokens = 2500; + + const history = createMockHistory(35); + + const smallText = 'Hello I am small'; + const hugeText = 'B'.repeat(40000); // 10000 tokens + + history[34] = { + role: 'user', + parts: [{ text: smallText }, { text: hugeText }], + }; + + vi.mocked(estimateTokenCountSync).mockImplementation( + (parts: readonly Part[]) => { + if (!parts || parts.length === 0) return 0; + let tokens = 0; + for (const p of parts) { + if (p.text === smallText) tokens += 10; + else if (p.text?.startsWith('B')) tokens += 10000; + else tokens += 10; + } + return tokens; + }, + ); + + const result = await provider.manageHistory(history); + + const normalizedMsg = result[34]; + expect(normalizedMsg.parts!.length).toBe(2); + + const p1 = normalizedMsg.parts![0].text!; + const p2 = normalizedMsg.parts![1].text!; + + // Small part should be preserved + expect(p1).toBe(smallText); + + // Huge part should be truncated + expect(p2).toContain(TEXT_TRUNCATION_PREFIX); + // Target tokens for huge part = ~2500 * (10000/10010) = ~2500 + // Target chars = ~10000 + expect(p2.length).toBeLessThan(12000); + }); }); }); diff --git a/packages/core/src/services/agentHistoryProvider.ts b/packages/core/src/services/agentHistoryProvider.ts index fa9f23d437..166d59360e 100644 --- a/packages/core/src/services/agentHistoryProvider.ts +++ b/packages/core/src/services/agentHistoryProvider.ts @@ -4,21 +4,27 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type { Content } from '@google/genai'; -import type { Config } from '../config/config.js'; +import type { Content, Part } from '@google/genai'; import { getResponseText } from '../utils/partUtils.js'; +import { estimateTokenCountSync } from '../utils/tokenCalculation.js'; import { LlmRole } from '../telemetry/llmRole.js'; import { debugLogger } from '../utils/debugLogger.js'; - -export interface AgentHistoryProviderConfig { - truncationThreshold: number; - retainedMessages: number; -} +import type { AgentHistoryProviderConfig } from './types.js'; +import type { Config } from '../config/config.js'; +import { + MIN_TARGET_TOKENS, + MIN_CHARS_FOR_TRUNCATION, + TEXT_TRUNCATION_PREFIX, + estimateCharsFromTokens, + truncateProportionally, + normalizeFunctionResponse, +} from '../utils/truncation.js'; export class AgentHistoryProvider { + // TODO(joshualitt): just pass the BaseLlmClient instead of the whole Config. constructor( - private readonly config: Config, private readonly providerConfig: AgentHistoryProviderConfig, + private readonly config: Config, ) {} /** @@ -29,12 +35,29 @@ export class AgentHistoryProvider { history: readonly Content[], abortSignal?: AbortSignal, ): Promise { - if (!this.shouldTruncate(history)) { + if (!this.providerConfig.isTruncationEnabled || history.length === 0) { return history; } + // Step 1: Normalize newest messages. + const normalizedHistory = this.enforceMessageSizeLimits(history); + + const totalTokens = estimateTokenCountSync( + normalizedHistory.flatMap((c) => c.parts || []), + ); + + // Step 2: Check if truncation is needed based on the token threshold (High Watermark) + if (totalTokens <= this.providerConfig.maxTokens) { + return normalizedHistory; + } + + // Step 3: Split into keep/truncate boundaries const { messagesToKeep, messagesToTruncate } = - this.splitHistoryForTruncation(history); + this.splitHistoryForTruncation(normalizedHistory); + + if (messagesToTruncate.length === 0) { + return messagesToKeep; + } debugLogger.log( `AgentHistoryProvider: Truncating ${messagesToTruncate.length} messages, retaining ${messagesToKeep.length} messages.`, @@ -42,65 +65,230 @@ export class AgentHistoryProvider { const summaryText = await this.getSummaryText( messagesToTruncate, + messagesToKeep, abortSignal, ); return this.mergeSummaryWithHistory(summaryText, messagesToKeep); } - private shouldTruncate(history: readonly Content[]): boolean { - if (!this.config.isExperimentalAgentHistoryTruncationEnabled()) { - return false; + /** + * Enforces message size limits on the most recent message and the message + * that just exited the grace zone. + * - Recent messages have a high MAXIMUM limit. + * - Older messages (already processed) are restricted to the NORMAL limit + * once they exit the grace period. + */ + private enforceMessageSizeLimits( + history: readonly Content[], + ): readonly Content[] { + if (history.length === 0) return history; + + let hasChanges = false; + let accumulatedTokens = 0; + + // Scan backwards to find the index where the token budget is exhausted + let graceStartIndex = 0; + for (let i = history.length - 1; i >= 0; i--) { + const msgTokens = estimateTokenCountSync(history[i].parts || []); + accumulatedTokens += msgTokens; + if (accumulatedTokens > this.providerConfig.retainedTokens) { + graceStartIndex = i + 1; + break; + } } - return history.length > this.providerConfig.truncationThreshold; + + const newHistory = history.map((msg, i) => { + const targetTokens = + i < graceStartIndex + ? this.providerConfig.normalMessageTokens + : this.providerConfig.maximumMessageTokens; + + const normalizedMsg = this.normalizeMessage(msg, targetTokens); + if (normalizedMsg !== msg) { + hasChanges = true; + } + return normalizedMsg; + }); + + return hasChanges ? newHistory : history; } + /** + * Normalizes a message by proportionally masking its text or function response + * if its total token count exceeds the target token limit. + */ + private normalizeMessage(msg: Content, targetTokens: number): Content { + const currentTokens = estimateTokenCountSync(msg.parts || []); + if (currentTokens <= targetTokens) { + return msg; + } + + // Calculate the compression ratio to apply to all large parts + const ratio = targetTokens / currentTokens; + + // Proportional compression of the parts to fit the targetTokens budget + // while maintaining API structure (never dropping a part completely). + const newParts: Part[] = []; + for (const part of msg.parts || []) { + if (part.text) { + const partTokens = estimateTokenCountSync([part]); + const targetPartTokens = Math.max( + MIN_TARGET_TOKENS, + Math.floor(partTokens * ratio), + ); + const targetChars = estimateCharsFromTokens( + part.text, + targetPartTokens, + ); + + if ( + part.text.length > targetChars && + targetChars > MIN_CHARS_FOR_TRUNCATION + ) { + const newText = truncateProportionally( + part.text, + targetChars, + TEXT_TRUNCATION_PREFIX, + this.providerConfig.normalizationHeadRatio, + ); + newParts.push({ text: newText }); + } else { + newParts.push(part); + } + } else if (part.functionResponse) { + newParts.push( + normalizeFunctionResponse( + part, + ratio, + this.providerConfig.normalizationHeadRatio, + ), + ); + } else { + newParts.push(part); + } + } + + return { ...msg, parts: newParts }; + } + + /** + * Determines the boundary for splitting history based on the token budget, + * keeping recent messages under a specific target token threshold, + * while ensuring structural integrity (e.g. keeping functionCall/functionResponse pairs). + */ private splitHistoryForTruncation(history: readonly Content[]): { messagesToKeep: readonly Content[]; messagesToTruncate: readonly Content[]; } { + let accumulatedTokens = 0; + let truncationBoundary = 0; // The index of the first message to keep + + // Scan backwards to calculate the boundary based on token budget + for (let i = history.length - 1; i >= 0; i--) { + const msg = history[i]; + const msgTokens = estimateTokenCountSync(msg.parts || []); + + // Token Budget + if (accumulatedTokens + msgTokens > this.providerConfig.retainedTokens) { + // Exceeded budget, stop retaining messages here. + truncationBoundary = i + 1; + break; + } + + accumulatedTokens += msgTokens; + } + + // Ensure structural integrity of the boundary + truncationBoundary = this.adjustBoundaryForIntegrity( + history, + truncationBoundary, + ); + + const messagesToKeep = history.slice(truncationBoundary); + const messagesToTruncate = history.slice(0, truncationBoundary); + return { - messagesToKeep: history.slice(-this.providerConfig.retainedMessages), - messagesToTruncate: history.slice( - 0, - history.length - this.providerConfig.retainedMessages, - ), + messagesToKeep, + messagesToTruncate, }; } + /** + * Adjusts the truncation boundary backwards to prevent breaking functionCall/functionResponse pairs. + */ + private adjustBoundaryForIntegrity( + history: readonly Content[], + boundary: number, + ): number { + let currentBoundary = boundary; + // Ensure we don't start at index 0 or out of bounds. + if (currentBoundary <= 0 || currentBoundary >= history.length) { + return currentBoundary; + } + + while ( + currentBoundary > 0 && + currentBoundary < history.length && + history[currentBoundary].role === 'user' && + history[currentBoundary].parts?.some((p) => p.functionResponse) && + history[currentBoundary - 1].role === 'model' && + history[currentBoundary - 1].parts?.some((p) => p.functionCall) + ) { + currentBoundary--; // Include the functionCall in the retained history + } + return currentBoundary; + } + private getFallbackSummaryText( messagesToTruncate: readonly Content[], ): string { - const defaultNote = - 'System Note: Prior conversation history was truncated to maintain performance and focus. Important context should have been saved to memory.'; + const userMessages = messagesToTruncate.filter((m) => m.role === 'user'); + const modelMessages = messagesToTruncate.filter((m) => m.role === 'model'); - let lastUserText = ''; - for (let i = messagesToTruncate.length - 1; i >= 0; i--) { - const msg = messagesToTruncate[i]; - if (msg.role === 'user') { - lastUserText = - msg.parts - ?.map((p) => p.text || '') - .join('') - .trim() || ''; - if (lastUserText) { - break; - } - } - } + const lastUserText = userMessages + .slice(-1)[0] + ?.parts?.map((p) => p.text || '') + .join('') + .trim(); + + const actionPath = modelMessages + .flatMap( + (m) => + m.parts + ?.filter((p) => p.functionCall) + .map((p) => p.functionCall!.name) || [], + ) + .join(' → '); + + const summaryParts = [ + '### [System Note: Conversation History Truncated]', + 'Prior context was offloaded to maintain performance. Key highlights from the truncated history:', + ]; if (lastUserText) { - return `[System Note: Prior conversation history was truncated. The most recent user message before truncation was:]\n\n${lastUserText}`; + summaryParts.push(`- **Last User Intent:** "${lastUserText}"`); } - return defaultNote; + if (actionPath) { + summaryParts.push(`- **Action Path:** ${actionPath}`); + } + + summaryParts.push( + '- **Notice:** For deeper context, review persistent memory or task-specific logs.', + ); + + return summaryParts.join('\n'); } private async getSummaryText( messagesToTruncate: readonly Content[], + messagesToKeep: readonly Content[], abortSignal?: AbortSignal, ): Promise { - if (!this.config.isExperimentalAgentHistorySummarizationEnabled()) { + if (messagesToTruncate.length === 0) return ''; + + if (!this.providerConfig.isSummarizationEnabled) { debugLogger.log( 'AgentHistoryProvider: Summarization disabled, using fallback note.', ); @@ -108,12 +296,15 @@ export class AgentHistoryProvider { } try { - const summary = await this.generateIntentSummary( + // Use the first few messages of the Grace Zone as a "contextual bridge" + // to give the summarizer lookahead into the current state. + const bridge = messagesToKeep.slice(0, 5); + + return await this.generateIntentSummary( messagesToTruncate, + bridge, abortSignal, ); - debugLogger.log('AgentHistoryProvider: Summarization successful.'); - return summary; } catch (error) { debugLogger.log('AgentHistoryProvider: Summarization failed.', error); return this.getFallbackSummaryText(messagesToTruncate); @@ -124,6 +315,8 @@ export class AgentHistoryProvider { summaryText: string, messagesToKeep: readonly Content[], ): readonly Content[] { + if (!summaryText) return messagesToKeep; + if (messagesToKeep.length === 0) { return [{ role: 'user', parts: [{ text: summaryText }] }]; } @@ -152,22 +345,57 @@ export class AgentHistoryProvider { private async generateIntentSummary( messagesToTruncate: readonly Content[], + bridge: readonly Content[], abortSignal?: AbortSignal, ): Promise { - const prompt = `Create a succinct, agent-continuity focused intent summary of the truncated conversation history. -Distill the essence of the ongoing work by capturing: -- The Original Mandate: What the user (or calling agent) originally requested and why. -- The Agent's Strategy: How you (the agent) are approaching the task and where the work is taking place (e.g., specific files, directories, or architectural layers). -- Evolving Context: Any significant shifts in the user's intent or the agent's technical approach over the course of the truncated history. + // 1. Identify and extract any existing summary from the truncated head + const firstMsg = messagesToTruncate[0]; + const firstPartText = firstMsg?.parts?.[0]?.text || ''; + const hasPreviousSummary = firstPartText.includes(''); -Write this summary to orient the active agent. Do NOT predict next steps or summarize the current task state, as those are covered by the active history. Focus purely on foundational context and strategic continuity.`; + // 2. Extract "The Action Path" (necklace of function names) + const actionPath = messagesToTruncate + .filter((m) => m.role === 'model') + .flatMap( + (m) => + m.parts + ?.filter((p) => p.functionCall) + .map((p) => p.functionCall!.name) || [], + ) + .join(' → '); + + const prompt = `### State Update: Agent Continuity + +The conversation history has been truncated. You are generating a highly factual state summary to preserve the agent's exact working context. + +You have these signals to synthesize: +${hasPreviousSummary ? '1. **Previous Summary:** The existing state before this truncation.\n' : ''}2. **The Action Path:** A chronological list of tools called: [${actionPath}] +3. **Truncated History:** The specific actions, tool inputs, and tool outputs being offloaded. +4. **Active Bridge:** The first few turns of the "Grace Zone" (what follows immediately after this summary), showing the current tactical moment. + +### Your Goal: +Distill these into a high-density Markdown block that orientates the agent on the CONCRETE STATE of the workspace: +- **Primary Goal:** The ultimate objective requested by the user. +- **Verified Facts:** What has been definitively completed or proven (e.g., "File X was created", "Bug Y was reproduced"). +- **Working Set:** The exact file paths currently being analyzed or modified. +- **Active Blockers:** Exact error messages or failing test names currently preventing progress. + +### Constraints: +- **Format:** Wrap the entire response in tags. +- **Factuality:** Base all points strictly on the provided history. Do not invent rationale or assume success without proof. Use exact names and quotes. +- **Brevity:** Maximum 15 lines. No conversational preamble. + +${hasPreviousSummary ? 'PREVIOUS SUMMARY AND TRUNCATED HISTORY:' : 'TRUNCATED HISTORY:'} +${JSON.stringify(messagesToTruncate)} + +ACTIVE BRIDGE (LOOKAHEAD): +${JSON.stringify(bridge)}`; const summaryResponse = await this.config .getBaseLlmClient() .generateContent({ modelConfigKey: { model: 'agent-history-provider-summarizer' }, contents: [ - ...messagesToTruncate, { role: 'user', parts: [{ text: prompt }], @@ -179,7 +407,16 @@ Write this summary to orient the active agent. Do NOT predict next steps or summ }); let summary = getResponseText(summaryResponse) ?? ''; - summary = summary.replace(/<\/?intent_summary>/g, '').trim(); - return `\n${summary}\n`; + // Clean up if the model included extra tags or markdown + summary = summary + .replace(/```markdown/g, '') + .replace(/```/g, '') + .trim(); + + if (!summary.includes('')) { + summary = `\n${summary}\n`; + } + + return summary; } } diff --git a/packages/core/src/services/toolDistillationService.test.ts b/packages/core/src/services/toolDistillationService.test.ts new file mode 100644 index 0000000000..f8a8e3762b --- /dev/null +++ b/packages/core/src/services/toolDistillationService.test.ts @@ -0,0 +1,101 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { ToolOutputDistillationService } from './toolDistillationService.js'; +import type { Config, Part } from '../index.js'; +import type { GeminiClient } from '../core/client.js'; + +describe('ToolOutputDistillationService', () => { + let mockConfig: Config; + let mockGeminiClient: GeminiClient; + let service: ToolOutputDistillationService; + + beforeEach(() => { + mockConfig = { + getToolMaxOutputTokens: vi.fn().mockReturnValue(100), + getToolSummarizationThresholdTokens: vi.fn().mockReturnValue(100), + getUsageStatisticsEnabled: vi.fn().mockReturnValue(false), + storage: { + getProjectTempDir: vi.fn().mockReturnValue('/tmp/gemini'), + }, + telemetry: { + logEvent: vi.fn(), + }, + } as unknown as Config; + mockGeminiClient = { + generateContent: vi.fn().mockResolvedValue({ + candidates: [{ content: { parts: [{ text: 'Mock Intent Summary' }] } }], + }), + } as unknown as GeminiClient; + service = new ToolOutputDistillationService( + mockConfig, + mockGeminiClient, + 'test-prompt-id', + ); + }); + + it('should generate a structural map for oversized content within limits', async () => { + // > threshold * SUMMARIZATION_THRESHOLD (100 * 4 = 400) + const largeContent = 'A'.repeat(500); + const result = await service.distill('test-tool', 'call-1', largeContent); + + expect(mockGeminiClient.generateContent).toHaveBeenCalled(); + const text = + typeof result.truncatedContent === 'string' + ? result.truncatedContent + : (result.truncatedContent as Array<{ text: string }>)[0].text; + expect(text).toContain('Strategic Significance'); + }); + + it('should structurally truncate functionResponse while preserving schema', async () => { + // threshold is 100 + const hugeValue = 'H'.repeat(1000); + const content = [ + { + functionResponse: { + name: 'test_tool', + id: '123', + response: { + stdout: hugeValue, + stderr: 'no error', + }, + }, + }, + ] as unknown as Part[]; + + const result = await service.distill('test-tool', 'call-1', content); + const truncatedParts = result.truncatedContent as Part[]; + expect(truncatedParts.length).toBe(1); + const fr = truncatedParts[0].functionResponse!; + const resp = fr.response as Record; + expect(fr.name).toBe('test_tool'); + expect(resp['stderr']).toBe('no error'); + expect(resp['stdout'] as string).toContain('[Message Normalized'); + expect(resp['stdout'] as string).toContain('Full output saved to'); + }); + + it('should skip structural map for extremely large content exceeding MAX_DISTILLATION_SIZE', async () => { + const massiveContent = 'A'.repeat(1_000_001); // > MAX_DISTILLATION_SIZE + const result = await service.distill('test-tool', 'call-2', massiveContent); + + expect(mockGeminiClient.generateContent).not.toHaveBeenCalled(); + const text = + typeof result.truncatedContent === 'string' + ? result.truncatedContent + : (result.truncatedContent as Array<{ text: string }>)[0].text; + expect(text).not.toContain('Strategic Significance'); + }); + + it('should skip structural map for content below summarization threshold', async () => { + // > threshold but < threshold * SUMMARIZATION_THRESHOLD + const mediumContent = 'A'.repeat(110); + const result = await service.distill('test-tool', 'call-3', mediumContent); + + expect(mockGeminiClient.generateContent).not.toHaveBeenCalled(); + expect(result.truncatedContent).not.toContain('Mock Intent Summary'); + }); +}); diff --git a/packages/core/src/services/toolDistillationService.ts b/packages/core/src/services/toolDistillationService.ts new file mode 100644 index 0000000000..a47638d02b --- /dev/null +++ b/packages/core/src/services/toolDistillationService.ts @@ -0,0 +1,293 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + LlmRole, + ToolOutputTruncatedEvent, + logToolOutputTruncated, + debugLogger, + type Config, +} from '../index.js'; +import type { PartListUnion } from '@google/genai'; +import { type GeminiClient } from '../core/client.js'; +import { saveTruncatedToolOutput } from '../utils/fileUtils.js'; +import { + READ_FILE_TOOL_NAME, + READ_MANY_FILES_TOOL_NAME, +} from '../tools/tool-names.js'; + +import { + truncateProportionally, + TOOL_TRUNCATION_PREFIX, + MIN_TARGET_TOKENS, + estimateCharsFromTokens, + normalizeFunctionResponse, +} from '../utils/truncation.js'; + +// Skip structural map generation for outputs larger than this threshold (in characters) +// as it consumes excessive tokens and may not be representative of the full content. +const MAX_DISTILLATION_SIZE = 1_000_000; + +export interface DistilledToolOutput { + truncatedContent: PartListUnion; + outputFile?: string; +} + +export class ToolOutputDistillationService { + constructor( + private readonly config: Config, + private readonly geminiClient: GeminiClient, + private readonly promptId: string, + ) {} + + /** + * Distills a tool's output if it exceeds configured length thresholds, preserving + * the agent's context window. This includes saving the raw output to disk, replacing + * the output with a truncated placeholder, and optionally summarizing the output + * via a secondary LLM call if the output is massively oversized. + */ + async distill( + toolName: string, + callId: string, + content: PartListUnion, + ): Promise { + // Explicitly bypass escape hatches that natively handle large outputs + if (this.isExemptFromDistillation(toolName)) { + return { truncatedContent: content }; + } + + const maxTokens = this.config.getToolMaxOutputTokens(); + const thresholdChars = maxTokens * 4; + if (thresholdChars <= 0) { + return { truncatedContent: content }; + } + + const originalContentLength = this.calculateContentLength(content); + + if (originalContentLength > thresholdChars) { + return this.performDistillation( + toolName, + callId, + content, + originalContentLength, + thresholdChars, + ); + } + + return { truncatedContent: content }; + } + + private isExemptFromDistillation(toolName: string): boolean { + return ( + toolName === READ_FILE_TOOL_NAME || toolName === READ_MANY_FILES_TOOL_NAME + ); + } + + private calculateContentLength(content: PartListUnion): number { + if (typeof content === 'string') { + return content.length; + } + + if (Array.isArray(content)) { + return content.reduce((acc, part) => { + if (typeof part === 'string') return acc + part.length; + if (part.text) return acc + part.text.length; + if (part.functionResponse?.response) { + // Estimate length of the response object + return acc + JSON.stringify(part.functionResponse.response).length; + } + return acc; + }, 0); + } + + return 0; + } + + private stringifyContent(content: PartListUnion): string { + if (typeof content === 'string') return content; + // For arrays or other objects, we preserve the structural JSON to maintain + // the ability to reconstruct the parts if needed from the saved output. + return JSON.stringify(content, null, 2); + } + + private async performDistillation( + toolName: string, + callId: string, + content: PartListUnion, + originalContentLength: number, + threshold: number, + ): Promise { + const stringifiedContent = this.stringifyContent(content); + + // Save the raw, untruncated string to disk for human review + const { outputFile: savedPath } = await saveTruncatedToolOutput( + stringifiedContent, + toolName, + callId, + this.config.storage.getProjectTempDir(), + this.promptId, + ); + + // If the output is massively oversized, attempt to generate an intent summary + let intentSummaryText = ''; + const summarizationThresholdTokens = + this.config.getToolSummarizationThresholdTokens(); + const summarizationThresholdChars = summarizationThresholdTokens * 4; + + if ( + originalContentLength > summarizationThresholdChars && + originalContentLength <= MAX_DISTILLATION_SIZE + ) { + const summary = await this.generateIntentSummary( + toolName, + stringifiedContent, + Math.floor(MAX_DISTILLATION_SIZE), + ); + + if (summary) { + intentSummaryText = `\n\n--- Strategic Significance of Truncated Content ---\n${summary}`; + } + } + + // Perform structural truncation + const ratio = threshold / originalContentLength; + const truncatedContent = this.truncateContentStructurally( + content, + ratio, + savedPath || 'Output offloaded to disk', + intentSummaryText, + ); + + logToolOutputTruncated( + this.config, + new ToolOutputTruncatedEvent(this.promptId, { + toolName, + originalContentLength, + truncatedContentLength: this.calculateContentLength(truncatedContent), + threshold, + }), + ); + + return { + truncatedContent, + outputFile: savedPath, + }; + } + + /** + * Truncates content while maintaining its Part structure. + */ + private truncateContentStructurally( + content: PartListUnion, + ratio: number, + savedPath: string, + intentSummary: string, + ): PartListUnion { + if (typeof content === 'string') { + const targetTokens = Math.max( + MIN_TARGET_TOKENS, + Math.floor((content.length / 4) * ratio), + ); + const targetChars = estimateCharsFromTokens(content, targetTokens); + + return ( + truncateProportionally(content, targetChars, TOOL_TRUNCATION_PREFIX) + + `\n\nFull output saved to: ${savedPath}` + + intentSummary + ); + } + + if (!Array.isArray(content)) return content; + + return content.map((part) => { + if (typeof part === 'string') { + const text = part; + const targetTokens = Math.max( + MIN_TARGET_TOKENS, + Math.floor((text.length / 4) * ratio), + ); + const targetChars = estimateCharsFromTokens(text, targetTokens); + return truncateProportionally( + text, + targetChars, + TOOL_TRUNCATION_PREFIX, + ); + } + + if (part.text) { + const text = part.text; + const targetTokens = Math.max( + MIN_TARGET_TOKENS, + Math.floor((text.length / 4) * ratio), + ); + const targetChars = estimateCharsFromTokens(text, targetTokens); + return { + text: + truncateProportionally(text, targetChars, TOOL_TRUNCATION_PREFIX) + + `\n\nFull output saved to: ${savedPath}` + + intentSummary, + }; + } + + if (part.functionResponse) { + return normalizeFunctionResponse( + part, + ratio, + 0.2, // default headRatio + savedPath, + intentSummary, + ); + } + + return part; + }); + } + + /** + * Calls the secondary model to distill the strategic "why" signals and intent + * of the truncated content before it is offloaded. + */ + private async generateIntentSummary( + toolName: string, + stringifiedContent: string, + maxPreviewLen: number, + ): Promise { + try { + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 15000); // 15s timeout + + const promptText = `The following output from the tool '${toolName}' is large and has been truncated. Extract the most critical factual information from this output so the main agent doesn't lose context. + +Focus strictly on concrete data points: +1. Exact error messages, exception types, or exit codes. +2. Specific file paths or line numbers mentioned. +3. Definitive outcomes (e.g., 'Compilation succeeded', '3 tests failed'). + +Do not philosophize about the strategic intent. Keep the extraction under 10 lines and use exact quotes where helpful. + +Output to summarize: +${stringifiedContent.slice(0, maxPreviewLen)}...`; + + const summaryResponse = await this.geminiClient.generateContent( + { model: 'agent-history-provider-summarizer' }, + [{ role: 'user', parts: [{ text: promptText }] }], + controller.signal, + LlmRole.UTILITY_COMPRESSOR, + ); + + clearTimeout(timeoutId); + + return summaryResponse.candidates?.[0]?.content?.parts?.[0]?.text; + } catch (e) { + // Fail gracefully, summarization is a progressive enhancement + debugLogger.debug( + 'Failed to generate intent summary for truncated output:', + e instanceof Error ? e.message : String(e), + ); + return undefined; + } + } +} diff --git a/packages/core/src/services/types.ts b/packages/core/src/services/types.ts new file mode 100644 index 0000000000..da6609ad37 --- /dev/null +++ b/packages/core/src/services/types.ts @@ -0,0 +1,15 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +export interface AgentHistoryProviderConfig { + maxTokens: number; + retainedTokens: number; + normalMessageTokens: number; + maximumMessageTokens: number; + normalizationHeadRatio: number; + isSummarizationEnabled: boolean; + isTruncationEnabled: boolean; +} diff --git a/packages/core/src/tools/web-fetch.test.ts b/packages/core/src/tools/web-fetch.test.ts index f52ff214f4..0c7757ebb8 100644 --- a/packages/core/src/tools/web-fetch.test.ts +++ b/packages/core/src/tools/web-fetch.test.ts @@ -293,6 +293,7 @@ describe('WebFetchTool', () => { })), }, isInteractive: () => false, + isAutoDistillationEnabled: vi.fn().mockReturnValue(false), } as unknown as Config; }); @@ -1118,5 +1119,40 @@ describe('WebFetchTool', () => { ); expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_PROCESSING_ERROR); }); + + it('should bypass truncation if isAutoDistillationEnabled is true', async () => { + vi.spyOn(mockConfig, 'isAutoDistillationEnabled').mockReturnValue(true); + const largeContent = 'a'.repeat(300000); // Larger than MAX_CONTENT_LENGTH (250000) + mockFetch('https://example.com/large-text', { + status: 200, + headers: new Headers({ 'content-type': 'text/plain' }), + text: () => Promise.resolve(largeContent), + }); + + const tool = new WebFetchTool(mockConfig, bus); + const invocation = tool.build({ url: 'https://example.com/large-text' }); + const result = await invocation.execute(new AbortController().signal); + + expect((result.llmContent as string).length).toBe(300000); // No truncation + }); + + it('should truncate if isAutoDistillationEnabled is false', async () => { + vi.spyOn(mockConfig, 'isAutoDistillationEnabled').mockReturnValue(false); + const largeContent = 'a'.repeat(300000); // Larger than MAX_CONTENT_LENGTH (250000) + mockFetch('https://example.com/large-text2', { + status: 200, + headers: new Headers({ 'content-type': 'text/plain' }), + text: () => Promise.resolve(largeContent), + }); + + const tool = new WebFetchTool(mockConfig, bus); + const invocation = tool.build({ url: 'https://example.com/large-text2' }); + const result = await invocation.execute(new AbortController().signal); + + expect((result.llmContent as string).length).toBeLessThan(300000); + expect(result.llmContent).toContain( + '[Content truncated due to size limit]', + ); + }); }); }); diff --git a/packages/core/src/tools/web-fetch.ts b/packages/core/src/tools/web-fetch.ts index dc90d892ef..065b33c27d 100644 --- a/packages/core/src/tools/web-fetch.ts +++ b/packages/core/src/tools/web-fetch.ts @@ -338,9 +338,15 @@ class WebFetchToolInvocation extends BaseToolInvocation< textContent = rawContent; } - // Cap at MAX_CONTENT_LENGTH initially to avoid excessive memory usage - // before the global budget allocation. - return truncateString(textContent, MAX_CONTENT_LENGTH, ''); + if (!this.context.config.isAutoDistillationEnabled()) { + return truncateString( + textContent, + MAX_CONTENT_LENGTH, + TRUNCATION_WARNING, + ); + } + + return textContent; } private filterAndValidateUrls(urls: string[]): { @@ -406,28 +412,32 @@ class WebFetchToolInvocation extends BaseToolInvocation< }; } - // Smart Budget Allocation (Water-filling algorithm) for successes - const sortedSuccesses = [...successes].sort( - (a, b) => a.content.length - b.content.length, - ); - - let remainingBudget = MAX_CONTENT_LENGTH; - let remainingUrls = sortedSuccesses.length; const finalContentsByUrl = new Map(); - - for (const success of sortedSuccesses) { - const fairShare = Math.floor(remainingBudget / remainingUrls); - const allocated = Math.min(success.content.length, fairShare); - - const truncated = truncateString( - success.content, - allocated, - TRUNCATION_WARNING, + if (this.context.config.isAutoDistillationEnabled()) { + successes.forEach((success) => + finalContentsByUrl.set(success.url, success.content), ); + } else { + // Smart Budget Allocation (Water-filling algorithm) for successes + const sortedSuccesses = [...successes].sort( + (a, b) => a.content.length - b.content.length, + ); + let remainingBudget = MAX_CONTENT_LENGTH; + let remainingUrls = sortedSuccesses.length; + for (const success of sortedSuccesses) { + const fairShare = Math.floor(remainingBudget / remainingUrls); + const allocated = Math.min(success.content.length, fairShare); - finalContentsByUrl.set(success.url, truncated); - remainingBudget -= truncated.length; - remainingUrls--; + const truncated = truncateString( + success.content, + allocated, + TRUNCATION_WARNING, + ); + + finalContentsByUrl.set(success.url, truncated); + remainingBudget -= truncated.length; + remainingUrls--; + } } const aggregatedContent = uniqueUrls @@ -648,14 +658,21 @@ ${aggregatedContent} ); if (status >= 400) { - const rawResponseText = bodyBuffer.toString('utf8'); + let rawResponseText = bodyBuffer.toString('utf8'); + if (!this.context.config.isAutoDistillationEnabled()) { + rawResponseText = truncateString( + rawResponseText, + 10000, + '\n\n... [Error response truncated] ...', + ); + } const headers: Record = {}; response.headers.forEach((value, key) => { headers[key] = value; }); const errorContent = `Request failed with status ${status} Headers: ${JSON.stringify(headers, null, 2)} -Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response truncated] ...')}`; +Response: ${rawResponseText}`; debugLogger.error( `[WebFetchTool] Experimental fetch failed with status ${status} for ${url}`, ); @@ -671,11 +688,10 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun lowContentType.includes('text/plain') || lowContentType.includes('application/json') ) { - const text = truncateString( - bodyBuffer.toString('utf8'), - MAX_CONTENT_LENGTH, - TRUNCATION_WARNING, - ); + let text = bodyBuffer.toString('utf8'); + if (!this.context.config.isAutoDistillationEnabled()) { + text = truncateString(text, MAX_CONTENT_LENGTH, TRUNCATION_WARNING); + } return { llmContent: text, returnDisplay: `Fetched ${contentType} content from ${url}`, @@ -684,16 +700,19 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun if (lowContentType.includes('text/html')) { const html = bodyBuffer.toString('utf8'); - const textContent = truncateString( - convert(html, { - wordwrap: false, - selectors: [ - { selector: 'a', options: { ignoreHref: false, baseUrl: url } }, - ], - }), - MAX_CONTENT_LENGTH, - TRUNCATION_WARNING, - ); + let textContent = convert(html, { + wordwrap: false, + selectors: [ + { selector: 'a', options: { ignoreHref: false, baseUrl: url } }, + ], + }); + if (!this.context.config.isAutoDistillationEnabled()) { + textContent = truncateString( + textContent, + MAX_CONTENT_LENGTH, + TRUNCATION_WARNING, + ); + } return { llmContent: textContent, returnDisplay: `Fetched and converted HTML content from ${url}`, @@ -718,11 +737,10 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun } // Fallback for unknown types - try as text - const text = truncateString( - bodyBuffer.toString('utf8'), - MAX_CONTENT_LENGTH, - TRUNCATION_WARNING, - ); + let text = bodyBuffer.toString('utf8'); + if (!this.context.config.isAutoDistillationEnabled()) { + text = truncateString(text, MAX_CONTENT_LENGTH, TRUNCATION_WARNING); + } return { llmContent: text, returnDisplay: `Fetched ${contentType || 'unknown'} content from ${url}`, diff --git a/packages/core/src/utils/tokenCalculation.ts b/packages/core/src/utils/tokenCalculation.ts index d5a7fdc9eb..b61b7cbb5d 100644 --- a/packages/core/src/utils/tokenCalculation.ts +++ b/packages/core/src/utils/tokenCalculation.ts @@ -10,10 +10,10 @@ import { debugLogger } from './debugLogger.js'; // Token estimation constants // ASCII characters (0-127) are roughly 4 chars per token -const ASCII_TOKENS_PER_CHAR = 0.25; +export const ASCII_TOKENS_PER_CHAR = 0.25; // Non-ASCII characters (including CJK) are often 1-2 tokens per char. // We use 1.3 as a conservative estimate to avoid underestimation. -const NON_ASCII_TOKENS_PER_CHAR = 1.3; +export const NON_ASCII_TOKENS_PER_CHAR = 1.3; // Fixed token estimate for images const IMAGE_TOKEN_ESTIMATE = 3000; // Fixed token estimate for PDFs (~100 pages at 258 tokens/page) diff --git a/packages/core/src/utils/truncation.ts b/packages/core/src/utils/truncation.ts new file mode 100644 index 0000000000..8736460b75 --- /dev/null +++ b/packages/core/src/utils/truncation.ts @@ -0,0 +1,142 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Part } from '@google/genai'; +import { + estimateTokenCountSync, + ASCII_TOKENS_PER_CHAR, + NON_ASCII_TOKENS_PER_CHAR, +} from './tokenCalculation.js'; + +export const MIN_TARGET_TOKENS = 10; +export const MIN_CHARS_FOR_TRUNCATION = 100; +export const TEXT_TRUNCATION_PREFIX = + '[Message Normalized: Exceeded size limit]'; +export const TOOL_TRUNCATION_PREFIX = + '[Message Normalized: Tool output exceeded size limit]'; + +/** + * Estimates the character limit for a target token count, accounting for ASCII vs Non-ASCII. + * Uses a weighted average based on the provided text to decide how many characters + * fit into the target token budget. + */ +export function estimateCharsFromTokens( + text: string, + targetTokens: number, +): number { + if (text.length === 0) return 0; + + // Count ASCII vs Non-ASCII in a sample of the text. + let asciiCount = 0; + const sampleLen = Math.min(text.length, 1000); + for (let i = 0; i < sampleLen; i++) { + if (text.charCodeAt(i) <= 127) { + asciiCount++; + } + } + + const asciiRatio = asciiCount / sampleLen; + // Weighted tokens per character: + const avgTokensPerChar = + asciiRatio * ASCII_TOKENS_PER_CHAR + + (1 - asciiRatio) * NON_ASCII_TOKENS_PER_CHAR; + + // Characters = Tokens / (Tokens per Character) + return Math.floor(targetTokens / avgTokensPerChar); +} + +/** + * Truncates a string to a target length, keeping a proportional amount of the head and tail, + * and prepending a prefix. + */ +export function truncateProportionally( + str: string, + targetChars: number, + prefix: string, + headRatio: number = 0.2, +): string { + if (str.length <= targetChars) return str; + + const ellipsis = '\n...\n'; + const overhead = prefix.length + ellipsis.length + 1; // +1 for the newline after prefix + const availableChars = Math.max(0, targetChars - overhead); + + if (availableChars <= 0) { + return prefix; // Safe fallback if target is extremely small + } + + const headChars = Math.floor(availableChars * headRatio); + const tailChars = availableChars - headChars; + + return `${prefix}\n${str.substring(0, headChars)}${ellipsis}${str.substring(str.length - tailChars)}`; +} + +/** + * Safely normalizes a function response by truncating large string values + * within the response object while maintaining its JSON structure. + */ +export function normalizeFunctionResponse( + part: Part, + ratio: number, + headRatio: number = 0.2, + savedPath?: string, + intentSummary?: string, +): Part { + const fr = part.functionResponse; + if (!fr || !fr.response) return part; + + const responseObj = fr.response; + if (typeof responseObj !== 'object' || responseObj === null) return part; + + let hasChanges = false; + const newResponse: Record = {}; + + // For function responses, we truncate individual string values that are large. + // This preserves the schema keys (stdout, stderr, etc). + for (const [key, value] of Object.entries(responseObj)) { + if (typeof value === 'string' && value.length > MIN_CHARS_FOR_TRUNCATION) { + const valueTokens = estimateTokenCountSync([{ text: value }]); + const targetValueTokens = Math.max( + MIN_TARGET_TOKENS, + Math.floor(valueTokens * ratio), + ); + const targetChars = estimateCharsFromTokens(value, targetValueTokens); + + if (value.length > targetChars) { + let truncated = truncateProportionally( + value, + targetChars, + TOOL_TRUNCATION_PREFIX, + headRatio, + ); + if (savedPath) { + truncated += `\n\nFull output saved to: ${savedPath}`; + } + if (intentSummary) { + truncated += intentSummary; + } + newResponse[key] = truncated; + hasChanges = true; + } else { + newResponse[key] = value; + } + } else { + newResponse[key] = value; + } + } + + if (!hasChanges) return part; + + return { + functionResponse: { + // This spread should be safe as we mostly care about the function + // response properties. + // eslint-disable-next-line @typescript-eslint/no-misused-spread + ...fr, + response: newResponse, + }, + }; +} diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index 52a6f1e183..c5db73b1f2 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -2925,31 +2925,10 @@ "default": false, "type": "boolean" }, - "agentHistoryTruncation": { - "title": "Agent History Truncation", - "description": "Enable truncation window logic for the Agent History Provider.", - "markdownDescription": "Enable truncation window logic for the Agent History Provider.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`", - "default": false, - "type": "boolean" - }, - "agentHistoryTruncationThreshold": { - "title": "Agent History Truncation Threshold", - "description": "The maximum number of messages before history is truncated.", - "markdownDescription": "The maximum number of messages before history is truncated.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `30`", - "default": 30, - "type": "number" - }, - "agentHistoryRetainedMessages": { - "title": "Agent History Retained Messages", - "description": "The number of recent messages to retain after truncation.", - "markdownDescription": "The number of recent messages to retain after truncation.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `15`", - "default": 15, - "type": "number" - }, - "agentHistorySummarization": { - "title": "Agent History Summarization", - "description": "Enable summarization of truncated content via a small model for the Agent History Provider.", - "markdownDescription": "Enable summarization of truncated content via a small model for the Agent History Provider.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`", + "contextManagement": { + "title": "Enable Context Management", + "description": "Enable logic for context management.", + "markdownDescription": "Enable logic for context management.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`", "default": false, "type": "boolean" }, @@ -3144,6 +3123,92 @@ "items": {} } }, + "contextManagement": { + "title": "Context Management", + "description": "Settings for agent history and tool distillation context management.", + "markdownDescription": "Settings for agent history and tool distillation context management.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `{}`", + "default": {}, + "type": "object", + "properties": { + "historyWindow": { + "title": "History Window Settings", + "markdownDescription": "Description not provided.\n\n- Category: `Context Management`\n- Requires restart: `yes`\n- Default: `{}`", + "default": {}, + "type": "object", + "properties": { + "maxTokens": { + "title": "Max Tokens", + "description": "The number of tokens to allow before triggering compression.", + "markdownDescription": "The number of tokens to allow before triggering compression.\n\n- Category: `Context Management`\n- Requires restart: `yes`\n- Default: `150000`", + "default": 150000, + "type": "number" + }, + "retainedTokens": { + "title": "Retained Tokens", + "description": "The number of tokens to always retain.", + "markdownDescription": "The number of tokens to always retain.\n\n- Category: `Context Management`\n- Requires restart: `yes`\n- Default: `40000`", + "default": 40000, + "type": "number" + } + }, + "additionalProperties": false + }, + "messageLimits": { + "title": "Message Limits", + "markdownDescription": "Description not provided.\n\n- Category: `Context Management`\n- Requires restart: `yes`\n- Default: `{}`", + "default": {}, + "type": "object", + "properties": { + "normalMaxTokens": { + "title": "Normal Maximum Tokens", + "description": "The target number of tokens to budget for a normal conversation turn.", + "markdownDescription": "The target number of tokens to budget for a normal conversation turn.\n\n- Category: `Context Management`\n- Requires restart: `yes`\n- Default: `2500`", + "default": 2500, + "type": "number" + }, + "retainedMaxTokens": { + "title": "Retained Maximum Tokens", + "description": "The maximum number of tokens a single conversation turn can consume before truncation.", + "markdownDescription": "The maximum number of tokens a single conversation turn can consume before truncation.\n\n- Category: `Context Management`\n- Requires restart: `yes`\n- Default: `12000`", + "default": 12000, + "type": "number" + }, + "normalizationHeadRatio": { + "title": "Normalization Head Ratio", + "description": "The ratio of tokens to retain from the beginning of a truncated message (0.0 to 1.0).", + "markdownDescription": "The ratio of tokens to retain from the beginning of a truncated message (0.0 to 1.0).\n\n- Category: `Context Management`\n- Requires restart: `yes`\n- Default: `0.25`", + "default": 0.25, + "type": "number" + } + }, + "additionalProperties": false + }, + "toolDistillation": { + "title": "Tool Distillation", + "markdownDescription": "Description not provided.\n\n- Category: `Context Management`\n- Requires restart: `yes`\n- Default: `{}`", + "default": {}, + "type": "object", + "properties": { + "maxOutputTokens": { + "title": "Max Output Tokens", + "description": "Maximum tokens to show when truncating large tool outputs.", + "markdownDescription": "Maximum tokens to show when truncating large tool outputs.\n\n- Category: `Context Management`\n- Requires restart: `yes`\n- Default: `10000`", + "default": 10000, + "type": "number" + }, + "summarizationThresholdTokens": { + "title": "Tool Summarization Threshold", + "description": "Threshold above which truncated tool outputs will be summarized by an LLM.", + "markdownDescription": "Threshold above which truncated tool outputs will be summarized by an LLM.\n\n- Category: `Context Management`\n- Requires restart: `yes`\n- Default: `20000`", + "default": 20000, + "type": "number" + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + }, "admin": { "title": "Admin", "description": "Settings configured remotely by enterprise admins.", From 991dca4c409342c82feb6daf06a774edd6400353 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Mon, 30 Mar 2026 22:30:55 +0000 Subject: [PATCH 005/146] Default enable narration for the team. (#24224) --- .gemini/settings.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gemini/settings.json b/.gemini/settings.json index 9051dc78de..18f81884d2 100644 --- a/.gemini/settings.json +++ b/.gemini/settings.json @@ -3,7 +3,8 @@ "plan": true, "extensionReloading": true, "modelSteering": true, - "memoryManager": true + "memoryManager": true, + "topicUpdateNarration": true }, "general": { "devtools": true From 5b5f87abc755e963862d534968f73a42ded5a9ff Mon Sep 17 00:00:00 2001 From: Abhi <43648792+abhipatel12@users.noreply.github.com> Date: Mon, 30 Mar 2026 19:10:57 -0400 Subject: [PATCH 006/146] fix(core): ensure default agents provide tools and use model-specific schemas (#24268) --- .../core/src/agents/local-executor.test.ts | 26 ++++++++++++++++--- packages/core/src/agents/local-executor.ts | 8 ++++-- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/packages/core/src/agents/local-executor.test.ts b/packages/core/src/agents/local-executor.test.ts index 32499bbaf1..ba2a84345a 100644 --- a/packages/core/src/agents/local-executor.test.ts +++ b/packages/core/src/agents/local-executor.test.ts @@ -348,10 +348,9 @@ describe('LocalAgentExecutor', () => { get: () => 'test-prompt-id', configurable: true, }); - parentToolRegistry = new ToolRegistry(mockConfig, mockConfig.messageBus); - parentToolRegistry.registerTool( - new LSTool(mockConfig, mockConfig.messageBus), - ); + const { messageBus } = mockConfig as unknown as { messageBus: MessageBus }; + parentToolRegistry = new ToolRegistry(mockConfig, messageBus); + parentToolRegistry.registerTool(new LSTool(mockConfig, messageBus)); parentToolRegistry.registerTool( new MockTool({ name: READ_FILE_TOOL_NAME }), ); @@ -779,6 +778,25 @@ describe('LocalAgentExecutor', () => { // Assert that there is exactly ONE schema for this tool expect(foundSchemas).toHaveLength(1); }); + + it('should provide tools to the model when toolConfig is OMITTED (default to all tools)', async () => { + const fullDefinition = createTestDefinition(); + const { toolConfig: _, ...definition } = fullDefinition; + + const executor = await LocalAgentExecutor.create( + definition as LocalAgentDefinition, + mockConfig, + onActivity, + ); + + const toolsList = ( + executor as unknown as { prepareToolsList: () => FunctionDeclaration[] } + ).prepareToolsList(); + + // Verify that LS_TOOL_NAME is in the list (since LS was registered in beforeEach) + const toolNames = toolsList.map((t) => t.name); + expect(toolNames).toContain(LS_TOOL_NAME); + }); }); describe('run (Execution Loop and Logic)', () => { diff --git a/packages/core/src/agents/local-executor.ts b/packages/core/src/agents/local-executor.ts index c9e4341f03..113ee18f91 100644 --- a/packages/core/src/agents/local-executor.ts +++ b/packages/core/src/agents/local-executor.ts @@ -1329,9 +1329,13 @@ export class LocalAgentExecutor { toolsList.push(toolRef); } } - // Add schemas from tools that were explicitly registered by name, wildcard, or instance. - toolsList.push(...this.toolRegistry.getFunctionDeclarations()); } + // Add schemas from tools that were explicitly registered by name, wildcard, or instance. + toolsList.push( + ...this.toolRegistry.getFunctionDeclarations( + this.definition.modelConfig.model, + ), + ); // Always inject complete_task. // Configure its schema based on whether output is expected. From 3e95b8ec5966396a12964a636d89289dcc084c6f Mon Sep 17 00:00:00 2001 From: Sehoon Shon Date: Mon, 30 Mar 2026 19:30:33 -0400 Subject: [PATCH 007/146] feat(cli): show Flash Lite Preview model regardless of user tier (#23904) --- .../src/ui/components/ModelDialog.test.tsx | 31 +------------------ .../cli/src/ui/components/ModelDialog.tsx | 7 +---- 2 files changed, 2 insertions(+), 36 deletions(-) diff --git a/packages/cli/src/ui/components/ModelDialog.test.tsx b/packages/cli/src/ui/components/ModelDialog.test.tsx index fd5df5db89..e40f39befc 100644 --- a/packages/cli/src/ui/components/ModelDialog.test.tsx +++ b/packages/cli/src/ui/components/ModelDialog.test.tsx @@ -21,7 +21,6 @@ import { PREVIEW_GEMINI_FLASH_MODEL, PREVIEW_GEMINI_3_1_FLASH_LITE_MODEL, AuthType, - UserTierId, } from '@google/gemini-cli-core'; import type { Config, ModelSlashCommandEvent } from '@google/gemini-cli-core'; @@ -56,7 +55,6 @@ describe('', () => { const mockGetGemini31FlashLiteLaunchedSync = vi.fn(); const mockGetProModelNoAccess = vi.fn(); const mockGetProModelNoAccessSync = vi.fn(); - const mockGetUserTier = vi.fn(); interface MockConfig extends Partial { setModel: (model: string, isTemporary?: boolean) => void; @@ -67,7 +65,6 @@ describe('', () => { getGemini31FlashLiteLaunchedSync: () => boolean; getProModelNoAccess: () => Promise; getProModelNoAccessSync: () => boolean; - getUserTier: () => UserTierId | undefined; } const mockConfig: MockConfig = { @@ -79,7 +76,6 @@ describe('', () => { getGemini31FlashLiteLaunchedSync: mockGetGemini31FlashLiteLaunchedSync, getProModelNoAccess: mockGetProModelNoAccess, getProModelNoAccessSync: mockGetProModelNoAccessSync, - getUserTier: mockGetUserTier, }; beforeEach(() => { @@ -90,7 +86,6 @@ describe('', () => { mockGetGemini31FlashLiteLaunchedSync.mockReturnValue(false); mockGetProModelNoAccess.mockResolvedValue(false); mockGetProModelNoAccessSync.mockReturnValue(false); - mockGetUserTier.mockReturnValue(UserTierId.STANDARD); // Default implementation for getDisplayString mockGetDisplayString.mockImplementation((val: string) => { @@ -136,7 +131,6 @@ describe('', () => { mockGetProModelNoAccess.mockResolvedValue(true); mockGetHasAccessToPreviewModel.mockReturnValue(true); mockGetGemini31FlashLiteLaunchedSync.mockReturnValue(true); - mockGetUserTier.mockReturnValue(UserTierId.FREE); mockGetDisplayString.mockImplementation((val: string) => val); const { lastFrame, unmount } = await renderComponent(); @@ -442,34 +436,11 @@ describe('', () => { unmount(); }); - it('hides Flash Lite Preview model for users with pro access', async () => { - mockGetProModelNoAccessSync.mockReturnValue(false); - mockGetProModelNoAccess.mockResolvedValue(false); - mockGetHasAccessToPreviewModel.mockReturnValue(true); - const { lastFrame, stdin, waitUntilReady, unmount } = - await renderComponent(); - - // Go to manual view - await act(async () => { - stdin.write('\u001B[B'); // Manual - }); - await waitUntilReady(); - await act(async () => { - stdin.write('\r'); - }); - await waitUntilReady(); - - const output = lastFrame(); - expect(output).not.toContain(PREVIEW_GEMINI_3_1_FLASH_LITE_MODEL); - unmount(); - }); - - it('shows Flash Lite Preview model for free tier users', async () => { + it('shows Flash Lite Preview model regardless of tier when flag is enabled', async () => { mockGetProModelNoAccessSync.mockReturnValue(false); mockGetProModelNoAccess.mockResolvedValue(false); mockGetHasAccessToPreviewModel.mockReturnValue(true); mockGetGemini31FlashLiteLaunchedSync.mockReturnValue(true); - mockGetUserTier.mockReturnValue(UserTierId.FREE); const { lastFrame, stdin, waitUntilReady, unmount } = await renderComponent(); diff --git a/packages/cli/src/ui/components/ModelDialog.tsx b/packages/cli/src/ui/components/ModelDialog.tsx index 0bd7918248..618bc353c1 100644 --- a/packages/cli/src/ui/components/ModelDialog.tsx +++ b/packages/cli/src/ui/components/ModelDialog.tsx @@ -23,7 +23,6 @@ import { AuthType, PREVIEW_GEMINI_3_1_CUSTOM_TOOLS_MODEL, isProModel, - UserTierId, } from '@google/gemini-cli-core'; import { useKeypress } from '../hooks/useKeypress.js'; import { theme } from '../semantic-colors.js'; @@ -190,7 +189,6 @@ export function ModelDialog({ onClose }: ModelDialogProps): React.JSX.Element { }, [config, shouldShowPreviewModels, manualModelSelected, useGemini31]); const manualOptions = useMemo(() => { - const isFreeTier = config?.getUserTier() === UserTierId.FREE; // --- DYNAMIC PATH --- if ( config?.getExperimentalDynamicModelConfiguration?.() === true && @@ -207,9 +205,6 @@ export function ModelDialog({ onClose }: ModelDialogProps): React.JSX.Element { if (m.tier === 'auto') return false; // Pro models are shown for users with pro access if (!hasAccessToProModel && m.tier === 'pro') return false; - // 3.1 Preview Flash-lite is only available on free tier - if (m.tier === 'flash-lite' && m.isPreview && !isFreeTier) - return false; // Flag Guard: Versioned models only show if their flag is active. if (id === PREVIEW_GEMINI_3_1_MODEL && !useGemini31) return false; @@ -292,7 +287,7 @@ export function ModelDialog({ onClose }: ModelDialogProps): React.JSX.Element { }, ]; - if (isFreeTier && useGemini31FlashLite) { + if (useGemini31FlashLite) { previewOptions.push({ value: PREVIEW_GEMINI_3_1_FLASH_LITE_MODEL, title: getDisplayString(PREVIEW_GEMINI_3_1_FLASH_LITE_MODEL), From 1df5c98b33b39b3e72dd9ad7afde0b7ac2265e5c Mon Sep 17 00:00:00 2001 From: Jarrod Whelan <150866123+jwhelangoog@users.noreply.github.com> Date: Mon, 30 Mar 2026 16:43:29 -0700 Subject: [PATCH 008/146] feat(cli): implement compact tool output (#20974) --- docs/cli/settings.md | 1 + docs/reference/configuration.md | 5 + packages/cli/src/config/settingsSchema.ts | 10 + packages/cli/src/test-utils/mockConfig.ts | 11 + packages/cli/src/test-utils/render.tsx | 16 + packages/cli/src/ui/AppContainer.tsx | 78 ++- ...ternateBufferQuittingDisplay.test.tsx.snap | 3 + .../__snapshots__/MainContent.test.tsx.snap | 9 +- .../messages/DenseToolMessage.test.tsx | 577 ++++++++++++++++++ .../components/messages/DenseToolMessage.tsx | 563 +++++++++++++++++ .../components/messages/DiffRenderer.test.tsx | 3 + .../ui/components/messages/DiffRenderer.tsx | 155 ++--- .../messages/ShellToolMessage.test.tsx | 143 +++-- .../messages/ToolConfirmationMessage.test.tsx | 17 + .../ToolGroupMessage.compact.test.tsx | 178 ++++++ .../messages/ToolGroupMessage.test.tsx | 2 +- .../components/messages/ToolGroupMessage.tsx | 420 +++++++++---- .../components/messages/ToolResultDisplay.tsx | 29 +- .../ToolStickyHeaderRegression.test.tsx | 6 +- ...snapshot-for-a-Rejected-tool-call.snap.svg | 11 + ...ccepted-file-edit-with-diff-stats.snap.svg | 33 + .../DenseToolMessage.test.tsx.snap | 143 +++++ .../ShellToolMessage.test.tsx.snap | 9 +- .../ToolGroupMessage.compact.test.tsx.snap | 35 ++ .../ToolGroupMessage.test.tsx.snap | 12 +- .../ToolResultDisplay.test.tsx.snap | 3 +- .../ToolStickyHeaderRegression.test.tsx.snap | 4 +- packages/cli/src/ui/constants.ts | 3 + .../ui/contexts/ToolActionsContext.test.tsx | 133 +++- .../src/ui/contexts/ToolActionsContext.tsx | 50 +- packages/cli/src/ui/hooks/useGeminiStream.ts | 139 ++++- packages/cli/src/ui/types.ts | 1 + .../__snapshots__/borderStyles.test.tsx.snap | 9 +- packages/cli/src/ui/utils/fileUtils.ts | 19 + packages/cli/src/ui/utils/toolLayoutUtils.ts | 2 +- packages/core/src/tools/grep-utils.ts | 30 +- packages/core/src/tools/grep.test.ts | 26 +- packages/core/src/tools/grep.ts | 4 +- packages/core/src/tools/ls.test.ts | 33 +- packages/core/src/tools/ls.ts | 12 +- .../core/src/tools/read-many-files.test.ts | 45 +- packages/core/src/tools/read-many-files.ts | 22 +- packages/core/src/tools/ripGrep.test.ts | 37 +- packages/core/src/tools/tools.ts | 8 +- schemas/settings.schema.json | 7 + 45 files changed, 2670 insertions(+), 386 deletions(-) create mode 100644 packages/cli/src/ui/components/messages/DenseToolMessage.test.tsx create mode 100644 packages/cli/src/ui/components/messages/DenseToolMessage.tsx create mode 100644 packages/cli/src/ui/components/messages/ToolGroupMessage.compact.test.tsx create mode 100644 packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage-DenseToolMessage-Visual-Regression-matches-SVG-snapshot-for-a-Rejected-tool-call.snap.svg create mode 100644 packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage-DenseToolMessage-Visual-Regression-matches-SVG-snapshot-for-an-Accepted-file-edit-with-diff-stats.snap.svg create mode 100644 packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage.test.tsx.snap create mode 100644 packages/cli/src/ui/components/messages/__snapshots__/ToolGroupMessage.compact.test.tsx.snap create mode 100644 packages/cli/src/ui/utils/fileUtils.ts diff --git a/docs/cli/settings.md b/docs/cli/settings.md index 7e06221b91..c5d411d2ce 100644 --- a/docs/cli/settings.md +++ b/docs/cli/settings.md @@ -59,6 +59,7 @@ they appear in the UI. | Hide Tips | `ui.hideTips` | Hide helpful tips in the UI | `false` | | Escape Pasted @ Symbols | `ui.escapePastedAtSymbols` | When enabled, @ symbols in pasted text are escaped to prevent unintended @path expansion. | `false` | | Show Shortcuts Hint | `ui.showShortcutsHint` | Show the "? for shortcuts" hint above the input. | `true` | +| Compact Tool Output | `ui.compactToolOutput` | Display tool outputs (like directory listings and file reads) in a compact, structured format. | `false` | | Hide Banner | `ui.hideBanner` | Hide the application banner | `false` | | Hide Context Summary | `ui.hideContextSummary` | Hide the context summary (GEMINI.md, MCP servers) above the input. | `false` | | Hide CWD | `ui.footer.hideCWD` | Hide the current working directory in the footer. | `false` | diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 2d57206d47..9c4ef6e6bc 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -257,6 +257,11 @@ their corresponding top-level category object in your `settings.json` file. - **Description:** Show the "? for shortcuts" hint above the input. - **Default:** `true` +- **`ui.compactToolOutput`** (boolean): + - **Description:** Display tool outputs (like directory listings and file + reads) in a compact, structured format. + - **Default:** `false` + - **`ui.hideBanner`** (boolean): - **Description:** Hide the application banner - **Default:** `false` diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index a3adf2dea3..d614eabea7 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -561,6 +561,16 @@ const SETTINGS_SCHEMA = { description: 'Show the "? for shortcuts" hint above the input.', showInDialog: true, }, + compactToolOutput: { + type: 'boolean', + label: 'Compact Tool Output', + category: 'UI', + requiresRestart: false, + default: false, + description: + 'Display tool outputs (like directory listings and file reads) in a compact, structured format.', + showInDialog: true, + }, hideBanner: { type: 'boolean', label: 'Hide Banner', diff --git a/packages/cli/src/test-utils/mockConfig.ts b/packages/cli/src/test-utils/mockConfig.ts index 260bafdf2b..daf109d928 100644 --- a/packages/cli/src/test-utils/mockConfig.ts +++ b/packages/cli/src/test-utils/mockConfig.ts @@ -194,6 +194,17 @@ export function createMockSettings( user: { settings: {} }, workspace: { settings: {} }, errors: [], + subscribe: vi.fn().mockReturnValue(() => {}), + getSnapshot: vi.fn().mockReturnValue({ + system: { settings: {} }, + systemDefaults: { settings: {} }, + user: { settings: {} }, + workspace: { settings: {} }, + isTrusted: true, + errors: [], + merged, + }), + setValue: vi.fn(), ...overrides, merged, } as unknown as LoadedSettings; diff --git a/packages/cli/src/test-utils/render.tsx b/packages/cli/src/test-utils/render.tsx index 6ca30dd8b9..69153d3d6c 100644 --- a/packages/cli/src/test-utils/render.tsx +++ b/packages/cli/src/test-utils/render.tsx @@ -613,6 +613,7 @@ export const renderWithProviders = async ( mouseEventsEnabled = false, config, uiActions, + toolActions, persistentState, appState = mockAppState, }: { @@ -623,6 +624,11 @@ export const renderWithProviders = async ( mouseEventsEnabled?: boolean; config?: Config; uiActions?: Partial; + toolActions?: Partial<{ + isExpanded: (callId: string) => boolean; + toggleExpansion: (callId: string) => void; + toggleAllExpansion: (callIds: string[]) => void; + }>; persistentState?: { get?: typeof persistentStateMock.get; set?: typeof persistentStateMock.set; @@ -710,6 +716,16 @@ export const renderWithProviders = async ( { const [adminSettingsChanged, setAdminSettingsChanged] = useState(false); + const [expandedTools, setExpandedTools] = useState>(new Set()); + + const toggleExpansion = useCallback((callId: string) => { + setExpandedTools((prev) => { + const next = new Set(prev); + if (next.has(callId)) { + next.delete(callId); + } else { + next.add(callId); + } + return next; + }); + }, []); + + const toggleAllExpansion = useCallback((callIds: string[]) => { + setExpandedTools((prev) => { + const next = new Set(prev); + const anyCollapsed = callIds.some((id) => !next.has(id)); + + if (anyCollapsed) { + callIds.forEach((id) => next.add(id)); + } else { + callIds.forEach((id) => next.delete(id)); + } + return next; + }); + }, []); + + const isExpanded = useCallback( + (callId: string) => expandedTools.has(callId), + [expandedTools], + ); + const [shellModeActive, setShellModeActive] = useState(false); const [modelSwitchedFromQuotaError, setModelSwitchedFromQuotaError] = useState(false); @@ -1137,11 +1171,6 @@ Logging in with Google... Restarting Gemini CLI to continue. [pendingSlashCommandHistoryItems, pendingGeminiHistoryItems], ); - const hasPendingToolConfirmation = useMemo( - () => isToolAwaitingConfirmation(pendingHistoryItems), - [pendingHistoryItems], - ); - toggleBackgroundTasksRef.current = toggleBackgroundTasks; isBackgroundTaskVisibleRef.current = isBackgroundTaskVisible; backgroundTasksRef.current = backgroundTasks; @@ -1727,13 +1756,25 @@ Logging in with Google... Restarting Gemini CLI to continue. return true; } + const toggleLastTurnTools = () => { + triggerExpandHint(true); + + const targetToolCallIds = getLastTurnToolCallIds( + historyManager.history, + pendingHistoryItems, + ); + + if (targetToolCallIds.length > 0) { + toggleAllExpansion(targetToolCallIds); + } + }; + let enteringConstrainHeightMode = false; if (!constrainHeight) { enteringConstrainHeightMode = true; setConstrainHeight(true); if (keyMatchers[Command.SHOW_MORE_LINES](key)) { - // If the user manually collapses the view, show the hint and reset the x-second timer. - triggerExpandHint(true); + toggleLastTurnTools(); } if (!isAlternateBuffer) { refreshStatic(); @@ -1781,11 +1822,8 @@ Logging in with Google... Restarting Gemini CLI to continue. !enteringConstrainHeightMode ) { setConstrainHeight(false); - // If the user manually expands the view, show the hint and reset the x-second timer. - triggerExpandHint(true); - if (!isAlternateBuffer) { - refreshStatic(); - } + toggleLastTurnTools(); + refreshStatic(); return true; } else if ( (keyMatchers[Command.FOCUS_SHELL_INPUT](key) || @@ -1890,6 +1928,9 @@ Logging in with Google... Restarting Gemini CLI to continue. triggerExpandHint, keyMatchers, isHelpDismissKey, + historyManager.history, + pendingHistoryItems, + toggleAllExpansion, ], ); @@ -2033,6 +2074,11 @@ Logging in with Google... Restarting Gemini CLI to continue. authState === AuthState.AwaitingApiKeyInput || !!newAgents; + const hasPendingToolConfirmation = useMemo( + () => isToolAwaitingConfirmation(pendingHistoryItems), + [pendingHistoryItems], + ); + const hasConfirmUpdateExtensionRequests = confirmUpdateExtensionRequests.length > 0; const hasLoopDetectionConfirmationRequest = @@ -2639,7 +2685,13 @@ Logging in with Google... Restarting Gemini CLI to continue. startupWarnings: props.startupWarnings || [], }} > - + diff --git a/packages/cli/src/ui/components/__snapshots__/AlternateBufferQuittingDisplay.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/AlternateBufferQuittingDisplay.test.tsx.snap index d4dc67bbc6..68e202752e 100644 --- a/packages/cli/src/ui/components/__snapshots__/AlternateBufferQuittingDisplay.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/AlternateBufferQuittingDisplay.test.tsx.snap @@ -43,10 +43,12 @@ Tips for getting started: │ ✓ tool1 Description for tool 1 │ │ │ ╰──────────────────────────────────────────────────────────────────────────╯ + ╭──────────────────────────────────────────────────────────────────────────╮ │ ✓ tool2 Description for tool 2 │ │ │ ╰──────────────────────────────────────────────────────────────────────────╯ + ╭──────────────────────────────────────────────────────────────────────────╮ │ o tool3 Description for tool 3 │ │ │ @@ -93,6 +95,7 @@ Tips for getting started: │ ✓ tool1 Description for tool 1 │ │ │ ╰──────────────────────────────────────────────────────────────────────────╯ + ╭──────────────────────────────────────────────────────────────────────────╮ │ ✓ tool2 Description for tool 2 │ │ │ diff --git a/packages/cli/src/ui/components/__snapshots__/MainContent.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/MainContent.test.tsx.snap index 0e8e29e54d..07a28039d1 100644 --- a/packages/cli/src/ui/components/__snapshots__/MainContent.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/MainContent.test.tsx.snap @@ -6,12 +6,11 @@ AppHeader(full) ╭──────────────────────────────────────────────────────────────────────────────────────────────╮ │ ⊶ Shell Command Running a long command... │ │ │ -│ Line 10 │ │ Line 11 │ │ Line 12 │ │ Line 13 │ │ Line 14 │ -│ Line 15 █ │ +│ Line 15 │ │ Line 16 █ │ │ Line 17 █ │ │ Line 18 █ │ @@ -27,12 +26,11 @@ AppHeader(full) ╭──────────────────────────────────────────────────────────────────────────────────────────────╮ │ ⊶ Shell Command Running a long command... │ │ │ -│ Line 10 │ │ Line 11 │ │ Line 12 │ │ Line 13 │ │ Line 14 │ -│ Line 15 █ │ +│ Line 15 │ │ Line 16 █ │ │ Line 17 █ │ │ Line 18 █ │ @@ -47,8 +45,7 @@ exports[`MainContent > MainContent Tool Output Height Logic > 'Normal mode - Con ╭──────────────────────────────────────────────────────────────────────────────────────────────╮ │ ⊶ Shell Command Running a long command... │ │ │ -│ ... first 10 lines hidden (Ctrl+O to show) ... │ -│ Line 11 │ +│ ... first 11 lines hidden (Ctrl+O to show) ... │ │ Line 12 │ │ Line 13 │ │ Line 14 │ diff --git a/packages/cli/src/ui/components/messages/DenseToolMessage.test.tsx b/packages/cli/src/ui/components/messages/DenseToolMessage.test.tsx new file mode 100644 index 0000000000..1767eb10ad --- /dev/null +++ b/packages/cli/src/ui/components/messages/DenseToolMessage.test.tsx @@ -0,0 +1,577 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { renderWithProviders } from '../../../test-utils/render.js'; +import { DenseToolMessage } from './DenseToolMessage.js'; +import { + CoreToolCallStatus, + type DiffStat, + type FileDiff, + type GrepResult, + type ListDirectoryResult, + type ReadManyFilesResult, + makeFakeConfig, +} from '@google/gemini-cli-core'; +import type { + SerializableConfirmationDetails, + ToolResultDisplay, +} from '../../types.js'; + +import { createMockSettings } from '../../../test-utils/settings.js'; + +describe('DenseToolMessage', () => { + const defaultProps = { + callId: 'call-1', + name: 'test-tool', + description: 'Test description', + status: CoreToolCallStatus.Success, + resultDisplay: 'Success result' as ToolResultDisplay, + confirmationDetails: undefined, + terminalWidth: 80, + }; + + it('renders correctly for a successful string result', async () => { + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('test-tool'); + expect(output).toContain('Test description'); + expect(output).toContain('→ Success result'); + expect(output).toMatchSnapshot(); + }); + + it('truncates long string results', async () => { + const longResult = 'A'.repeat(200); + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('…'); + expect(lastFrame()).toMatchSnapshot(); + }); + + it('flattens newlines in string results', async () => { + const multilineResult = 'Line 1\nLine 2'; + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('→ Line 1 Line 2'); + expect(output).toMatchSnapshot(); + }); + + it('renders correctly for file diff results with stats', async () => { + const diffResult: FileDiff = { + fileDiff: '@@ -1,1 +1,1 @@\n-old line\n+diff content', + fileName: 'test.ts', + filePath: '/path/to/test.ts', + originalContent: 'old content', + newContent: 'new content', + diffStat: { + user_added_lines: 5, + user_removed_lines: 2, + user_added_chars: 50, + user_removed_chars: 20, + model_added_lines: 10, + model_removed_lines: 4, + model_added_chars: 100, + model_removed_chars: 40, + }, + }; + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + {}, + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('test.ts → Accepted (+15, -6)'); + expect(output).toContain('diff content'); + expect(output).toMatchSnapshot(); + }); + + it('renders correctly for Edit tool using confirmationDetails', async () => { + const confirmationDetails = { + type: 'edit' as const, + title: 'Confirm Edit', + fileName: 'styles.scss', + filePath: '/path/to/styles.scss', + fileDiff: + '@@ -1,1 +1,1 @@\n-body { color: blue; }\n+body { color: red; }', + originalContent: 'body { color: blue; }', + newContent: 'body { color: red; }', + }; + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + {}, + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('Edit'); + expect(output).toContain('styles.scss'); + expect(output).toContain('→ Confirming'); + expect(output).toContain('body { color: red; }'); + expect(output).toMatchSnapshot(); + }); + + it('renders correctly for Rejected Edit tool', async () => { + const diffResult: FileDiff = { + fileDiff: '@@ -1,1 +1,1 @@\n-old line\n+new line', + fileName: 'styles.scss', + filePath: '/path/to/styles.scss', + originalContent: 'old line', + newContent: 'new line', + diffStat: { + user_added_lines: 1, + user_removed_lines: 1, + user_added_chars: 0, + user_removed_chars: 0, + model_added_lines: 0, + model_removed_lines: 0, + model_added_chars: 0, + model_removed_chars: 0, + }, + }; + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + {}, + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('Edit'); + expect(output).toContain('styles.scss → Rejected (+1, -1)'); + expect(output).toContain('- old line'); + expect(output).toContain('+ new line'); + expect(output).toMatchSnapshot(); + }); + + it('renders correctly for Rejected Edit tool with confirmationDetails and diffStat', async () => { + const confirmationDetails = { + type: 'edit' as const, + title: 'Confirm Edit', + fileName: 'styles.scss', + filePath: '/path/to/styles.scss', + fileDiff: + '@@ -1,1 +1,1 @@\n-body { color: blue; }\n+body { color: red; }', + originalContent: 'body { color: blue; }', + newContent: 'body { color: red; }', + diffStat: { + user_added_lines: 1, + user_removed_lines: 1, + user_added_chars: 0, + user_removed_chars: 0, + model_added_lines: 0, + model_removed_lines: 0, + model_added_chars: 0, + model_removed_chars: 0, + } as DiffStat, + }; + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + {}, + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('Edit'); + expect(output).toContain('styles.scss → Rejected (+1, -1)'); + expect(output).toMatchSnapshot(); + }); + + it('renders correctly for WriteFile tool', async () => { + const diffResult: FileDiff = { + fileDiff: '@@ -1,1 +1,1 @@\n-old content\n+new content', + fileName: 'config.json', + filePath: '/path/to/config.json', + originalContent: 'old content', + newContent: 'new content', + diffStat: { + user_added_lines: 1, + user_removed_lines: 1, + user_added_chars: 0, + user_removed_chars: 0, + model_added_lines: 0, + model_removed_lines: 0, + model_added_chars: 0, + model_removed_chars: 0, + }, + }; + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + {}, + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('WriteFile'); + expect(output).toContain('config.json → Accepted (+1, -1)'); + expect(output).toContain('+ new content'); + expect(output).toMatchSnapshot(); + }); + + it('renders correctly for Rejected WriteFile tool', async () => { + const diffResult: FileDiff = { + fileDiff: '@@ -1,1 +1,1 @@\n-old content\n+new content', + fileName: 'config.json', + filePath: '/path/to/config.json', + originalContent: 'old content', + newContent: 'new content', + }; + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + {}, + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('WriteFile'); + expect(output).toContain('config.json'); + expect(output).toContain('→ Rejected'); + expect(output).toContain('- old content'); + expect(output).toContain('+ new content'); + expect(output).toMatchSnapshot(); + }); + + it('renders correctly for Errored Edit tool', async () => { + const diffResult: FileDiff = { + fileDiff: '@@ -1,1 +1,1 @@\n-old line\n+new line', + fileName: 'styles.scss', + filePath: '/path/to/styles.scss', + originalContent: 'old line', + newContent: 'new line', + diffStat: { + user_added_lines: 1, + user_removed_lines: 1, + user_added_chars: 0, + user_removed_chars: 0, + model_added_lines: 0, + model_removed_lines: 0, + model_added_chars: 0, + model_removed_chars: 0, + }, + }; + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('Edit'); + expect(output).toContain('styles.scss → Failed (+1, -1)'); + expect(output).toMatchSnapshot(); + }); + + it('renders correctly for grep results', async () => { + const grepResult: GrepResult = { + summary: 'Found 2 matches', + matches: [ + { + filePath: 'file1.ts', + absolutePath: '/file1.ts', + lineNumber: 10, + line: 'match 1', + }, + { + filePath: 'file2.ts', + absolutePath: '/file2.ts', + lineNumber: 20, + line: 'match 2', + }, + ], + }; + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('→ Found 2 matches'); + // Matches are rendered in a secondary list for high-signal summaries + expect(output).toContain('file1.ts:10: match 1'); + expect(output).toContain('file2.ts:20: match 2'); + expect(output).toMatchSnapshot(); + }); + + it('renders correctly for ls results', async () => { + const lsResult: ListDirectoryResult = { + summary: 'Listed 2 files. (1 ignored)', + files: ['file1.ts', 'dir1'], + }; + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('→ Listed 2 files. (1 ignored)'); + // Directory listings should not have a payload in dense mode + expect(output).not.toContain('file1.ts'); + expect(output).not.toContain('dir1'); + expect(output).toMatchSnapshot(); + }); + + it('renders correctly for ReadManyFiles results', async () => { + const rmfResult: ReadManyFilesResult = { + summary: 'Read 3 file(s)', + files: ['file1.ts', 'file2.ts', 'file3.ts'], + include: ['**/*.ts'], + skipped: [{ path: 'skipped.bin', reason: 'binary' }], + }; + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('Attempting to read files from **/*.ts'); + expect(output).toContain('→ Read 3 file(s) (1 ignored)'); + expect(output).toContain('file1.ts'); + expect(output).toContain('file2.ts'); + expect(output).toContain('file3.ts'); + expect(output).toMatchSnapshot(); + }); + + it('renders correctly for todo updates', async () => { + const todoResult = { + todos: [], + }; + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('→ Todos updated'); + expect(output).toMatchSnapshot(); + }); + + it('renders generic output message for unknown object results', async () => { + const genericResult = { + some: 'data', + } as unknown as ToolResultDisplay; + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('→ Returned (possible empty result)'); + expect(output).toMatchSnapshot(); + }); + + it('renders correctly for error status with string message', async () => { + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('→ Error occurred'); + expect(output).toMatchSnapshot(); + }); + + it('renders generic failure message for error status without string message', async () => { + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('→ Failed'); + expect(output).toMatchSnapshot(); + }); + + it('does not render result arrow if resultDisplay is missing', async () => { + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).not.toContain('→'); + expect(output).toMatchSnapshot(); + }); + + describe('Toggleable Diff View (Alternate Buffer)', () => { + const diffResult: FileDiff = { + fileDiff: '@@ -1,1 +1,1 @@\n-old line\n+new line', + fileName: 'test.ts', + filePath: '/path/to/test.ts', + originalContent: 'old content', + newContent: 'new content', + }; + + it('hides diff content by default when in alternate buffer mode', async () => { + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + { + config: makeFakeConfig({ useAlternateBuffer: true }), + settings: createMockSettings({ ui: { useAlternateBuffer: true } }), + }, + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('Accepted'); + expect(output).not.toContain('new line'); + expect(output).toMatchSnapshot(); + }); + + it('shows diff content by default when NOT in alternate buffer mode', async () => { + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + { + config: makeFakeConfig({ useAlternateBuffer: false }), + settings: createMockSettings({ ui: { useAlternateBuffer: false } }), + }, + ); + await waitUntilReady(); + const output = lastFrame(); + expect(output).toContain('Accepted'); + expect(output).toContain('new line'); + expect(output).toMatchSnapshot(); + }); + + it('shows diff content when expanded via ToolActionsContext', async () => { + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + { + config: makeFakeConfig({ useAlternateBuffer: true }), + settings: createMockSettings({ ui: { useAlternateBuffer: true } }), + toolActions: { + isExpanded: () => true, + }, + }, + ); + await waitUntilReady(); + + // Verify it shows the diff when expanded + expect(lastFrame()).toContain('new line'); + }); + }); + + describe('Visual Regression', () => { + it('matches SVG snapshot for an Accepted file edit with diff stats', async () => { + const diffResult: FileDiff = { + fileName: 'test.ts', + filePath: '/mock/test.ts', + fileDiff: '--- a/test.ts\n+++ b/test.ts\n@@ -1 +1 @@\n-old\n+new', + originalContent: 'old', + newContent: 'new', + diffStat: { + model_added_lines: 1, + model_removed_lines: 1, + model_added_chars: 3, + model_removed_chars: 3, + user_added_lines: 0, + user_removed_lines: 0, + user_added_chars: 0, + user_removed_chars: 0, + }, + }; + + const renderResult = await renderWithProviders( + , + ); + + await renderResult.waitUntilReady(); + await expect(renderResult).toMatchSvgSnapshot(); + }); + + it('matches SVG snapshot for a Rejected tool call', async () => { + const renderResult = await renderWithProviders( + , + ); + + await renderResult.waitUntilReady(); + await expect(renderResult).toMatchSvgSnapshot(); + }); + }); +}); diff --git a/packages/cli/src/ui/components/messages/DenseToolMessage.tsx b/packages/cli/src/ui/components/messages/DenseToolMessage.tsx new file mode 100644 index 0000000000..6e81d07931 --- /dev/null +++ b/packages/cli/src/ui/components/messages/DenseToolMessage.tsx @@ -0,0 +1,563 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type React from 'react'; +import { useMemo, useState, useRef } from 'react'; +import { Box, Text, type DOMElement } from 'ink'; +import { + CoreToolCallStatus, + type FileDiff, + type ListDirectoryResult, + type ReadManyFilesResult, + isFileDiff, + hasSummary, + isGrepResult, + isListResult, + isReadManyFilesResult, +} from '@google/gemini-cli-core'; +import { + type IndividualToolCallDisplay, + type ToolResultDisplay, + isTodoList, +} from '../../types.js'; +import { useAlternateBuffer } from '../../hooks/useAlternateBuffer.js'; +import { ToolStatusIndicator } from './ToolShared.js'; +import { theme } from '../../semantic-colors.js'; +import { + DiffRenderer, + renderDiffLines, + isNewFile, + parseDiffWithLineNumbers, +} from './DiffRenderer.js'; +import { useMouseClick } from '../../hooks/useMouseClick.js'; +import { ScrollableList } from '../shared/ScrollableList.js'; +import { COMPACT_TOOL_SUBVIEW_MAX_LINES } from '../../constants.js'; +import { useSettings } from '../../contexts/SettingsContext.js'; +import { colorizeCode } from '../../utils/CodeColorizer.js'; +import { useToolActions } from '../../contexts/ToolActionsContext.js'; +import { getFileExtension } from '../../utils/fileUtils.js'; + +const PAYLOAD_MARGIN_LEFT = 6; +const PAYLOAD_BORDER_CHROME_WIDTH = 4; // paddingX=1 (2 cols) + borders (2 cols) +const PAYLOAD_SCROLL_GUTTER = 4; +const PAYLOAD_MAX_WIDTH = 120 + PAYLOAD_SCROLL_GUTTER; + +interface DenseToolMessageProps extends IndividualToolCallDisplay { + terminalWidth: number; + availableTerminalHeight?: number; +} + +interface ViewParts { + // brief description of action + description?: React.ReactNode; + // result summary or status text + summary?: React.ReactNode; + // detailed output, e.g. diff or command output + payload?: React.ReactNode; +} + +interface PayloadResult { + summary: string; + payload: string; +} + +const hasPayload = (res: unknown): res is PayloadResult => { + if (!hasSummary(res)) return false; + if (!('payload' in res)) return false; + + const value = (res as { payload?: unknown }).payload; + return typeof value === 'string'; +}; + +const RenderItemsList: React.FC<{ + items?: string[]; + maxVisible?: number; +}> = ({ items, maxVisible = 20 }) => { + if (!items || items.length === 0) return null; + return ( + + {items.slice(0, maxVisible).map((item, i) => ( + + {item} + + ))} + {items.length > maxVisible && ( + + ... and {items.length - maxVisible} more + + )} + + ); +}; + +function getFileOpData( + diff: FileDiff, + status: CoreToolCallStatus, + resultDisplay: ToolResultDisplay | undefined, + terminalWidth: number, + availableTerminalHeight: number | undefined, + isClickable: boolean, +): ViewParts { + const added = + (diff.diffStat?.model_added_lines ?? 0) + + (diff.diffStat?.user_added_lines ?? 0); + const removed = + (diff.diffStat?.model_removed_lines ?? 0) + + (diff.diffStat?.user_removed_lines ?? 0); + + const isAcceptedOrConfirming = + status === CoreToolCallStatus.Success || + status === CoreToolCallStatus.Executing || + status === CoreToolCallStatus.AwaitingApproval; + + const addColor = isAcceptedOrConfirming + ? theme.status.success + : theme.text.secondary; + const removeColor = isAcceptedOrConfirming + ? theme.status.error + : theme.text.secondary; + + // Always show diff stats if available, using neutral colors for rejected + const showDiffStat = !!diff.diffStat; + + const description = ( + + + {diff.fileName} + + + ); + let resultSummary = ''; + let resultColor = theme.text.secondary; + + if (status === CoreToolCallStatus.AwaitingApproval) { + resultSummary = 'Confirming'; + } else if ( + status === CoreToolCallStatus.Success || + status === CoreToolCallStatus.Executing + ) { + resultSummary = 'Accepted'; + resultColor = theme.text.accent; + } else if (status === CoreToolCallStatus.Cancelled) { + resultSummary = 'Rejected'; + resultColor = theme.status.error; + } else if (status === CoreToolCallStatus.Error) { + resultSummary = + typeof resultDisplay === 'string' ? resultDisplay : 'Failed'; + resultColor = theme.status.error; + } + + const summary = ( + + {resultSummary && ( + + →{' '} + + {resultSummary.replace(/\n/g, ' ')} + + + )} + {showDiffStat && ( + + + {'('} + +{added} + {', '} + -{removed} + {')'} + + + )} + + ); + + const payload = ( + + ); + + return { description, summary, payload }; +} + +function getReadManyFilesData(result: ReadManyFilesResult): ViewParts { + const items = result.files ?? []; + const maxVisible = 10; + const includePatterns = result.include?.join(', ') ?? ''; + const description = ( + + Attempting to read files from {includePatterns} + + ); + + const skippedCount = result.skipped?.length ?? 0; + const summaryStr = `Read ${items.length} file(s)${ + skippedCount > 0 ? ` (${skippedCount} ignored)` : '' + }`; + const summary = → {summaryStr}; + const hasItems = items.length > 0; + const payload = hasItems ? ( + + {hasItems && } + + ) : undefined; + + return { description, summary, payload }; +} + +function getListDirectoryData( + result: ListDirectoryResult, + originalDescription?: string, +): ViewParts { + const description = originalDescription ? ( + + {originalDescription} + + ) : undefined; + const summary = → {result.summary}; + + // For directory listings, we want NO payload in dense mode + return { description, summary, payload: undefined }; +} + +function getListResultData( + result: ListDirectoryResult | ReadManyFilesResult, + originalDescription?: string, +): ViewParts { + if (isReadManyFilesResult(result)) { + return getReadManyFilesData(result); + } + return getListDirectoryData(result, originalDescription); +} + +function getGenericSuccessData( + resultDisplay: unknown, + originalDescription?: string, +): ViewParts { + let summary: React.ReactNode; + let payload: React.ReactNode; + + const description = originalDescription ? ( + + {originalDescription} + + ) : undefined; + + if (typeof resultDisplay === 'string') { + const flattened = resultDisplay.replace(/\n/g, ' ').trim(); + summary = ( + + → {flattened} + + ); + } else if (isGrepResult(resultDisplay)) { + summary = → {resultDisplay.summary}; + const matches = resultDisplay.matches; + if (matches.length > 0) { + payload = ( + + `${m.filePath}:${m.lineNumber}: ${m.line.trim()}`, + )} + maxVisible={10} + /> + + ); + } + } else if (isTodoList(resultDisplay)) { + summary = ( + + → Todos updated + + ); + } else if (hasPayload(resultDisplay)) { + summary = → {resultDisplay.summary}; + payload = ( + + {resultDisplay.payload} + + ); + } else { + summary = ( + + → Returned (possible empty result) + + ); + } + + return { description, summary, payload }; +} + +export const DenseToolMessage: React.FC = (props) => { + const { + callId, + name, + status, + resultDisplay, + confirmationDetails, + outputFile, + terminalWidth, + availableTerminalHeight, + description: originalDescription, + } = props; + + const settings = useSettings(); + const isAlternateBuffer = useAlternateBuffer(); + const { isExpanded: isExpandedInContext, toggleExpansion } = useToolActions(); + + // Handle optional context members + const [localIsExpanded, setLocalIsExpanded] = useState(false); + const isExpanded = isExpandedInContext + ? isExpandedInContext(callId) + : localIsExpanded; + + const [isFocused, setIsFocused] = useState(false); + const toggleRef = useRef(null); + + // Unified File Data Extraction (Safely bridge resultDisplay and confirmationDetails) + const diff = useMemo((): FileDiff | undefined => { + if (isFileDiff(resultDisplay)) return resultDisplay; + if (confirmationDetails?.type === 'edit') { + const details = confirmationDetails; + return { + fileName: details.fileName, + fileDiff: details.fileDiff, + filePath: details.filePath, + originalContent: details.originalContent, + newContent: details.newContent, + diffStat: details.diffStat, + }; + } + return undefined; + }, [resultDisplay, confirmationDetails]); + + const handleToggle = () => { + const next = !isExpanded; + if (!next) { + setIsFocused(false); + } else { + setIsFocused(true); + } + + if (toggleExpansion) { + toggleExpansion(callId); + } else { + setLocalIsExpanded(next); + } + }; + + useMouseClick(toggleRef, handleToggle, { + isActive: isAlternateBuffer && !!diff, + }); + + // State-to-View Coordination + const viewParts = useMemo((): ViewParts => { + if (diff) { + return getFileOpData( + diff, + status, + resultDisplay, + terminalWidth, + availableTerminalHeight, + isAlternateBuffer, + ); + } + if (isListResult(resultDisplay)) { + return getListResultData(resultDisplay, originalDescription); + } + + if (isGrepResult(resultDisplay)) { + return getGenericSuccessData(resultDisplay, originalDescription); + } + + if (status === CoreToolCallStatus.Success && resultDisplay) { + return getGenericSuccessData(resultDisplay, originalDescription); + } + if (status === CoreToolCallStatus.Error) { + const text = + typeof resultDisplay === 'string' + ? resultDisplay.replace(/\n/g, ' ') + : 'Failed'; + const errorSummary = ( + + → {text} + + ); + const descriptionText = originalDescription ? ( + + {originalDescription} + + ) : undefined; + return { + description: descriptionText, + summary: errorSummary, + payload: undefined, + }; + } + + const descriptionText = originalDescription ? ( + + {originalDescription} + + ) : undefined; + return { + description: descriptionText, + summary: undefined, + payload: undefined, + }; + }, [ + diff, + status, + resultDisplay, + terminalWidth, + availableTerminalHeight, + originalDescription, + isAlternateBuffer, + ]); + + const { description, summary } = viewParts; + + const diffLines = useMemo(() => { + if (!diff || !isExpanded || !isAlternateBuffer) return []; + + const parsedLines = parseDiffWithLineNumbers(diff.fileDiff); + const isNewFileResult = isNewFile(parsedLines); + + if (isNewFileResult) { + const addedContent = parsedLines + .filter((line) => line.type === 'add') + .map((line) => line.content) + .join('\n'); + + const fileExtension = getFileExtension(diff.fileName); + + return colorizeCode({ + code: addedContent, + language: fileExtension, + maxWidth: terminalWidth - PAYLOAD_MARGIN_LEFT, + settings, + disableColor: status === CoreToolCallStatus.Cancelled, + returnLines: true, + }); + } else { + return renderDiffLines({ + parsedLines, + filename: diff.fileName, + terminalWidth: terminalWidth - PAYLOAD_MARGIN_LEFT, + disableColor: status === CoreToolCallStatus.Cancelled, + }); + } + }, [diff, isExpanded, isAlternateBuffer, terminalWidth, settings, status]); + + const showPayload = useMemo(() => { + const policy = !isAlternateBuffer || !diff || isExpanded; + if (!policy) return false; + + if (diff) { + if (isAlternateBuffer) { + return isExpanded && diffLines.length > 0; + } + // In non-alternate buffer mode, we always show the diff. + return true; + } + + return !!(viewParts.payload || outputFile); + }, [ + isAlternateBuffer, + diff, + isExpanded, + diffLines.length, + viewParts.payload, + outputFile, + ]); + + const keyExtractor = (_item: React.ReactNode, index: number) => + `diff-line-${index}`; + const renderItem = ({ item }: { item: React.ReactNode }) => ( + {item} + ); + + return ( + + + + + + {name}{' '} + + + + {description} + + {summary && ( + + {summary} + + )} + + + {showPayload && isAlternateBuffer && diffLines.length > 0 && ( + + 1} + hasFocus={isFocused} + width={Math.min( + PAYLOAD_MAX_WIDTH, + terminalWidth - + PAYLOAD_MARGIN_LEFT - + PAYLOAD_BORDER_CHROME_WIDTH - + PAYLOAD_SCROLL_GUTTER, + )} + /> + + )} + + {showPayload && (!isAlternateBuffer || !diff) && viewParts.payload && ( + + {viewParts.payload} + + )} + + {showPayload && outputFile && ( + + + (Output saved to: {outputFile}) + + + )} + + ); +}; diff --git a/packages/cli/src/ui/components/messages/DiffRenderer.test.tsx b/packages/cli/src/ui/components/messages/DiffRenderer.test.tsx index 46b0c0097c..5f75d6e009 100644 --- a/packages/cli/src/ui/components/messages/DiffRenderer.test.tsx +++ b/packages/cli/src/ui/components/messages/DiffRenderer.test.tsx @@ -55,6 +55,7 @@ index 0000000..e69de29 maxWidth: 80, theme: undefined, settings: expect.anything(), + disableColor: false, }), ); }); @@ -89,6 +90,7 @@ index 0000000..e69de29 maxWidth: 80, theme: undefined, settings: expect.anything(), + disableColor: false, }), ); }); @@ -119,6 +121,7 @@ index 0000000..e69de29 maxWidth: 80, theme: undefined, settings: expect.anything(), + disableColor: false, }), ); }); diff --git a/packages/cli/src/ui/components/messages/DiffRenderer.tsx b/packages/cli/src/ui/components/messages/DiffRenderer.tsx index 0859bc13f3..ddee2e55df 100644 --- a/packages/cli/src/ui/components/messages/DiffRenderer.tsx +++ b/packages/cli/src/ui/components/messages/DiffRenderer.tsx @@ -7,21 +7,21 @@ import type React from 'react'; import { useMemo } from 'react'; import { Box, Text, useIsScreenReaderEnabled } from 'ink'; -import crypto from 'node:crypto'; import { colorizeCode, colorizeLine } from '../../utils/CodeColorizer.js'; import { MaxSizedBox } from '../shared/MaxSizedBox.js'; import { theme as semanticTheme } from '../../semantic-colors.js'; import type { Theme } from '../../themes/theme.js'; import { useSettings } from '../../contexts/SettingsContext.js'; +import { getFileExtension } from '../../utils/fileUtils.js'; -interface DiffLine { +export interface DiffLine { type: 'add' | 'del' | 'context' | 'hunk' | 'other'; oldLine?: number; newLine?: number; content: string; } -function parseDiffWithLineNumbers(diffContent: string): DiffLine[] { +export function parseDiffWithLineNumbers(diffContent: string): DiffLine[] { const lines = diffContent.split(/\r?\n/); const result: DiffLine[] = []; let currentOldLine = 0; @@ -88,6 +88,7 @@ interface DiffRendererProps { availableTerminalHeight?: number; terminalWidth: number; theme?: Theme; + disableColor?: boolean; } const DEFAULT_TAB_WIDTH = 4; // Spaces per tab for normalization @@ -99,6 +100,7 @@ export const DiffRenderer: React.FC = ({ availableTerminalHeight, terminalWidth, theme, + disableColor = false, }) => { const settings = useSettings(); @@ -111,17 +113,7 @@ export const DiffRenderer: React.FC = ({ return parseDiffWithLineNumbers(diffContent); }, [diffContent]); - const isNewFile = useMemo(() => { - if (parsedLines.length === 0) return false; - return parsedLines.every( - (line) => - line.type === 'add' || - line.type === 'hunk' || - line.type === 'other' || - line.content.startsWith('diff --git') || - line.content.startsWith('new file mode'), - ); - }, [parsedLines]); + const isNewFileResult = useMemo(() => isNewFile(parsedLines), [parsedLines]); const renderedOutput = useMemo(() => { if (!diffContent || typeof diffContent !== 'string') { @@ -151,14 +143,14 @@ export const DiffRenderer: React.FC = ({ ); } - if (isNewFile) { + if (isNewFileResult) { // Extract only the added lines' content const addedContent = parsedLines .filter((line) => line.type === 'add') .map((line) => line.content) .join('\n'); // Attempt to infer language from filename, default to plain text if no filename - const fileExtension = filename?.split('.').pop() || null; + const fileExtension = getFileExtension(filename); const language = fileExtension ? getLanguageFromExtension(fileExtension) : null; @@ -169,39 +161,71 @@ export const DiffRenderer: React.FC = ({ maxWidth: terminalWidth, theme, settings, + disableColor, }); } else { - return renderDiffContent( - parsedLines, - filename, - tabWidth, - availableTerminalHeight, - terminalWidth, + const key = filename ? `diff-box-${filename}` : undefined; + + return ( + + {renderDiffLines({ + parsedLines, + filename, + tabWidth, + terminalWidth, + disableColor, + })} + ); } }, [ diffContent, parsedLines, screenReaderEnabled, - isNewFile, + isNewFileResult, filename, availableTerminalHeight, terminalWidth, theme, settings, tabWidth, + disableColor, ]); return renderedOutput; }; -const renderDiffContent = ( - parsedLines: DiffLine[], - filename: string | undefined, +export const isNewFile = (parsedLines: DiffLine[]): boolean => { + if (parsedLines.length === 0) return false; + return parsedLines.every( + (line) => + line.type === 'add' || + line.type === 'hunk' || + line.type === 'other' || + line.content.startsWith('diff --git') || + line.content.startsWith('new file mode'), + ); +}; + +export interface RenderDiffLinesOptions { + parsedLines: DiffLine[]; + filename?: string; + tabWidth?: number; + terminalWidth: number; + disableColor?: boolean; +} + +export const renderDiffLines = ({ + parsedLines, + filename, tabWidth = DEFAULT_TAB_WIDTH, - availableTerminalHeight: number | undefined, - terminalWidth: number, -) => { + terminalWidth, + disableColor = false, +}: RenderDiffLinesOptions): React.ReactNode[] => { // 1. Normalize whitespace (replace tabs with spaces) *before* further processing const normalizedLines = parsedLines.map((line) => ({ ...line, @@ -214,15 +238,16 @@ const renderDiffContent = ( ); if (displayableLines.length === 0) { - return ( + return [ No changes detected. - - ); + , + ]; } const maxLineNumber = Math.max( @@ -232,7 +257,7 @@ const renderDiffContent = ( ); const gutterWidth = Math.max(1, maxLineNumber.toString().length); - const fileExtension = filename?.split('.').pop() || null; + const fileExtension = getFileExtension(filename); const language = fileExtension ? getLanguageFromExtension(fileExtension) : null; @@ -252,10 +277,6 @@ const renderDiffContent = ( baseIndentation = 0; } - const key = filename - ? `diff-box-${filename}` - : `diff-box-${crypto.createHash('sha1').update(JSON.stringify(parsedLines)).digest('hex')}`; - let lastLineNumber: number | null = null; const MAX_CONTEXT_LINES_WITHOUT_GAP = 5; @@ -321,12 +342,26 @@ const renderDiffContent = ( const displayContent = line.content.substring(baseIndentation); - const backgroundColor = - line.type === 'add' + const backgroundColor = disableColor + ? undefined + : line.type === 'add' ? semanticTheme.background.diff.added : line.type === 'del' ? semanticTheme.background.diff.removed : undefined; + + const gutterColor = disableColor + ? undefined + : semanticTheme.text.secondary; + + const symbolColor = disableColor + ? undefined + : line.type === 'add' + ? semanticTheme.status.success + : line.type === 'del' + ? semanticTheme.status.error + : undefined; + acc.push( - {gutterNumStr} + {gutterNumStr} {line.type === 'context' ? ( <> {prefixSymbol} - {colorizeLine(displayContent, language)} + + {colorizeLine( + displayContent, + language, + undefined, + disableColor, + )} + ) : ( - - - {prefixSymbol} - {' '} - {colorizeLine(displayContent, language)} + + {prefixSymbol}{' '} + {colorizeLine(displayContent, language, undefined, disableColor)} )} , @@ -371,15 +398,7 @@ const renderDiffContent = ( [], ); - return ( - - {content} - - ); + return content; }; const getLanguageFromExtension = (extension: string): string | null => { diff --git a/packages/cli/src/ui/components/messages/ShellToolMessage.test.tsx b/packages/cli/src/ui/components/messages/ShellToolMessage.test.tsx index 4f703dcfe6..9456ad0f2d 100644 --- a/packages/cli/src/ui/components/messages/ShellToolMessage.test.tsx +++ b/packages/cli/src/ui/components/messages/ShellToolMessage.test.tsx @@ -19,8 +19,12 @@ import { renderWithProviders } from '../../../test-utils/render.js'; import { createMockSettings } from '../../../test-utils/settings.js'; import { makeFakeConfig } from '@google/gemini-cli-core'; import { waitFor } from '../../../test-utils/async.js'; -import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import { SHELL_COMMAND_NAME, ACTIVE_SHELL_MAX_LINES } from '../../constants.js'; +import { + SHELL_CONTENT_OVERHEAD, + TOOL_RESULT_STANDARD_RESERVED_LINE_COUNT, +} from '../../utils/toolLayoutUtils.js'; describe('', () => { const baseProps: ShellToolMessageProps = { @@ -35,6 +39,7 @@ describe('', () => { isFirst: true, borderColor: 'green', borderDimColor: false, + isExpandable: false, config: { getEnableInteractiveShell: () => true, } as unknown as Config, @@ -52,6 +57,11 @@ describe('', () => { beforeEach(() => { vi.clearAllMocks(); + vi.useFakeTimers(); + }); + + afterEach(() => { + vi.useRealTimers(); }); describe('interactive shell focus', () => { @@ -59,14 +69,14 @@ describe('', () => { ['SHELL_COMMAND_NAME', SHELL_COMMAND_NAME], ['SHELL_TOOL_NAME', SHELL_TOOL_NAME], ])('clicks inside the shell area sets focus for %s', async (_, name) => { - const { lastFrame, simulateClick, unmount } = await renderWithProviders( - , - { uiActions, mouseEventsEnabled: true }, - ); + const { lastFrame, simulateClick, unmount, waitUntilReady } = + await renderWithProviders( + , + { uiActions, mouseEventsEnabled: true }, + ); - await waitFor(() => { - expect(lastFrame()).toContain('A shell command'); - }); + await waitUntilReady(); + expect(lastFrame()).toContain('A shell command'); await simulateClick(2, 2); @@ -75,6 +85,7 @@ describe('', () => { }); unmount(); }); + it('resets focus when shell finishes', async () => { let updateStatus: (s: CoreToolCallStatus) => void = () => {}; @@ -86,19 +97,21 @@ describe('', () => { return ; }; - const { lastFrame, unmount } = await renderWithProviders(, { - uiActions, - uiState: { - streamingState: StreamingState.Idle, - embeddedShellFocused: true, - activePtyId: 1, + const { lastFrame, unmount, waitUntilReady } = await renderWithProviders( + , + { + uiActions, + uiState: { + streamingState: StreamingState.Idle, + embeddedShellFocused: true, + activePtyId: 1, + }, }, - }); + ); // Verify it is initially focused - await waitFor(() => { - expect(lastFrame()).toContain('(Shift+Tab to unfocus)'); - }); + await waitUntilReady(); + expect(lastFrame()).toContain('(Shift+Tab to unfocus)'); // Now update status to Success await act(async () => { @@ -184,29 +197,33 @@ describe('', () => { [ 'respects availableTerminalHeight when it is smaller than ACTIVE_SHELL_MAX_LINES', 10, - 7, + 10 - TOOL_RESULT_STANDARD_RESERVED_LINE_COUNT, // 7 (Header height is 3, but calculation uses reserved=3) false, true, + false, ], [ 'uses ACTIVE_SHELL_MAX_LINES when availableTerminalHeight is large', 100, - ACTIVE_SHELL_MAX_LINES - 4, + ACTIVE_SHELL_MAX_LINES - SHELL_CONTENT_OVERHEAD, // 11 false, true, + false, ], [ 'uses full availableTerminalHeight when focused in alternate buffer mode', 100, - 97, + 100 - TOOL_RESULT_STANDARD_RESERVED_LINE_COUNT, // 97 true, false, + false, ], [ 'defaults to ACTIVE_SHELL_MAX_LINES in alternate buffer when availableTerminalHeight is undefined', undefined, - ACTIVE_SHELL_MAX_LINES - 4, + ACTIVE_SHELL_MAX_LINES - SHELL_CONTENT_OVERHEAD, // 11 false, + true, false, ], ])( @@ -217,29 +234,34 @@ describe('', () => { expectedMaxLines, focused, constrainHeight, + isExpandable, ) => { - const { lastFrame, unmount } = await renderWithProviders( - , - { - uiActions, - config: makeFakeConfig({ useAlternateBuffer: true }), - settings: createMockSettings({ - ui: { useAlternateBuffer: true }, - }), - uiState: { - activePtyId: focused ? 1 : 2, - embeddedShellFocused: focused, - constrainHeight, + const { lastFrame, waitUntilReady, unmount } = + await renderWithProviders( + , + { + uiActions, + config: makeFakeConfig({ useAlternateBuffer: true }), + settings: createMockSettings({ + ui: { useAlternateBuffer: true }, + }), + uiState: { + activePtyId: focused ? 1 : 2, + embeddedShellFocused: focused, + constrainHeight, + }, }, - }, - ); + ); + + await waitUntilReady(); const frame = lastFrame(); expect(frame.match(/Line \d+/g)?.length).toBe(expectedMaxLines); @@ -249,7 +271,7 @@ describe('', () => { ); it('fully expands in standard mode when availableTerminalHeight is undefined', async () => { - const { lastFrame, unmount } = await renderWithProviders( + const { lastFrame, unmount, waitUntilReady } = await renderWithProviders( ', () => { }, ); - await waitFor(() => { - const frame = lastFrame(); - // Should show all 100 lines - expect(frame.match(/Line \d+/g)?.length).toBe(100); - }); + await waitUntilReady(); + const frame = lastFrame(); + // Should show all 100 lines + expect(frame.match(/Line \d+/g)?.length).toBe(100); unmount(); }); it('fully expands in alternate buffer mode when constrainHeight is false and isExpandable is true', async () => { - const { lastFrame, unmount } = await renderWithProviders( + const { lastFrame, unmount, waitUntilReady } = await renderWithProviders( ', () => { }, ); - await waitFor(() => { - const frame = lastFrame(); - // Should show all 100 lines because constrainHeight is false and isExpandable is true - expect(frame.match(/Line \d+/g)?.length).toBe(100); - }); + await waitUntilReady(); + const frame = lastFrame(); + // Should show all 100 lines because constrainHeight is false and isExpandable is true + expect(frame.match(/Line \d+/g)?.length).toBe(100); expect(lastFrame()).toMatchSnapshot(); unmount(); }); it('stays constrained in alternate buffer mode when isExpandable is false even if constrainHeight is false', async () => { - const { lastFrame, unmount } = await renderWithProviders( + const { lastFrame, unmount, waitUntilReady } = await renderWithProviders( ', () => { }, ); - await waitFor(() => { - const frame = lastFrame(); - // Should still be constrained to 11 (15 - 4) because isExpandable is false - expect(frame.match(/Line \d+/g)?.length).toBe(11); - }); + await waitUntilReady(); + const frame = lastFrame(); + // Should still be constrained to 11 (15 - 4) because isExpandable is false + expect(frame.match(/Line \d+/g)?.length).toBe( + ACTIVE_SHELL_MAX_LINES - SHELL_CONTENT_OVERHEAD, + ); expect(lastFrame()).toMatchSnapshot(); unmount(); }); diff --git a/packages/cli/src/ui/components/messages/ToolConfirmationMessage.test.tsx b/packages/cli/src/ui/components/messages/ToolConfirmationMessage.test.tsx index e0f4430c6c..f04b47a63e 100644 --- a/packages/cli/src/ui/components/messages/ToolConfirmationMessage.test.tsx +++ b/packages/cli/src/ui/components/messages/ToolConfirmationMessage.test.tsx @@ -34,6 +34,9 @@ describe('ToolConfirmationMessage', () => { confirm: mockConfirm, cancel: vi.fn(), isDiffingEnabled: false, + isExpanded: vi.fn().mockReturnValue(false), + toggleExpansion: vi.fn(), + toggleAllExpansion: vi.fn(), }); const mockConfig = { @@ -458,7 +461,11 @@ describe('ToolConfirmationMessage', () => { confirm: vi.fn(), cancel: vi.fn(), isDiffingEnabled: false, + isExpanded: vi.fn().mockReturnValue(false), + toggleExpansion: vi.fn(), + toggleAllExpansion: vi.fn(), }); + const { lastFrame, unmount } = await renderWithProviders( { confirm: vi.fn(), cancel: vi.fn(), isDiffingEnabled: false, + isExpanded: vi.fn().mockReturnValue(false), + toggleExpansion: vi.fn(), + toggleAllExpansion: vi.fn(), }); + const { lastFrame, unmount } = await renderWithProviders( { confirm: vi.fn(), cancel: vi.fn(), isDiffingEnabled: true, + isExpanded: vi.fn().mockReturnValue(false), + toggleExpansion: vi.fn(), + toggleAllExpansion: vi.fn(), }); const { lastFrame, unmount } = await renderWithProviders( @@ -728,6 +742,9 @@ describe('ToolConfirmationMessage', () => { confirm: mockConfirm, cancel: vi.fn(), isDiffingEnabled: false, + isExpanded: vi.fn().mockReturnValue(false), + toggleExpansion: vi.fn(), + toggleAllExpansion: vi.fn(), }); const confirmationDetails: SerializableConfirmationDetails = { type: 'info', diff --git a/packages/cli/src/ui/components/messages/ToolGroupMessage.compact.test.tsx b/packages/cli/src/ui/components/messages/ToolGroupMessage.compact.test.tsx new file mode 100644 index 0000000000..659ae48bbf --- /dev/null +++ b/packages/cli/src/ui/components/messages/ToolGroupMessage.compact.test.tsx @@ -0,0 +1,178 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { renderWithProviders } from '../../../test-utils/render.js'; +import { createMockSettings } from '../../../test-utils/mockConfig.js'; +import { ToolGroupMessage } from './ToolGroupMessage.js'; +import { + CoreToolCallStatus, + LS_DISPLAY_NAME, + READ_FILE_DISPLAY_NAME, +} from '@google/gemini-cli-core'; +import { expect, it, describe } from 'vitest'; +import type { IndividualToolCallDisplay } from '../../types.js'; + +describe('ToolGroupMessage Compact Rendering', () => { + const defaultProps = { + item: { + id: '1', + role: 'assistant', + content: '', + timestamp: new Date(), + type: 'help' as const, // Adding type property to satisfy HistoryItem type + }, + terminalWidth: 80, + }; + + const compactSettings = createMockSettings({ + merged: { + ui: { + compactToolOutput: true, + }, + }, + }); + + it('renders consecutive compact tools without empty lines between them', async () => { + const toolCalls: IndividualToolCallDisplay[] = [ + { + callId: 'call1', + name: LS_DISPLAY_NAME, + status: CoreToolCallStatus.Success, + resultDisplay: 'file1.txt\nfile2.txt', + description: 'Listing files', + confirmationDetails: undefined, + isClientInitiated: true, + parentCallId: undefined, + }, + { + callId: 'call2', + name: LS_DISPLAY_NAME, + status: CoreToolCallStatus.Success, + resultDisplay: 'file3.txt', + description: 'Listing files', + confirmationDetails: undefined, + isClientInitiated: true, + parentCallId: undefined, + }, + ]; + + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + { settings: compactSettings }, + ); + + await waitUntilReady(); + const output = lastFrame(); + + expect(output).toMatchSnapshot(); + }); + + it('does not add an extra empty line between a compact tool and a standard tool', async () => { + const toolCalls: IndividualToolCallDisplay[] = [ + { + callId: 'call1', + name: LS_DISPLAY_NAME, + status: CoreToolCallStatus.Success, + resultDisplay: 'file1.txt', + description: 'Listing files', + confirmationDetails: undefined, + isClientInitiated: true, + parentCallId: undefined, + }, + { + callId: 'call2', + name: 'non-compact-tool', + status: CoreToolCallStatus.Success, + resultDisplay: 'some large output', + description: 'Doing something', + confirmationDetails: undefined, + isClientInitiated: true, + parentCallId: undefined, + }, + ]; + + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + { settings: compactSettings }, + ); + + await waitUntilReady(); + const output = lastFrame(); + expect(output).toMatchSnapshot(); + }); + + it('does not add an extra empty line if a compact tool has a dense payload', async () => { + const toolCalls: IndividualToolCallDisplay[] = [ + { + callId: 'call1', + name: LS_DISPLAY_NAME, + status: CoreToolCallStatus.Success, + resultDisplay: 'file1.txt', + description: 'Listing files', + confirmationDetails: undefined, + isClientInitiated: true, + parentCallId: undefined, + }, + { + callId: 'call2', + name: READ_FILE_DISPLAY_NAME, + status: CoreToolCallStatus.Success, + resultDisplay: { + summary: 'read file', + payload: 'file content', + files: ['file.txt'], + }, // Dense payload + description: 'Reading file', + confirmationDetails: undefined, + isClientInitiated: true, + parentCallId: undefined, + }, + ]; + + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + { settings: compactSettings }, + ); + + await waitUntilReady(); + const output = lastFrame(); + expect(output).toMatchSnapshot(); + }); + + it('does not add an extra empty line between a standard tool and a compact tool', async () => { + const toolCalls: IndividualToolCallDisplay[] = [ + { + callId: 'call1', + name: 'non-compact-tool', + status: CoreToolCallStatus.Success, + resultDisplay: 'some large output', + description: 'Doing something', + confirmationDetails: undefined, + isClientInitiated: true, + parentCallId: undefined, + }, + { + callId: 'call2', + name: LS_DISPLAY_NAME, + status: CoreToolCallStatus.Success, + resultDisplay: 'file1.txt', + description: 'Listing files', + confirmationDetails: undefined, + isClientInitiated: true, + parentCallId: undefined, + }, + ]; + + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + { settings: compactSettings }, + ); + + await waitUntilReady(); + const output = lastFrame(); + expect(output).toMatchSnapshot(); + }); +}); diff --git a/packages/cli/src/ui/components/messages/ToolGroupMessage.test.tsx b/packages/cli/src/ui/components/messages/ToolGroupMessage.test.tsx index bfc19e344f..e31c32899f 100644 --- a/packages/cli/src/ui/components/messages/ToolGroupMessage.test.tsx +++ b/packages/cli/src/ui/components/messages/ToolGroupMessage.test.tsx @@ -481,7 +481,7 @@ describe('', () => { ]; const item = createItem(toolCalls); const { lastFrame, unmount } = await renderWithProviders( - + , { diff --git a/packages/cli/src/ui/components/messages/ToolGroupMessage.tsx b/packages/cli/src/ui/components/messages/ToolGroupMessage.tsx index 29ab48a09c..ee740787c2 100644 --- a/packages/cli/src/ui/components/messages/ToolGroupMessage.tsx +++ b/packages/cli/src/ui/components/messages/ToolGroupMessage.tsx @@ -5,7 +5,7 @@ */ import type React from 'react'; -import { useMemo } from 'react'; +import { useMemo, Fragment } from 'react'; import { Box, Text } from 'ink'; import type { HistoryItem, @@ -17,6 +17,7 @@ import { ToolMessage } from './ToolMessage.js'; import { ShellToolMessage } from './ShellToolMessage.js'; import { TopicMessage, isTopicTool } from './TopicMessage.js'; import { SubagentGroupDisplay } from './SubagentGroupDisplay.js'; +import { DenseToolMessage } from './DenseToolMessage.js'; import { theme } from '../../semantic-colors.js'; import { useConfig } from '../../contexts/ConfigContext.js'; import { isShellTool } from './ToolShared.js'; @@ -24,10 +25,84 @@ import { shouldHideToolCall, CoreToolCallStatus, Kind, + EDIT_DISPLAY_NAME, + GLOB_DISPLAY_NAME, + WEB_SEARCH_DISPLAY_NAME, + READ_FILE_DISPLAY_NAME, + LS_DISPLAY_NAME, + GREP_DISPLAY_NAME, + WEB_FETCH_DISPLAY_NAME, + WRITE_FILE_DISPLAY_NAME, + READ_MANY_FILES_DISPLAY_NAME, + isFileDiff, + isGrepResult, + isListResult, } from '@google/gemini-cli-core'; import { useUIState } from '../../contexts/UIStateContext.js'; import { getToolGroupBorderAppearance } from '../../utils/borderStyles.js'; import { useSettings } from '../../contexts/SettingsContext.js'; +import { + TOOL_RESULT_STATIC_HEIGHT, + TOOL_RESULT_STANDARD_RESERVED_LINE_COUNT, +} from '../../utils/toolLayoutUtils.js'; + +const COMPACT_OUTPUT_ALLOWLIST = new Set([ + EDIT_DISPLAY_NAME, + GLOB_DISPLAY_NAME, + WEB_SEARCH_DISPLAY_NAME, + READ_FILE_DISPLAY_NAME, + LS_DISPLAY_NAME, + GREP_DISPLAY_NAME, + WEB_FETCH_DISPLAY_NAME, + WRITE_FILE_DISPLAY_NAME, + READ_MANY_FILES_DISPLAY_NAME, +]); + +// Helper to identify if a tool should use the compact view +export const isCompactTool = ( + tool: IndividualToolCallDisplay, + isCompactModeEnabled: boolean, +): boolean => { + const hasCompactOutputSupport = COMPACT_OUTPUT_ALLOWLIST.has(tool.name); + const displayStatus = mapCoreStatusToDisplayStatus(tool.status); + return ( + isCompactModeEnabled && + hasCompactOutputSupport && + displayStatus !== ToolCallStatus.Confirming + ); +}; + +// Helper to identify if a compact tool has a payload (diff, list, etc.) +export const hasDensePayload = (tool: IndividualToolCallDisplay): boolean => { + if (tool.outputFile) return true; + const res = tool.resultDisplay; + if (!res) return false; + + // TODO(24053): Usage of type guards makes this class too aware of internals + if (isFileDiff(res)) return true; + if (tool.confirmationDetails?.type === 'edit') return true; + if (isGrepResult(res) && res.matches.length > 0) return true; + + // ReadManyFilesResult check (has 'include' and 'files') + if (isListResult(res) && 'include' in res) { + const includeProp = (res as { include?: unknown }).include; + if (Array.isArray(includeProp) && res.files.length > 0) { + return true; + } + } + + // Generic summary/payload pattern + if ( + typeof res === 'object' && + res !== null && + 'summary' in res && + 'payload' in res + ) { + return true; + } + + return false; +}; interface ToolGroupMessageProps { item: HistoryItem | HistoryItemWithoutId; @@ -54,11 +129,13 @@ export const ToolGroupMessage: React.FC = ({ }) => { const settings = useSettings(); const isLowErrorVerbosity = settings.merged.ui?.errorVerbosity !== 'full'; + const isCompactModeEnabled = settings.merged.ui?.compactToolOutput === true; // Filter out tool calls that should be hidden (e.g. in-progress Ask User, or Plan Mode operations). - const toolCalls = useMemo( + const visibleToolCalls = useMemo( () => allToolCalls.filter((t) => { + // Hide internal errors unless full verbosity if ( isLowErrorVerbosity && t.status === CoreToolCallStatus.Error && @@ -66,19 +143,34 @@ export const ToolGroupMessage: React.FC = ({ ) { return false; } + // Standard hiding logic (e.g. Plan Mode internal edits) + if ( + shouldHideToolCall({ + displayName: t.name, + status: t.status, + approvalMode: t.approvalMode, + hasResultDisplay: !!t.resultDisplay, + parentCallId: t.parentCallId, + }) + ) { + return false; + } - return !shouldHideToolCall({ - displayName: t.name, - status: t.status, - approvalMode: t.approvalMode, - hasResultDisplay: !!t.resultDisplay, - parentCallId: t.parentCallId, - }); + // We HIDE tools that are still in pre-execution states (Confirming, Pending) + // from the History log. They live in the Global Queue or wait for their turn. + // Only show tools that are actually running or finished. + const displayStatus = mapCoreStatusToDisplayStatus(t.status); + + // We hide Confirming tools from the history log because they are + // currently being rendered in the interactive ToolConfirmationQueue. + // We show everything else, including Pending (waiting to run) and + // Canceled (rejected by user), to ensure the history is complete + // and to avoid tools "vanishing" after approval. + return displayStatus !== ToolCallStatus.Confirming; }), [allToolCalls, isLowErrorVerbosity], ); - const config = useConfig(); const { activePtyId, embeddedShellFocused, @@ -86,6 +178,8 @@ export const ToolGroupMessage: React.FC = ({ pendingHistoryItems, } = useUIState(); + const config = useConfig(); + const { borderColor, borderDimColor } = useMemo( () => getToolGroupBorderAppearance( @@ -104,41 +198,6 @@ export const ToolGroupMessage: React.FC = ({ ], ); - // We HIDE tools that are still in pre-execution states (Confirming, Pending) - // from the History log. They live in the Global Queue or wait for their turn. - // Only show tools that are actually running or finished. - // We explicitly exclude Pending and Confirming to ensure they only - // appear in the Global Queue until they are approved and start executing. - const visibleToolCalls = useMemo( - () => - toolCalls.filter((t) => { - const displayStatus = mapCoreStatusToDisplayStatus(t.status); - // We hide Confirming tools from the history log because they are - // currently being rendered in the interactive ToolConfirmationQueue. - // We show everything else, including Pending (waiting to run) and - // Canceled (rejected by user), to ensure the history is complete - // and to avoid tools "vanishing" after approval. - return displayStatus !== ToolCallStatus.Confirming; - }), - - [toolCalls], - ); - - const staticHeight = /* border */ 2; - - let countToolCallsWithResults = 0; - for (const tool of visibleToolCalls) { - if ( - tool.kind !== Kind.Agent && - tool.resultDisplay !== undefined && - tool.resultDisplay !== '' - ) { - countToolCallsWithResults++; - } - } - const countOneLineToolCalls = - visibleToolCalls.filter((t) => t.kind !== Kind.Agent).length - - countToolCallsWithResults; const groupedTools = useMemo(() => { const groups: Array< IndividualToolCallDisplay | IndividualToolCallDisplay[] @@ -158,10 +217,81 @@ export const ToolGroupMessage: React.FC = ({ return groups; }, [visibleToolCalls]); + const staticHeight = useMemo(() => { + let height = 0; + for (let i = 0; i < groupedTools.length; i++) { + const group = groupedTools[i]; + const isFirst = i === 0; + const isLast = i === groupedTools.length - 1; + const prevGroup = i > 0 ? groupedTools[i - 1] : null; + const prevIsCompact = + prevGroup && + !Array.isArray(prevGroup) && + isCompactTool(prevGroup, isCompactModeEnabled); + + const nextGroup = !isLast ? groupedTools[i + 1] : null; + const nextIsCompact = + nextGroup && + !Array.isArray(nextGroup) && + isCompactTool(nextGroup, isCompactModeEnabled); + + const isAgentGroup = Array.isArray(group); + const isCompact = + !isAgentGroup && isCompactTool(group, isCompactModeEnabled); + + const showClosingBorder = !isCompact && (nextIsCompact || isLast); + + if (isFirst) { + height += borderTopOverride ? 1 : 0; + } else if (isCompact !== prevIsCompact) { + // Add a 1-line gap when transitioning between compact and standard tools (or vice versa) + height += 1; + } + + const isFirstProp = !!(isFirst + ? (borderTopOverride ?? true) + : prevIsCompact); + + if (isAgentGroup) { + // Agent group + height += 1; // Header + height += group.length; // 1 line per agent + if (isFirstProp) height += 1; // Top border + if (showClosingBorder) height += 1; // Bottom border + } else { + if (isCompact) { + height += 1; // Base height for compact tool + } else { + // Static overhead for standard tool header: + height += + TOOL_RESULT_STATIC_HEIGHT + + TOOL_RESULT_STANDARD_RESERVED_LINE_COUNT; + } + } + } + return height; + }, [groupedTools, isCompactModeEnabled, borderTopOverride]); + + let countToolCallsWithResults = 0; + for (const tool of visibleToolCalls) { + if (tool.kind !== Kind.Agent) { + if (isCompactTool(tool, isCompactModeEnabled)) { + if (hasDensePayload(tool)) { + countToolCallsWithResults++; + } + } else if ( + tool.resultDisplay !== undefined && + tool.resultDisplay !== '' + ) { + countToolCallsWithResults++; + } + } + } + const availableTerminalHeightPerToolMessage = availableTerminalHeight ? Math.max( Math.floor( - (availableTerminalHeight - staticHeight - countOneLineToolCalls) / + (availableTerminalHeight - staticHeight) / Math.max(1, countToolCallsWithResults), ), 1, @@ -176,7 +306,11 @@ export const ToolGroupMessage: React.FC = ({ // explicit "closing slice" (tools: []) used to bridge static/pending sections, // and only if it's actually continuing an open box from above. const isExplicitClosingSlice = allToolCalls.length === 0; - if (visibleToolCalls.length === 0 && !isExplicitClosingSlice) { + const shouldShowGroup = + visibleToolCalls.length > 0 || + (isExplicitClosingSlice && borderBottomOverride === true); + + if (!shouldShowGroup) { return null; } @@ -191,7 +325,24 @@ export const ToolGroupMessage: React.FC = ({ */ width={terminalWidth} paddingRight={TOOL_MESSAGE_HORIZONTAL_MARGIN} + // When border will be present, add margin of 1 to create spacing from the + // previous message. + marginBottom={(borderBottomOverride ?? true) ? 1 : 0} > + {visibleToolCalls.length === 0 && + isExplicitClosingSlice && + borderBottomOverride === true && ( + + )} {groupedTools.map((group, index) => { let isFirst = index === 0; if (!isFirst) { @@ -207,98 +358,149 @@ export const ToolGroupMessage: React.FC = ({ isFirst = allPreviousWereTopics; } - const resolvedIsFirst = - borderTopOverride !== undefined - ? borderTopOverride && isFirst - : isFirst; + const isLast = index === groupedTools.length - 1; - if (Array.isArray(group)) { + const prevGroup = index > 0 ? groupedTools[index - 1] : null; + const prevIsCompact = + prevGroup && + !Array.isArray(prevGroup) && + isCompactTool(prevGroup, isCompactModeEnabled); + + const nextGroup = !isLast ? groupedTools[index + 1] : null; + const nextIsCompact = + nextGroup && + !Array.isArray(nextGroup) && + isCompactTool(nextGroup, isCompactModeEnabled); + + const isAgentGroup = Array.isArray(group); + const isCompact = + !isAgentGroup && isCompactTool(group, isCompactModeEnabled); + const isTopicToolCall = !isAgentGroup && isTopicTool(group.name); + + // When border is present, add margin of 1 to create spacing from the + // previous message. + let marginTop = 0; + if (isFirst) { + marginTop = (borderTopOverride ?? false) ? 1 : 0; + } else if (isCompact && prevIsCompact) { + marginTop = 0; + } else if (isCompact || prevIsCompact) { + marginTop = 1; + } else { + // For subsequent standard tools scenarios, the ToolMessage and + // ShellToolMessage components manage their own top spacing by passing + // `isFirst=false` to their internal StickyHeader which then applies + // a paddingTop=1 to create desired gap between standard tool outputs. + marginTop = 0; + } + + const isFirstProp = !!(isFirst + ? (borderTopOverride ?? true) + : prevIsCompact); + + const showClosingBorder = + !isCompact && !isTopicToolCall && (nextIsCompact || isLast); + + if (isAgentGroup) { return ( - + marginTop={marginTop} + flexDirection="column" + width={contentWidth} + > + + {showClosingBorder && ( + + )} + ); } const tool = group; const isShellToolCall = isShellTool(tool.name); - const isTopicToolCall = isTopicTool(tool.name); const commonProps = { ...tool, availableTerminalHeight: availableTerminalHeightPerToolMessage, terminalWidth: contentWidth, emphasis: 'medium' as const, - isFirst: resolvedIsFirst, + isFirst: isCompact ? false : isFirstProp, borderColor, borderDimColor, isExpandable, }; return ( - - {isTopicToolCall ? ( - - ) : isShellToolCall ? ( - - ) : ( - - )} - {tool.outputFile && ( + + + {isCompact ? ( + + ) : isTopicToolCall ? ( + + ) : isShellToolCall ? ( + + ) : ( + + )} + {!isCompact && tool.outputFile && ( + + + + Output too long and was saved to: {tool.outputFile} + + + + )} + + {showClosingBorder && ( - - - Output too long and was saved to: {tool.outputFile} - - - + /> )} - + ); })} - {/* - We have to keep the bottom border separate so it doesn't get - drawn over by the sticky header directly inside it. - */} - {(visibleToolCalls.length > 0 || borderBottomOverride !== undefined) && - borderBottomOverride !== false && - (visibleToolCalls.length === 0 || - !visibleToolCalls.every((tool) => isTopicTool(tool.name))) && ( - - )} ); diff --git a/packages/cli/src/ui/components/messages/ToolResultDisplay.tsx b/packages/cli/src/ui/components/messages/ToolResultDisplay.tsx index 3b7cfaa8da..4b51ae8ab8 100644 --- a/packages/cli/src/ui/components/messages/ToolResultDisplay.tsx +++ b/packages/cli/src/ui/components/messages/ToolResultDisplay.tsx @@ -15,6 +15,7 @@ import { type AnsiOutput, type AnsiLine, isSubagentProgress, + isStructuredToolResult, } from '@google/gemini-cli-core'; import { useUIState } from '../../contexts/UIStateContext.js'; import { tryParseJSON } from '../../../utils/jsonoutput.js'; @@ -123,7 +124,28 @@ export const ToolResultDisplay: React.FC = ({ {contentData} ); - } else if (typeof contentData === 'object' && 'fileDiff' in contentData) { + } else if (isStructuredToolResult(contentData)) { + if (renderOutputAsMarkdown) { + content = ( + + ); + } else { + content = ( + + {contentData.summary} + + ); + } + } else if ( + typeof contentData === 'object' && + contentData !== null && + 'fileDiff' in contentData + ) { content = ( = ({ // Final render based on session mode if (isAlternateBuffer) { + // Use maxLines if provided, otherwise fall back to the calculated available height + const effectiveMaxHeight = maxLines ?? availableHeight; + return ( { expect(lastFrame()).toContain('tool-1'); }); expect(lastFrame()).toContain('Description for tool-1'); - // Content lines 1-4 should be scrolled off + // Content lines 1-5 should be scrolled off expect(lastFrame()).not.toContain('c1-01'); - expect(lastFrame()).not.toContain('c1-04'); - // Line 6 and 7 should be visible (terminalHeight=5 means only 2 lines of content show below 3-line header) + expect(lastFrame()).not.toContain('c1-05'); + // Line 6 and 7 should be visible (terminalHeight=5 means 2 lines of content show below 3-line header) expect(lastFrame()).toContain('c1-06'); expect(lastFrame()).toContain('c1-07'); expect(lastFrame()).toMatchSnapshot(); diff --git a/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage-DenseToolMessage-Visual-Regression-matches-SVG-snapshot-for-a-Rejected-tool-call.snap.svg b/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage-DenseToolMessage-Visual-Regression-matches-SVG-snapshot-for-a-Rejected-tool-call.snap.svg new file mode 100644 index 0000000000..96d89e7416 --- /dev/null +++ b/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage-DenseToolMessage-Visual-Regression-matches-SVG-snapshot-for-a-Rejected-tool-call.snap.svg @@ -0,0 +1,11 @@ + + + + + - + read_file + Reading important.txt + + \ No newline at end of file diff --git a/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage-DenseToolMessage-Visual-Regression-matches-SVG-snapshot-for-an-Accepted-file-edit-with-diff-stats.snap.svg b/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage-DenseToolMessage-Visual-Regression-matches-SVG-snapshot-for-an-Accepted-file-edit-with-diff-stats.snap.svg new file mode 100644 index 0000000000..7b21bd65a0 --- /dev/null +++ b/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage-DenseToolMessage-Visual-Regression-matches-SVG-snapshot-for-an-Accepted-file-edit-with-diff-stats.snap.svg @@ -0,0 +1,33 @@ + + + + + + edit + test.ts + → Accepted + ( + +1 + , + -1 + ) + + 1 + + + - + + + old + + 1 + + + + + + + new + + \ No newline at end of file diff --git a/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage.test.tsx.snap b/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage.test.tsx.snap new file mode 100644 index 0000000000..d08b84c1a9 --- /dev/null +++ b/packages/cli/src/ui/components/messages/__snapshots__/DenseToolMessage.test.tsx.snap @@ -0,0 +1,143 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`DenseToolMessage > Toggleable Diff View (Alternate Buffer) > hides diff content by default when in alternate buffer mode 1`] = ` +" ✓ test-tool test.ts → Accepted +" +`; + +exports[`DenseToolMessage > Toggleable Diff View (Alternate Buffer) > shows diff content by default when NOT in alternate buffer mode 1`] = ` +" ✓ test-tool test.ts → Accepted + + 1 - old line + 1 + new line +" +`; + +exports[`DenseToolMessage > Visual Regression > matches SVG snapshot for a Rejected tool call 1`] = `" - read_file Reading important.txt"`; + +exports[`DenseToolMessage > Visual Regression > matches SVG snapshot for an Accepted file edit with diff stats 1`] = ` +" ✓ edit test.ts → Accepted (+1, -1) + + 1 - old + 1 + new +" +`; + +exports[`DenseToolMessage > does not render result arrow if resultDisplay is missing 1`] = ` +" o test-tool Test description +" +`; + +exports[`DenseToolMessage > flattens newlines in string results 1`] = ` +" ✓ test-tool Test description → Line 1 Line 2 +" +`; + +exports[`DenseToolMessage > renders correctly for Edit tool using confirmationDetails 1`] = ` +" ? Edit styles.scss → Confirming + + 1 - body { color: blue; } + 1 + body { color: red; } +" +`; + +exports[`DenseToolMessage > renders correctly for Errored Edit tool 1`] = ` +" x Edit styles.scss → Failed (+1, -1) + + 1 - old line + 1 + new line +" +`; + +exports[`DenseToolMessage > renders correctly for ReadManyFiles results 1`] = ` +" ✓ test-tool Attempting to read files from **/*.ts → Read 3 file(s) (1 ignored) + + file1.ts + file2.ts + file3.ts +" +`; + +exports[`DenseToolMessage > renders correctly for Rejected Edit tool 1`] = ` +" - Edit styles.scss → Rejected (+1, -1) + + 1 - old line + 1 + new line +" +`; + +exports[`DenseToolMessage > renders correctly for Rejected Edit tool with confirmationDetails and diffStat 1`] = ` +" - Edit styles.scss → Rejected (+1, -1) + + 1 - body { color: blue; } + 1 + body { color: red; } +" +`; + +exports[`DenseToolMessage > renders correctly for Rejected WriteFile tool 1`] = ` +" - WriteFile config.json → Rejected + + 1 - old content + 1 + new content +" +`; + +exports[`DenseToolMessage > renders correctly for WriteFile tool 1`] = ` +" ✓ WriteFile config.json → Accepted (+1, -1) + + 1 - old content + 1 + new content +" +`; + +exports[`DenseToolMessage > renders correctly for a successful string result 1`] = ` +" ✓ test-tool Test description → Success result +" +`; + +exports[`DenseToolMessage > renders correctly for error status with string message 1`] = ` +" x test-tool Test description → Error occurred +" +`; + +exports[`DenseToolMessage > renders correctly for file diff results with stats 1`] = ` +" ✓ test-tool test.ts → Accepted (+15, -6) + + 1 - old line + 1 + diff content +" +`; + +exports[`DenseToolMessage > renders correctly for grep results 1`] = ` +" ✓ test-tool Test description → Found 2 matches + + file1.ts:10: match 1 + file2.ts:20: match 2 +" +`; + +exports[`DenseToolMessage > renders correctly for ls results 1`] = ` +" ✓ test-tool Test description → Listed 2 files. (1 ignored) +" +`; + +exports[`DenseToolMessage > renders correctly for todo updates 1`] = ` +" ✓ test-tool Test description → Todos updated +" +`; + +exports[`DenseToolMessage > renders generic failure message for error status without string message 1`] = ` +" x test-tool Test description → Failed +" +`; + +exports[`DenseToolMessage > renders generic output message for unknown object results 1`] = ` +" ✓ test-tool Test description → Returned (possible empty result) +" +`; + +exports[`DenseToolMessage > truncates long string results 1`] = ` +" ✓ test-tool Test description + → AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA… +" +`; diff --git a/packages/cli/src/ui/components/messages/__snapshots__/ShellToolMessage.test.tsx.snap b/packages/cli/src/ui/components/messages/__snapshots__/ShellToolMessage.test.tsx.snap index 967ea81e14..38700b92de 100644 --- a/packages/cli/src/ui/components/messages/__snapshots__/ShellToolMessage.test.tsx.snap +++ b/packages/cli/src/ui/components/messages/__snapshots__/ShellToolMessage.test.tsx.snap @@ -4,7 +4,6 @@ exports[` > Height Constraints > defaults to ACTIVE_SHELL_MA "╭──────────────────────────────────────────────────────────────────────────────╮ │ ⊶ Shell Command A shell command │ │ │ -│ Line 90 │ │ Line 91 │ │ Line 92 │ │ Line 93 │ @@ -129,7 +128,6 @@ exports[` > Height Constraints > respects availableTerminalH "╭──────────────────────────────────────────────────────────────────────────────╮ │ ⊶ Shell Command A shell command │ │ │ -│ Line 94 │ │ Line 95 │ │ Line 96 │ │ Line 97 │ @@ -143,7 +141,6 @@ exports[` > Height Constraints > stays constrained in altern "╭──────────────────────────────────────────────────────────────────────────────╮ │ ✓ Shell Command A shell command │ │ │ -│ Line 90 │ │ Line 91 │ │ Line 92 │ │ Line 93 │ @@ -161,7 +158,6 @@ exports[` > Height Constraints > uses ACTIVE_SHELL_MAX_LINES "╭──────────────────────────────────────────────────────────────────────────────╮ │ ⊶ Shell Command A shell command │ │ │ -│ Line 90 │ │ Line 91 │ │ Line 92 │ │ Line 93 │ @@ -179,11 +175,10 @@ exports[` > Height Constraints > uses full availableTerminal "╭──────────────────────────────────────────────────────────────────────────────╮ │ ⊶ Shell Command A shell command (Shift+Tab to unfocus) │ │ │ -│ Line 4 │ │ Line 5 │ │ Line 6 │ -│ Line 7 █ │ -│ Line 8 █ │ +│ Line 7 │ +│ Line 8 │ │ Line 9 █ │ │ Line 10 █ │ │ Line 11 █ │ diff --git a/packages/cli/src/ui/components/messages/__snapshots__/ToolGroupMessage.compact.test.tsx.snap b/packages/cli/src/ui/components/messages/__snapshots__/ToolGroupMessage.compact.test.tsx.snap new file mode 100644 index 0000000000..37b111ed1e --- /dev/null +++ b/packages/cli/src/ui/components/messages/__snapshots__/ToolGroupMessage.compact.test.tsx.snap @@ -0,0 +1,35 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`ToolGroupMessage Compact Rendering > does not add an extra empty line between a compact tool and a standard tool 1`] = ` +" ✓ ReadFolder Listing files → file1.txt + +╭──────────────────────────────────────────────────────────────────────────╮ +│ ✓ non-compact-tool Doing something │ +│ │ +│ some large output │ +╰──────────────────────────────────────────────────────────────────────────╯ +" +`; + +exports[`ToolGroupMessage Compact Rendering > does not add an extra empty line between a standard tool and a compact tool 1`] = ` +"╭──────────────────────────────────────────────────────────────────────────╮ +│ ✓ non-compact-tool Doing something │ +│ │ +│ some large output │ +╰──────────────────────────────────────────────────────────────────────────╯ + + ✓ ReadFolder Listing files → file1.txt +" +`; + +exports[`ToolGroupMessage Compact Rendering > does not add an extra empty line if a compact tool has a dense payload 1`] = ` +" ✓ ReadFolder Listing files → file1.txt + ✓ ReadFile Reading file → read file +" +`; + +exports[`ToolGroupMessage Compact Rendering > renders consecutive compact tools without empty lines between them 1`] = ` +" ✓ ReadFolder Listing files → file1.txt file2.txt + ✓ ReadFolder Listing files → file3.txt +" +`; diff --git a/packages/cli/src/ui/components/messages/__snapshots__/ToolGroupMessage.test.tsx.snap b/packages/cli/src/ui/components/messages/__snapshots__/ToolGroupMessage.test.tsx.snap index e5a69fb2bf..a3af0178a5 100644 --- a/packages/cli/src/ui/components/messages/__snapshots__/ToolGroupMessage.test.tsx.snap +++ b/packages/cli/src/ui/components/messages/__snapshots__/ToolGroupMessage.test.tsx.snap @@ -63,7 +63,8 @@ exports[` > Golden Snapshots > renders empty tool calls arra exports[` > Golden Snapshots > renders header when scrolled 1`] = ` "╭──────────────────────────────────────────────────────────────────────────╮ │ ✓ tool-1 Description 1. This is a long description that will need to b… │ -│──────────────────────────────────────────────────────────────────────────│ +│──────────────────────────────────────────────────────────────────────────│ ▄ +│ line4 │ █ │ line5 │ █ │ │ █ │ ✓ tool-2 Description 2 │ █ @@ -71,6 +72,7 @@ exports[` > Golden Snapshots > renders header when scrolled │ line1 │ █ │ line2 │ █ ╰──────────────────────────────────────────────────────────────────────────╯ █ + █ " `; @@ -129,12 +131,12 @@ exports[` > Golden Snapshots > renders tool call with output `; exports[` > Golden Snapshots > renders two tool groups where only the last line of the previous group is visible 1`] = ` -"╰──────────────────────────────────────────────────────────────────────────╯ -╭──────────────────────────────────────────────────────────────────────────╮ +"╭──────────────────────────────────────────────────────────────────────────╮ │ ✓ tool-2 Description 2 │ -│ │ ▄ -│ line1 │ █ +│ │ +│ line1 │ ▄ ╰──────────────────────────────────────────────────────────────────────────╯ █ + █ " `; diff --git a/packages/cli/src/ui/components/messages/__snapshots__/ToolResultDisplay.test.tsx.snap b/packages/cli/src/ui/components/messages/__snapshots__/ToolResultDisplay.test.tsx.snap index e34e66cc48..f4b3a35884 100644 --- a/packages/cli/src/ui/components/messages/__snapshots__/ToolResultDisplay.test.tsx.snap +++ b/packages/cli/src/ui/components/messages/__snapshots__/ToolResultDisplay.test.tsx.snap @@ -37,8 +37,7 @@ exports[`ToolResultDisplay > renders string result as plain text when renderOutp `; exports[`ToolResultDisplay > truncates very long string results 1`] = ` -"... 249 hidden (Ctrl+O) ... -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +"... 250 hidden (Ctrl+O) ... aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa diff --git a/packages/cli/src/ui/components/messages/__snapshots__/ToolStickyHeaderRegression.test.tsx.snap b/packages/cli/src/ui/components/messages/__snapshots__/ToolStickyHeaderRegression.test.tsx.snap index dda93c1c21..66ca527b4b 100644 --- a/packages/cli/src/ui/components/messages/__snapshots__/ToolStickyHeaderRegression.test.tsx.snap +++ b/packages/cli/src/ui/components/messages/__snapshots__/ToolStickyHeaderRegression.test.tsx.snap @@ -2,7 +2,7 @@ exports[`ToolMessage Sticky Header Regression > verifies that ShellToolMessage in a ToolGroupMessage in a ScrollableList has sticky headers 1`] = ` "╭────────────────────────────────────────────────────────────────────────╮ █ -│ ✓ Shell Command Description for Shell Command │ █ +│ ✓ Shell Command Description for Shell Command │ ▀ │ │ │ shell-01 │ │ shell-02 │ @@ -11,7 +11,7 @@ exports[`ToolMessage Sticky Header Regression > verifies that ShellToolMessage i exports[`ToolMessage Sticky Header Regression > verifies that ShellToolMessage in a ToolGroupMessage in a ScrollableList has sticky headers 2`] = ` "╭────────────────────────────────────────────────────────────────────────╮ -│ ✓ Shell Command Description for Shell Command │ ▄ +│ ✓ Shell Command Description for Shell Command │ │────────────────────────────────────────────────────────────────────────│ █ │ shell-06 │ ▀ │ shell-07 │ diff --git a/packages/cli/src/ui/constants.ts b/packages/cli/src/ui/constants.ts index 943f180134..19df95a621 100644 --- a/packages/cli/src/ui/constants.ts +++ b/packages/cli/src/ui/constants.ts @@ -62,3 +62,6 @@ export const DEFAULT_COMPRESSION_THRESHOLD = 0.5; /** Documentation URL for skills setup and configuration */ export const SKILLS_DOCS_URL = 'https://github.com/google-gemini/gemini-cli/blob/main/docs/cli/skills.md'; + +/** Max lines to show for a compact tool subview (e.g. diff) */ +export const COMPACT_TOOL_SUBVIEW_MAX_LINES = 15; diff --git a/packages/cli/src/ui/contexts/ToolActionsContext.test.tsx b/packages/cli/src/ui/contexts/ToolActionsContext.test.tsx index 642eec0cde..d93a7d56c2 100644 --- a/packages/cli/src/ui/contexts/ToolActionsContext.test.tsx +++ b/packages/cli/src/ui/contexts/ToolActionsContext.test.tsx @@ -4,7 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { act } from 'react'; +import { act, useState, useCallback } from 'react'; import { describe, it, expect, vi, beforeEach } from 'vitest'; import { renderHook } from '../../test-utils/render.js'; import { ToolActionsProvider, useToolActions } from './ToolActionsContext.js'; @@ -71,16 +71,61 @@ describe('ToolActionsContext', () => { beforeEach(() => { vi.clearAllMocks(); + // Default to a pending promise to avoid unwanted async state updates in tests + // that don't specifically test the IdeClient initialization. + vi.mocked(IdeClient.getInstance).mockReturnValue(new Promise(() => {})); }); - const wrapper = ({ children }: { children: React.ReactNode }) => ( - - {children} - - ); + const WrapperReactComp = ({ children }: { children: React.ReactNode }) => { + const [expandedTools, setExpandedTools] = useState>(new Set()); + + const isExpanded = useCallback( + (callId: string) => expandedTools.has(callId), + [expandedTools], + ); + + const toggleExpansion = useCallback((callId: string) => { + setExpandedTools((prev) => { + const next = new Set(prev); + if (next.has(callId)) { + next.delete(callId); + } else { + next.add(callId); + } + return next; + }); + }, []); + + const toggleAllExpansion = useCallback((callIds: string[]) => { + setExpandedTools((prev) => { + const next = new Set(prev); + const anyCollapsed = callIds.some((id) => !next.has(id)); + + if (anyCollapsed) { + callIds.forEach((id) => next.add(id)); + } else { + callIds.forEach((id) => next.delete(id)); + } + return next; + }); + }, []); + return ( + + {children} + + ); + }; it('publishes to MessageBus for tools with correlationId', async () => { - const { result } = await renderHook(() => useToolActions(), { wrapper }); + const { result } = await renderHook(() => useToolActions(), { + wrapper: WrapperReactComp, + }); await result.current.confirm( 'modern-call', @@ -98,7 +143,9 @@ describe('ToolActionsContext', () => { }); it('handles cancel by calling confirm with Cancel outcome', async () => { - const { result } = await renderHook(() => useToolActions(), { wrapper }); + const { result } = await renderHook(() => useToolActions(), { + wrapper: WrapperReactComp, + }); await result.current.cancel('modern-call'); @@ -127,7 +174,9 @@ describe('ToolActionsContext', () => { ); vi.mocked(mockConfig.getIdeMode).mockReturnValue(true); - const { result } = await renderHook(() => useToolActions(), { wrapper }); + const { result } = await renderHook(() => useToolActions(), { + wrapper: WrapperReactComp, + }); await act(async () => { deferredIdeClient.resolve(mockIdeClient); @@ -169,7 +218,9 @@ describe('ToolActionsContext', () => { ); vi.mocked(mockConfig.getIdeMode).mockReturnValue(true); - const { result } = await renderHook(() => useToolActions(), { wrapper }); + const { result } = await renderHook(() => useToolActions(), { + wrapper: WrapperReactComp, + }); await act(async () => { deferredIdeClient.resolve(mockIdeClient); @@ -214,7 +265,13 @@ describe('ToolActionsContext', () => { const { result } = await renderHook(() => useToolActions(), { wrapper: ({ children }) => ( - + {children} ), @@ -233,4 +290,58 @@ describe('ToolActionsContext', () => { ); expect(mockMessageBus.publish).not.toHaveBeenCalled(); }); + + describe('toggleAllExpansion', () => { + it('expands all when none are expanded', async () => { + const { result } = await renderHook(() => useToolActions(), { + wrapper: WrapperReactComp, + }); + + act(() => { + result.current.toggleAllExpansion(['modern-call', 'edit-call']); + }); + + expect(result.current.isExpanded('modern-call')).toBe(true); + expect(result.current.isExpanded('edit-call')).toBe(true); + }); + + it('expands all when some are expanded', async () => { + const { result } = await renderHook(() => useToolActions(), { + wrapper: WrapperReactComp, + }); + + act(() => { + result.current.toggleExpansion('modern-call'); + }); + expect(result.current.isExpanded('modern-call')).toBe(true); + expect(result.current.isExpanded('edit-call')).toBe(false); + + act(() => { + result.current.toggleAllExpansion(['modern-call', 'edit-call']); + }); + + expect(result.current.isExpanded('modern-call')).toBe(true); + expect(result.current.isExpanded('edit-call')).toBe(true); + }); + + it('collapses all when all are expanded', async () => { + const { result } = await renderHook(() => useToolActions(), { + wrapper: WrapperReactComp, + }); + + act(() => { + result.current.toggleExpansion('modern-call'); + result.current.toggleExpansion('edit-call'); + }); + expect(result.current.isExpanded('modern-call')).toBe(true); + expect(result.current.isExpanded('edit-call')).toBe(true); + + act(() => { + result.current.toggleAllExpansion(['modern-call', 'edit-call']); + }); + + expect(result.current.isExpanded('modern-call')).toBe(false); + expect(result.current.isExpanded('edit-call')).toBe(false); + }); + }); }); diff --git a/packages/cli/src/ui/contexts/ToolActionsContext.tsx b/packages/cli/src/ui/contexts/ToolActionsContext.tsx index 10e063e098..c6c8c2ebbe 100644 --- a/packages/cli/src/ui/contexts/ToolActionsContext.tsx +++ b/packages/cli/src/ui/contexts/ToolActionsContext.tsx @@ -48,11 +48,14 @@ interface ToolActionsContextValue { ) => Promise; cancel: (callId: string) => Promise; isDiffingEnabled: boolean; + isExpanded: (callId: string) => boolean; + toggleExpansion: (callId: string) => void; + toggleAllExpansion: (callIds: string[]) => void; } const ToolActionsContext = createContext(null); -export const useToolActions = () => { +export const useToolActions = (): ToolActionsContextValue => { const context = useContext(ToolActionsContext); if (!context) { throw new Error('useToolActions must be used within a ToolActionsProvider'); @@ -64,12 +67,22 @@ interface ToolActionsProviderProps { children: React.ReactNode; config: Config; toolCalls: IndividualToolCallDisplay[]; + isExpanded: (callId: string) => boolean; + toggleExpansion: (callId: string) => void; + toggleAllExpansion: (callIds: string[]) => void; } export const ToolActionsProvider: React.FC = ( props: ToolActionsProviderProps, ) => { - const { children, config, toolCalls } = props; + const { + children, + config, + toolCalls, + isExpanded, + toggleExpansion, + toggleAllExpansion, + } = props; // Hoist IdeClient logic here to keep UI pure const [ideClient, setIdeClient] = useState(null); @@ -77,24 +90,23 @@ export const ToolActionsProvider: React.FC = ( useEffect(() => { let isMounted = true; + let activeClient: IdeClient | null = null; + + const handleStatusChange = () => { + if (isMounted && activeClient) { + setIsDiffingEnabled(activeClient.isDiffingEnabled()); + } + }; + if (config.getIdeMode()) { IdeClient.getInstance() .then((client) => { if (!isMounted) return; + activeClient = client; setIdeClient(client); setIsDiffingEnabled(client.isDiffingEnabled()); - const handleStatusChange = () => { - if (isMounted) { - setIsDiffingEnabled(client.isDiffingEnabled()); - } - }; - client.addStatusChangeListener(handleStatusChange); - // Return a cleanup function for the listener - return () => { - client.removeStatusChangeListener(handleStatusChange); - }; }) .catch((error) => { debugLogger.error('Failed to get IdeClient instance:', error); @@ -102,6 +114,9 @@ export const ToolActionsProvider: React.FC = ( } return () => { isMounted = false; + if (activeClient) { + activeClient.removeStatusChangeListener(handleStatusChange); + } }; }, [config]); @@ -164,7 +179,16 @@ export const ToolActionsProvider: React.FC = ( ); return ( - + {children} ); diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 5f5c1ab187..fb975a4429 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -84,6 +84,7 @@ import type { UseHistoryManagerReturn } from './useHistoryManager.js'; import { useLogger } from './useLogger.js'; import { SHELL_COMMAND_NAME } from '../constants.js'; import { mapToDisplay as mapTrackedToolCallsToDisplay } from './toolMapping.js'; +import { isCompactTool } from '../components/messages/ToolGroupMessage.js'; import { useToolScheduler, type TrackedToolCall, @@ -303,9 +304,32 @@ export const useGeminiStream = ( (tc) => !pushedToolCallIdsRef.current.has(tc.request.callId), ); if (toolsToPush.length > 0) { + const isCompactModeEnabled = + settings.merged.ui?.compactToolOutput === true; + const firstToolToPush = toolsToPush[0]; + const tcIndex = toolCalls.indexOf(firstToolToPush); + const prevTool = tcIndex > 0 ? toolCalls[tcIndex - 1] : null; + + let borderTop = isFirstToolInGroupRef.current; + if (!borderTop && prevTool) { + // If the first tool in this push is non-compact but follows a compact tool, + // we must start a new border group. + const currentIsCompact = isCompactTool( + mapTrackedToolCallsToDisplay(firstToolToPush).tools[0], + isCompactModeEnabled, + ); + const prevWasCompact = isCompactTool( + mapTrackedToolCallsToDisplay(prevTool).tools[0], + isCompactModeEnabled, + ); + if (!currentIsCompact && prevWasCompact) { + borderTop = true; + } + } + addItem( mapTrackedToolCallsToDisplay(toolsToPush as TrackedToolCall[], { - borderTop: isFirstToolInGroupRef.current, + borderTop, borderBottom: true, borderColor: theme.border.default, borderDimColor: false, @@ -340,9 +364,7 @@ export const useGeminiStream = ( } // Handle tool response submission immediately when tools complete - await handleCompletedTools( - completedToolCallsFromScheduler as TrackedToolCall[], - ); + await handleCompletedTools(completedToolCallsFromScheduler); } }, config, @@ -472,26 +494,85 @@ export const useGeminiStream = ( if (toolsToPush.length > 0) { const newPushed = new Set(pushedToolCallIdsRef.current); + const isFirstInThisPush = isFirstToolInGroupRef.current; + const isCompactModeEnabled = + settings.merged.ui?.compactToolOutput === true; + + const groups: TrackedToolCall[][] = []; + let currentGroup: TrackedToolCall[] = []; for (const tc of toolsToPush) { newPushed.add(tc.request.callId); + + if (tc.tool?.kind === Kind.Agent) { + currentGroup.push(tc); + } else { + if (currentGroup.length > 0) { + groups.push(currentGroup); + currentGroup = []; + } + groups.push([tc]); + } + } + if (currentGroup.length > 0) { + groups.push(currentGroup); } - const isLastInBatch = - toolsToPush[toolsToPush.length - 1] === toolCalls[toolCalls.length - 1]; + for (let i = 0; i < groups.length; i++) { + const group = groups[i]; + const isFirstInBatch = i === 0 && isFirstInThisPush; + const lastTcInGroup = group[group.length - 1]; + const tcIndexInBatch = toolCalls.indexOf(lastTcInGroup); + const isLastInBatch = tcIndexInBatch === toolCalls.length - 1; - const historyItem = mapTrackedToolCallsToDisplay(toolsToPush, { - borderTop: isFirstToolInGroupRef.current, - borderBottom: isLastInBatch, - ...getToolGroupBorderAppearance( - { type: 'tool_group', tools: toolCalls }, - activeShellPtyId, - !!isShellFocused, - [], - backgroundTasks, - ), - }); - addItem(historyItem); + const nextTcInBatch = + tcIndexInBatch < toolCalls.length - 1 + ? toolCalls[tcIndexInBatch + 1] + : null; + const prevTcInBatch = + toolCalls.indexOf(group[0]) > 0 + ? toolCalls[toolCalls.indexOf(group[0]) - 1] + : null; + + const historyItem = mapTrackedToolCallsToDisplay(group, { + ...getToolGroupBorderAppearance( + { type: 'tool_group', tools: toolCalls }, + activeShellPtyId, + !!isShellFocused, + [], + backgroundTasks, + ), + }); + + // Determine if this group starts with a compact tool + const currentIsCompact = + historyItem.tools.length === 1 && + isCompactTool(historyItem.tools[0], isCompactModeEnabled); + + let nextIsCompact = false; + if (nextTcInBatch) { + const nextHistoryItem = mapTrackedToolCallsToDisplay(nextTcInBatch); + nextIsCompact = + nextHistoryItem.tools.length === 1 && + isCompactTool(nextHistoryItem.tools[0], isCompactModeEnabled); + } + + let prevWasCompact = false; + if (prevTcInBatch) { + const prevHistoryItem = mapTrackedToolCallsToDisplay(prevTcInBatch); + prevWasCompact = + prevHistoryItem.tools.length === 1 && + isCompactTool(prevHistoryItem.tools[0], isCompactModeEnabled); + } + + historyItem.borderTop = + isFirstInBatch || (!currentIsCompact && prevWasCompact); + historyItem.borderBottom = currentIsCompact + ? isLastInBatch && !nextIsCompact + : isLastInBatch || nextIsCompact; + + addItem(historyItem); + } setPushedToolCallIds(newPushed); @@ -516,6 +597,7 @@ export const useGeminiStream = ( activeShellPtyId, isShellFocused, backgroundTasks, + settings.merged.ui?.compactToolOutput, ]); const pendingToolGroupItems = useMemo((): HistoryItemWithoutId[] => { const remainingTools = toolCalls.filter( @@ -569,8 +651,7 @@ export const useGeminiStream = ( toolCalls.length > 0 && toolCalls.every((tc) => pushedToolCallIds.has(tc.request.callId)); - const anyVisibleInHistory = pushedToolCallIds.size > 0; - const anyVisibleInPending = remainingTools.some((tc) => { + const isToolVisible = (tc: TrackedToolCall) => { const displayName = tc.tool?.displayName ?? tc.request.name; let hasResultDisplay = false; @@ -607,12 +688,25 @@ export const useGeminiStream = ( // ToolGroupMessage now shows all non-canceled tools, so they are visible // in pending and we need to draw the closing border for them. return true; - }); + }; + + let lastVisibleIsCompact = false; + const isCompactModeEnabled = settings.merged.ui?.compactToolOutput === true; + for (let i = toolCalls.length - 1; i >= 0; i--) { + if (isToolVisible(toolCalls[i])) { + const mapped = mapTrackedToolCallsToDisplay(toolCalls[i]); + lastVisibleIsCompact = mapped.tools[0] + ? isCompactTool(mapped.tools[0], isCompactModeEnabled) + : false; + break; + } + } if ( toolCalls.length > 0 && !(allTerminal && allPushed) && - (anyVisibleInHistory || anyVisibleInPending) + toolCalls.some(isToolVisible) && + !lastVisibleIsCompact ) { items.push({ type: 'tool_group' as const, @@ -630,6 +724,7 @@ export const useGeminiStream = ( activeShellPtyId, isShellFocused, backgroundTasks, + settings.merged.ui?.compactToolOutput, ]); const lastQueryRef = useRef(null); diff --git a/packages/cli/src/ui/types.ts b/packages/cli/src/ui/types.ts index 18ed1f525c..444bc57394 100644 --- a/packages/cli/src/ui/types.ts +++ b/packages/cli/src/ui/types.ts @@ -370,6 +370,7 @@ export type HistoryItemMcpStatus = HistoryItemBase & { showSchema: boolean; }; +// Individually exported types extending HistoryItemBase export type HistoryItemWithoutId = | HistoryItemUser | HistoryItemUserShell diff --git a/packages/cli/src/ui/utils/__snapshots__/borderStyles.test.tsx.snap b/packages/cli/src/ui/utils/__snapshots__/borderStyles.test.tsx.snap index 84baf2edb8..19ca84853a 100644 --- a/packages/cli/src/ui/utils/__snapshots__/borderStyles.test.tsx.snap +++ b/packages/cli/src/ui/utils/__snapshots__/borderStyles.test.tsx.snap @@ -19,7 +19,8 @@ Tips for getting started: │ ⊶ google_web_search │ │ │ │ Searching... │ -╰──────────────────────────────────────────────────────────────────────────────────────────────╯" +╰──────────────────────────────────────────────────────────────────────────────────────────────╯ +" `; exports[`MainContent tool group border SVG snapshots > should render SVG snapshot for a shell tool 1`] = ` @@ -41,7 +42,8 @@ Tips for getting started: │ ⊶ run_shell_command │ │ │ │ Running command... │ -╰──────────────────────────────────────────────────────────────────────────────────────────────╯" +╰──────────────────────────────────────────────────────────────────────────────────────────────╯ +" `; exports[`MainContent tool group border SVG snapshots > should render SVG snapshot for an empty slice following a search tool 1`] = ` @@ -63,5 +65,6 @@ Tips for getting started: │ ⊶ google_web_search │ │ │ │ Searching... │ -╰──────────────────────────────────────────────────────────────────────────────────────────────╯" +╰──────────────────────────────────────────────────────────────────────────────────────────────╯ +" `; diff --git a/packages/cli/src/ui/utils/fileUtils.ts b/packages/cli/src/ui/utils/fileUtils.ts new file mode 100644 index 0000000000..a1f3472aa4 --- /dev/null +++ b/packages/cli/src/ui/utils/fileUtils.ts @@ -0,0 +1,19 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import * as path from 'node:path'; + +/** + * Gets the file extension from a filename or path, excluding the leading dot. + * Returns null if no extension is found. + */ +export function getFileExtension( + filename: string | null | undefined, +): string | null { + if (!filename) return null; + const ext = path.extname(filename); + return ext ? ext.slice(1) : null; +} diff --git a/packages/cli/src/ui/utils/toolLayoutUtils.ts b/packages/cli/src/ui/utils/toolLayoutUtils.ts index 1f140b9bc9..e45be2c840 100644 --- a/packages/cli/src/ui/utils/toolLayoutUtils.ts +++ b/packages/cli/src/ui/utils/toolLayoutUtils.ts @@ -17,7 +17,7 @@ import { CoreToolCallStatus } from '@google/gemini-cli-core'; */ export const TOOL_RESULT_STATIC_HEIGHT = 1; export const TOOL_RESULT_ASB_RESERVED_LINE_COUNT = 6; -export const TOOL_RESULT_STANDARD_RESERVED_LINE_COUNT = 3; +export const TOOL_RESULT_STANDARD_RESERVED_LINE_COUNT = 4; export const TOOL_RESULT_MIN_LINES_SHOWN = 2; /** diff --git a/packages/core/src/tools/grep-utils.ts b/packages/core/src/tools/grep-utils.ts index 2191588301..c2cd82dcae 100644 --- a/packages/core/src/tools/grep-utils.ts +++ b/packages/core/src/tools/grep-utils.ts @@ -7,6 +7,7 @@ import fsPromises from 'node:fs/promises'; import { debugLogger } from '../utils/debugLogger.js'; import { MAX_LINE_LENGTH_TEXT_FILE } from '../utils/constants.js'; +import type { GrepResult } from './tools.js'; /** * Result object for a single grep match @@ -148,12 +149,18 @@ export async function formatGrepResults( }, searchLocationDescription: string, totalMaxMatches: number, -): Promise<{ llmContent: string; returnDisplay: string }> { +): Promise<{ llmContent: string; returnDisplay: GrepResult }> { const { pattern, names_only, include_pattern } = params; if (allMatches.length === 0) { const noMatchMsg = `No matches found for pattern "${pattern}" ${searchLocationDescription}${include_pattern ? ` (filter: "${include_pattern}")` : ''}.`; - return { llmContent: noMatchMsg, returnDisplay: `No matches found` }; + return { + llmContent: noMatchMsg, + returnDisplay: { + summary: 'No matches found', + matches: [], + }, + }; } const matchesByFile = groupMatchesByFile(allMatches); @@ -181,7 +188,10 @@ export async function formatGrepResults( llmContent += filePaths.join('\n'); return { llmContent: llmContent.trim(), - returnDisplay: `Found ${filePaths.length} files${wasTruncated ? ' (limited)' : ''}`, + returnDisplay: { + summary: `Found ${filePaths.length} files${wasTruncated ? ' (limited)' : ''}`, + matches: [], + }, }; } @@ -213,8 +223,16 @@ export async function formatGrepResults( return { llmContent: llmContent.trim(), - returnDisplay: `Found ${matchCount} ${matchTerm}${ - wasTruncated ? ' (limited)' : '' - }`, + returnDisplay: { + summary: `Found ${matchCount} ${matchTerm}${wasTruncated ? ' (limited)' : ''}`, + matches: allMatches + .filter((m) => !m.isContext) + .map((m) => ({ + filePath: m.filePath, + absolutePath: m.absolutePath, + lineNumber: m.lineNumber, + line: m.line, + })), + }, }; } diff --git a/packages/core/src/tools/grep.test.ts b/packages/core/src/tools/grep.test.ts index 7bfc59435f..9eced68ca1 100644 --- a/packages/core/src/tools/grep.test.ts +++ b/packages/core/src/tools/grep.test.ts @@ -6,7 +6,7 @@ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; import { GrepTool, type GrepToolParams } from './grep.js'; -import type { ToolResult } from './tools.js'; +import type { ToolResult, GrepResult } from './tools.js'; import path from 'node:path'; import { isSubpath } from '../utils/paths.js'; import fs from 'node:fs/promises'; @@ -187,7 +187,9 @@ describe('GrepTool', () => { `File: ${path.join('sub', 'fileC.txt')}`, ); expect(result.llmContent).toContain('L1: another world in sub dir'); - expect(result.returnDisplay).toBe('Found 3 matches'); + expect((result.returnDisplay as GrepResult)?.summary).toBe( + 'Found 3 matches', + ); }, 30000); it('should include files that start with ".." in JS fallback', async () => { @@ -228,7 +230,9 @@ describe('GrepTool', () => { ); expect(result.llmContent).toContain('File: fileC.txt'); // Path relative to 'sub' expect(result.llmContent).toContain('L1: another world in sub dir'); - expect(result.returnDisplay).toBe('Found 1 match'); + expect((result.returnDisplay as GrepResult)?.summary).toBe( + 'Found 1 match', + ); }, 30000); it('should find matches with an include glob', async () => { @@ -245,7 +249,9 @@ describe('GrepTool', () => { expect(result.llmContent).toContain( 'L2: function baz() { return "hello"; }', ); - expect(result.returnDisplay).toBe('Found 1 match'); + expect((result.returnDisplay as GrepResult)?.summary).toBe( + 'Found 1 match', + ); }, 30000); it('should find matches with an include glob and path', async () => { @@ -265,7 +271,9 @@ describe('GrepTool', () => { ); expect(result.llmContent).toContain('File: another.js'); expect(result.llmContent).toContain('L1: const greeting = "hello";'); - expect(result.returnDisplay).toBe('Found 1 match'); + expect((result.returnDisplay as GrepResult)?.summary).toBe( + 'Found 1 match', + ); }, 30000); it('should return "No matches found" when pattern does not exist', async () => { @@ -275,7 +283,9 @@ describe('GrepTool', () => { expect(result.llmContent).toContain( 'No matches found for pattern "nonexistentpattern" in the workspace directory.', ); - expect(result.returnDisplay).toBe('No matches found'); + expect((result.returnDisplay as GrepResult)?.summary).toBe( + 'No matches found', + ); }, 30000); it('should handle regex special characters correctly', async () => { @@ -501,7 +511,9 @@ describe('GrepTool', () => { expect(result.llmContent).toContain('L2: second line with world'); // And sub/fileC.txt should be excluded because limit reached expect(result.llmContent).not.toContain('File: sub/fileC.txt'); - expect(result.returnDisplay).toBe('Found 2 matches (limited)'); + expect((result.returnDisplay as GrepResult)?.summary).toBe( + 'Found 2 matches (limited)', + ); }); it('should respect max_matches_per_file in JS fallback', async () => { diff --git a/packages/core/src/tools/grep.ts b/packages/core/src/tools/grep.ts index e913c4b184..ac7dc6cf02 100644 --- a/packages/core/src/tools/grep.ts +++ b/packages/core/src/tools/grep.ts @@ -30,7 +30,7 @@ import { isGitRepository } from '../utils/gitUtils.js'; import type { Config } from '../config/config.js'; import type { FileExclusions } from '../utils/ignorePatterns.js'; import { ToolErrorType } from './tool-error.js'; -import { GREP_TOOL_NAME } from './tool-names.js'; +import { GREP_TOOL_NAME, GREP_DISPLAY_NAME } from './tool-names.js'; import { buildPatternArgsPattern } from '../policy/utils.js'; import { debugLogger } from '../utils/debugLogger.js'; import { GREP_DEFINITION } from './definitions/coreTools.js'; @@ -653,7 +653,7 @@ export class GrepTool extends BaseDeclarativeTool { ) { super( GrepTool.Name, - 'SearchText', + GREP_DISPLAY_NAME, GREP_DEFINITION.base.description!, Kind.Search, GREP_DEFINITION.base.parametersJsonSchema, diff --git a/packages/core/src/tools/ls.test.ts b/packages/core/src/tools/ls.test.ts index 5d728ad8a8..372de8e8a6 100644 --- a/packages/core/src/tools/ls.test.ts +++ b/packages/core/src/tools/ls.test.ts @@ -131,7 +131,10 @@ describe('LSTool', () => { expect(result.llmContent).toContain('[DIR] subdir'); expect(result.llmContent).toContain('file1.txt'); - expect(result.returnDisplay).toBe('Listed 2 item(s).'); + expect(result.returnDisplay).toEqual({ + summary: 'Listed 2 item(s).', + files: ['[DIR] subdir', 'file1.txt'], + }); }); it('should list files from secondary workspace directory', async () => { @@ -146,7 +149,10 @@ describe('LSTool', () => { const result = await invocation.execute(abortSignal); expect(result.llmContent).toContain('secondary-file.txt'); - expect(result.returnDisplay).toBe('Listed 1 item(s).'); + expect(result.returnDisplay).toEqual({ + summary: 'Listed 1 item(s).', + files: expect.any(Array), + }); }); it('should handle empty directories', async () => { @@ -171,7 +177,10 @@ describe('LSTool', () => { expect(result.llmContent).toContain('file1.txt'); expect(result.llmContent).not.toContain('file2.log'); - expect(result.returnDisplay).toBe('Listed 1 item(s).'); + expect(result.returnDisplay).toEqual({ + summary: 'Listed 1 item(s).', + files: expect.any(Array), + }); }); it('should respect gitignore patterns', async () => { @@ -185,7 +194,9 @@ describe('LSTool', () => { expect(result.llmContent).toContain('file1.txt'); expect(result.llmContent).not.toContain('file2.log'); // .git is always ignored by default. - expect(result.returnDisplay).toBe('Listed 2 item(s). (2 ignored)'); + expect(result.returnDisplay).toEqual( + expect.objectContaining({ summary: 'Listed 2 item(s). (2 ignored)' }), + ); }); it('should respect geminiignore patterns', async () => { @@ -200,7 +211,9 @@ describe('LSTool', () => { expect(result.llmContent).toContain('file1.txt'); expect(result.llmContent).not.toContain('file2.log'); - expect(result.returnDisplay).toBe('Listed 2 item(s). (1 ignored)'); + expect(result.returnDisplay).toEqual( + expect.objectContaining({ summary: 'Listed 2 item(s). (1 ignored)' }), + ); }); it('should handle non-directory paths', async () => { @@ -287,7 +300,10 @@ describe('LSTool', () => { // Should still list the other files expect(result.llmContent).toContain('file1.txt'); expect(result.llmContent).not.toContain('problematic.txt'); - expect(result.returnDisplay).toBe('Listed 1 item(s).'); + expect(result.returnDisplay).toEqual({ + summary: 'Listed 1 item(s).', + files: expect.any(Array), + }); statSpy.mockRestore(); }); @@ -347,7 +363,10 @@ describe('LSTool', () => { const result = await invocation.execute(abortSignal); expect(result.llmContent).toContain('secondary-file.txt'); - expect(result.returnDisplay).toBe('Listed 1 item(s).'); + expect(result.returnDisplay).toEqual({ + summary: 'Listed 1 item(s).', + files: expect.any(Array), + }); }); }); diff --git a/packages/core/src/tools/ls.ts b/packages/core/src/tools/ls.ts index 1972392508..b8e2e6a803 100644 --- a/packages/core/src/tools/ls.ts +++ b/packages/core/src/tools/ls.ts @@ -20,7 +20,7 @@ import { makeRelative, shortenPath } from '../utils/paths.js'; import type { Config } from '../config/config.js'; import { DEFAULT_FILE_FILTERING_OPTIONS } from '../config/constants.js'; import { ToolErrorType } from './tool-error.js'; -import { LS_TOOL_NAME } from './tool-names.js'; +import { LS_TOOL_NAME, LS_DISPLAY_NAME } from './tool-names.js'; import { buildDirPathArgsPattern } from '../policy/utils.js'; import { debugLogger } from '../utils/debugLogger.js'; import { LS_DEFINITION } from './definitions/coreTools.js'; @@ -143,7 +143,6 @@ class LSToolInvocation extends BaseToolInvocation { ): ToolResult { return { llmContent, - // Keep returnDisplay simpler in core logic returnDisplay: `Error: ${returnDisplay}`, error: { message: llmContent, @@ -284,7 +283,12 @@ class LSToolInvocation extends BaseToolInvocation { return { llmContent: resultMessage, - returnDisplay: displayMessage, + returnDisplay: { + summary: displayMessage, + files: entries.map( + (entry) => `${entry.isDirectory ? '[DIR] ' : ''}${entry.name}`, + ), + }, }; } catch (error) { const errorMsg = `Error listing directory: ${error instanceof Error ? error.message : String(error)}`; @@ -309,7 +313,7 @@ export class LSTool extends BaseDeclarativeTool { ) { super( LSTool.Name, - 'ReadFolder', + LS_DISPLAY_NAME, LS_DEFINITION.base.description!, Kind.Search, LS_DEFINITION.base.parametersJsonSchema, diff --git a/packages/core/src/tools/read-many-files.test.ts b/packages/core/src/tools/read-many-files.test.ts index 6a526d2b62..dd9d146c97 100644 --- a/packages/core/src/tools/read-many-files.test.ts +++ b/packages/core/src/tools/read-many-files.test.ts @@ -31,6 +31,7 @@ import { import * as glob from 'glob'; import { createMockMessageBus } from '../test-utils/mock-message-bus.js'; import { GEMINI_IGNORE_FILE_NAME } from '../config/constants.js'; +import type { ReadManyFilesResult } from './tools.js'; vi.mock('glob', { spy: true }); @@ -277,7 +278,7 @@ describe('ReadManyFilesTool', () => { `--- ${expectedPath} ---\n\nContent of file1\n\n`, `\n--- End of content ---`, ]); - expect(result.returnDisplay).toContain( + expect((result.returnDisplay as ReadManyFilesResult).summary).toContain( 'Successfully read and concatenated content from **1 file(s)**', ); }); @@ -301,7 +302,7 @@ describe('ReadManyFilesTool', () => { c.includes(`--- ${expectedPath2} ---\n\nContent2\n\n`), ), ).toBe(true); - expect(result.returnDisplay).toContain( + expect((result.returnDisplay as ReadManyFilesResult).summary).toContain( 'Successfully read and concatenated content from **2 file(s)**', ); }); @@ -327,7 +328,7 @@ describe('ReadManyFilesTool', () => { ), ).toBe(true); expect(content.find((c) => c.includes('sub/data.json'))).toBeUndefined(); - expect(result.returnDisplay).toContain( + expect((result.returnDisplay as ReadManyFilesResult).summary).toContain( 'Successfully read and concatenated content from **2 file(s)**', ); }); @@ -347,7 +348,7 @@ describe('ReadManyFilesTool', () => { expect( content.find((c) => c.includes('src/main.test.ts')), ).toBeUndefined(); - expect(result.returnDisplay).toContain( + expect((result.returnDisplay as ReadManyFilesResult).summary).toContain( 'Successfully read and concatenated content from **1 file(s)**', ); }); @@ -359,7 +360,7 @@ describe('ReadManyFilesTool', () => { expect(result.llmContent).toEqual([ 'No files matching the criteria were found or all were skipped.', ]); - expect(result.returnDisplay).toContain( + expect((result.returnDisplay as ReadManyFilesResult).summary).toContain( 'No files were read and concatenated based on the criteria.', ); }); @@ -379,7 +380,7 @@ describe('ReadManyFilesTool', () => { expect( content.find((c) => c.includes('node_modules/some-lib/index.js')), ).toBeUndefined(); - expect(result.returnDisplay).toContain( + expect((result.returnDisplay as ReadManyFilesResult).summary).toContain( 'Successfully read and concatenated content from **1 file(s)**', ); }); @@ -406,7 +407,7 @@ describe('ReadManyFilesTool', () => { c.includes(`--- ${expectedPath2} ---\n\napp code\n\n`), ), ).toBe(true); - expect(result.returnDisplay).toContain( + expect((result.returnDisplay as ReadManyFilesResult).summary).toContain( 'Successfully read and concatenated content from **2 file(s)**', ); }); @@ -430,7 +431,7 @@ describe('ReadManyFilesTool', () => { }, '\n--- End of content ---', ]); - expect(result.returnDisplay).toContain( + expect((result.returnDisplay as ReadManyFilesResult).summary).toContain( 'Successfully read and concatenated content from **1 file(s)**', ); }); @@ -471,8 +472,10 @@ describe('ReadManyFilesTool', () => { c.includes(`--- ${expectedPath} ---\n\ntext notes\n\n`), ), ).toBe(true); - expect(result.returnDisplay).toContain('**Skipped 1 item(s):**'); - expect(result.returnDisplay).toContain( + expect((result.returnDisplay as ReadManyFilesResult).summary).toContain( + '**Skipped 1 item(s):**', + ); + expect((result.returnDisplay as ReadManyFilesResult).summary).toContain( '- `document.pdf` (Reason: asset file (image/pdf/audio) was not explicitly requested by name or extension)', ); }); @@ -516,9 +519,15 @@ describe('ReadManyFilesTool', () => { const params = { include: ['foo.bar', 'bar.ts', 'foo.quux'] }; const invocation = tool.build(params); const result = await invocation.execute(new AbortController().signal); - expect(result.returnDisplay).not.toContain('foo.bar'); - expect(result.returnDisplay).not.toContain('foo.quux'); - expect(result.returnDisplay).toContain('bar.ts'); + expect((result.returnDisplay as ReadManyFilesResult).files).not.toContain( + 'foo.bar', + ); + expect((result.returnDisplay as ReadManyFilesResult).files).not.toContain( + 'foo.quux', + ); + expect((result.returnDisplay as ReadManyFilesResult).files).toContain( + 'bar.ts', + ); }); it('should read files from multiple workspace directories', async () => { @@ -594,7 +603,7 @@ describe('ReadManyFilesTool', () => { c.includes(`--- ${expectedPath2} ---\n\nContent2\n\n`), ), ).toBe(true); - expect(result.returnDisplay).toContain( + expect((result.returnDisplay as ReadManyFilesResult).summary).toContain( 'Successfully read and concatenated content from **2 file(s)**', ); @@ -646,7 +655,7 @@ Content of receive-detail `, `\n--- End of content ---`, ]); - expect(result.returnDisplay).toContain( + expect((result.returnDisplay as ReadManyFilesResult).summary).toContain( 'Successfully read and concatenated content from **1 file(s)**', ); }); @@ -665,7 +674,7 @@ Content of file[1] `, `\n--- End of content ---`, ]); - expect(result.returnDisplay).toContain( + expect((result.returnDisplay as ReadManyFilesResult).summary).toContain( 'Successfully read and concatenated content from **1 file(s)**', ); }); @@ -764,7 +773,9 @@ Content of file[1] // Should successfully process valid files despite one failure expect(content.length).toBeGreaterThanOrEqual(3); - expect(result.returnDisplay).toContain('Successfully read'); + expect((result.returnDisplay as ReadManyFilesResult).summary).toContain( + 'Successfully read', + ); // Verify valid files were processed const expectedPath1 = path.join(tempRootDir, 'valid1.txt'); diff --git a/packages/core/src/tools/read-many-files.ts b/packages/core/src/tools/read-many-files.ts index e2a283c726..c92b608791 100644 --- a/packages/core/src/tools/read-many-files.ts +++ b/packages/core/src/tools/read-many-files.ts @@ -13,6 +13,7 @@ import { type ToolResult, type PolicyUpdateOptions, type ToolConfirmationOutcome, + type ReadManyFilesResult, } from './tools.js'; import { getErrorMessage } from '../utils/errors.js'; import * as fsPromises from 'node:fs/promises'; @@ -36,7 +37,10 @@ import { getProgrammingLanguage } from '../telemetry/telemetry-utils.js'; import { logFileOperation } from '../telemetry/loggers.js'; import { FileOperationEvent } from '../telemetry/types.js'; import { ToolErrorType } from './tool-error.js'; -import { READ_MANY_FILES_TOOL_NAME } from './tool-names.js'; +import { + READ_MANY_FILES_TOOL_NAME, + READ_MANY_FILES_DISPLAY_NAME, +} from './tool-names.js'; import { READ_MANY_FILES_DEFINITION } from './definitions/coreTools.js'; import { resolveToolDeclaration } from './definitions/resolver.js'; @@ -269,7 +273,7 @@ ${finalExclusionPatternsForDescription const errorMessage = `Error during file search: ${getErrorMessage(error)}`; return { llmContent: errorMessage, - returnDisplay: `## File Search Error\n\nAn error occurred while searching for files:\n\`\`\`\n${getErrorMessage(error)}\n\`\`\``, + returnDisplay: `Error: ${getErrorMessage(error)}`, error: { message: errorMessage, type: ToolErrorType.READ_MANY_FILES_SEARCH_ERROR, @@ -483,9 +487,19 @@ ${finalExclusionPatternsForDescription 'No files matching the criteria were found or all were skipped.', ); } + + const returnDisplay: ReadManyFilesResult = { + summary: displayMessage.trim(), + files: processedFilesRelativePaths, + skipped: skippedFiles, + include: this.params.include, + excludes: effectiveExcludes, + targetDir: this.config.getTargetDir(), + }; + return { llmContent: contentParts, - returnDisplay: displayMessage.trim(), + returnDisplay, }; } } @@ -507,7 +521,7 @@ export class ReadManyFilesTool extends BaseDeclarativeTool< ) { super( ReadManyFilesTool.Name, - 'ReadManyFiles', + READ_MANY_FILES_DISPLAY_NAME, READ_MANY_FILES_DEFINITION.base.description!, Kind.Read, READ_MANY_FILES_DEFINITION.base.parametersJsonSchema, diff --git a/packages/core/src/tools/ripGrep.test.ts b/packages/core/src/tools/ripGrep.test.ts index a1b155fb7a..4481bf3e54 100644 --- a/packages/core/src/tools/ripGrep.test.ts +++ b/packages/core/src/tools/ripGrep.test.ts @@ -19,6 +19,7 @@ import { ensureRgPath, type RipGrepToolParams, } from './ripGrep.js'; +import type { GrepResult } from './tools.js'; import path from 'node:path'; import { isSubpath } from '../utils/paths.js'; import fs from 'node:fs/promises'; @@ -447,7 +448,9 @@ describe('RipGrepTool', () => { `File: ${path.join('sub', 'fileC.txt')}`, ); expect(result.llmContent).toContain('L1: another world in sub dir'); - expect(result.returnDisplay).toBe('Found 3 matches'); + expect((result.returnDisplay as GrepResult).summary).toBe( + 'Found 3 matches', + ); }); it('should ignore matches that escape the base path', async () => { @@ -509,7 +512,9 @@ describe('RipGrepTool', () => { ); expect(result.llmContent).toContain('File: fileC.txt'); // Path relative to 'sub' expect(result.llmContent).toContain('L1: another world in sub dir'); - expect(result.returnDisplay).toBe('Found 1 match'); + expect((result.returnDisplay as GrepResult).summary).toBe( + 'Found 1 match', + ); }); it('should find matches with an include glob', async () => { @@ -542,7 +547,9 @@ describe('RipGrepTool', () => { expect(result.llmContent).toContain( 'L2: function baz() { return "hello"; }', ); - expect(result.returnDisplay).toBe('Found 1 match'); + expect((result.returnDisplay as GrepResult).summary).toBe( + 'Found 1 match', + ); }); it('should find matches with an include glob and path', async () => { @@ -579,7 +586,9 @@ describe('RipGrepTool', () => { ); expect(result.llmContent).toContain('File: another.js'); expect(result.llmContent).toContain('L1: const greeting = "hello";'); - expect(result.returnDisplay).toBe('Found 1 match'); + expect((result.returnDisplay as GrepResult).summary).toBe( + 'Found 1 match', + ); }); it('should return "No matches found" when pattern does not exist', async () => { @@ -596,7 +605,9 @@ describe('RipGrepTool', () => { expect(result.llmContent).toContain( 'No matches found for pattern "nonexistentpattern" in path ".".', ); - expect(result.returnDisplay).toBe('No matches found'); + expect((result.returnDisplay as GrepResult).summary).toBe( + 'No matches found', + ); }); it('should throw error for invalid regex pattern during build', async () => { @@ -689,7 +700,9 @@ describe('RipGrepTool', () => { }); const result = await invocation.execute(abortSignal); - expect(result.returnDisplay).toContain('(limited)'); + expect((result.returnDisplay as GrepResult).summary).toContain( + '(limited)', + ); }, 10000); it('should filter out files based on FileDiscoveryService even if ripgrep returns them', async () => { @@ -740,7 +753,9 @@ describe('RipGrepTool', () => { expect(result.llmContent).toContain('should be kept'); expect(result.llmContent).not.toContain('ignored.txt'); expect(result.llmContent).not.toContain('should be ignored'); - expect(result.returnDisplay).toContain('Found 1 match'); + expect((result.returnDisplay as GrepResult).summary).toContain( + 'Found 1 match', + ); }); it('should handle regex special characters correctly', async () => { @@ -1064,7 +1079,9 @@ describe('RipGrepTool', () => { controller.abort(); const result = await invocation.execute(controller.signal); - expect(result.returnDisplay).toContain('No matches found'); + expect((result.returnDisplay as GrepResult).summary).toContain( + 'No matches found', + ); }); }); @@ -1946,7 +1963,9 @@ describe('RipGrepTool', () => { expect(result.llmContent).toContain('L1: match 1'); expect(result.llmContent).toContain('L2: match 2'); expect(result.llmContent).not.toContain('L3: match 3'); - expect(result.returnDisplay).toBe('Found 2 matches (limited)'); + expect((result.returnDisplay as GrepResult).summary).toBe( + 'Found 2 matches (limited)', + ); }); it('should return only file paths when names_only is true', async () => { diff --git a/packages/core/src/tools/tools.ts b/packages/core/src/tools/tools.ts index 23e88b608b..e89ef1b9e6 100644 --- a/packages/core/src/tools/tools.ts +++ b/packages/core/src/tools/tools.ts @@ -921,12 +921,18 @@ export const isListResult = ( ): res is ListDirectoryResult | ReadManyFilesResult => isStructuredToolResult(res) && 'files' in res && Array.isArray(res.files); +export const isReadManyFilesResult = ( + res: unknown, +): res is ReadManyFilesResult => isListResult(res) && 'include' in res; export type ToolResultDisplay = | string | FileDiff | AnsiOutput | TodoList - | SubagentProgress; + | SubagentProgress + | GrepResult + | ListDirectoryResult + | ReadManyFilesResult; export type TodoStatus = | 'pending' diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index c5db73b1f2..9e205f67c3 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -317,6 +317,13 @@ "default": true, "type": "boolean" }, + "compactToolOutput": { + "title": "Compact Tool Output", + "description": "Display tool outputs (like directory listings and file reads) in a compact, structured format.", + "markdownDescription": "Display tool outputs (like directory listings and file reads) in a compact, structured format.\n\n- Category: `UI`\n- Requires restart: `no`\n- Default: `false`", + "default": false, + "type": "boolean" + }, "hideBanner": { "title": "Hide Banner", "description": "Hide the application banner", From bea57a2f3d77726473b463e78773aad901b857e2 Mon Sep 17 00:00:00 2001 From: Gal Zahavi <38544478+galz10@users.noreply.github.com> Date: Mon, 30 Mar 2026 16:49:53 -0700 Subject: [PATCH 009/146] Add security settings for tool sandboxing (#23923) --- .gemini/settings.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gemini/settings.json b/.gemini/settings.json index 18f81884d2..6802ccaae3 100644 --- a/.gemini/settings.json +++ b/.gemini/settings.json @@ -8,5 +8,8 @@ }, "general": { "devtools": true + }, + "security": { + "toolSandboxing": true } } From 35efdfc409131cefa7e9fddeb47d9953ff49a8c6 Mon Sep 17 00:00:00 2001 From: Sehoon Shon Date: Mon, 30 Mar 2026 20:31:04 -0400 Subject: [PATCH 010/146] chore(test-utils): switch integration tests to use PREVIEW_GEMINI_MODEL (#24276) Co-authored-by: Tommaso Sciortino --- integration-tests/context-compress-interactive.test.ts | 2 +- integration-tests/plan-mode.test.ts | 2 +- packages/test-utils/src/test-rig.ts | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/integration-tests/context-compress-interactive.test.ts b/integration-tests/context-compress-interactive.test.ts index c7e04c6c23..348d984c31 100644 --- a/integration-tests/context-compress-interactive.test.ts +++ b/integration-tests/context-compress-interactive.test.ts @@ -19,7 +19,7 @@ describe('Interactive Mode', () => { await rig.cleanup(); }); - it('should trigger chat compression with /compress command', async () => { + it.skip('should trigger chat compression with /compress command', async () => { await rig.setup('interactive-compress-success', { fakeResponsesPath: join( import.meta.dirname, diff --git a/integration-tests/plan-mode.test.ts b/integration-tests/plan-mode.test.ts index d8d297c460..5a1c91e4e1 100644 --- a/integration-tests/plan-mode.test.ts +++ b/integration-tests/plan-mode.test.ts @@ -113,7 +113,7 @@ describe('Plan Mode', () => { ).toBe(true); }); - it('should deny write_file to non-plans directory in plan mode', async () => { + it.skip('should deny write_file to non-plans directory in plan mode', async () => { const plansDir = '.gemini/tmp/foo/123/plans'; const testName = 'should deny write_file to non-plans directory in plan mode'; diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts index ae2e9cc0ef..734c1b9546 100644 --- a/packages/test-utils/src/test-rig.ts +++ b/packages/test-utils/src/test-rig.ts @@ -11,7 +11,7 @@ import { join, dirname } from 'node:path'; import { fileURLToPath } from 'node:url'; import { env } from 'node:process'; import { setTimeout as sleep } from 'node:timers/promises'; -import { DEFAULT_GEMINI_MODEL, GEMINI_DIR } from '@google/gemini-cli-core'; +import { PREVIEW_GEMINI_MODEL, GEMINI_DIR } from '@google/gemini-cli-core'; export { GEMINI_DIR }; import * as pty from '@lydell/node-pty'; import stripAnsi from 'strip-ansi'; @@ -457,7 +457,7 @@ export class TestRig { ...(env['GEMINI_TEST_TYPE'] === 'integration' ? { model: { - name: DEFAULT_GEMINI_MODEL, + name: PREVIEW_GEMINI_MODEL, }, } : {}), From 80929c48c52751da2a123fb99b24767220187845 Mon Sep 17 00:00:00 2001 From: Abhijit Balaji Date: Mon, 30 Mar 2026 17:57:46 -0700 Subject: [PATCH 011/146] feat(core): enable topic update narration for legacy models (#24241) Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- .../core/src/prompts/promptProvider.test.ts | 20 ++++++++++- packages/core/src/prompts/snippets.legacy.ts | 36 +++++++++++++++++-- 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/packages/core/src/prompts/promptProvider.test.ts b/packages/core/src/prompts/promptProvider.test.ts index 74cc83ae3a..554bad2003 100644 --- a/packages/core/src/prompts/promptProvider.test.ts +++ b/packages/core/src/prompts/promptProvider.test.ts @@ -11,7 +11,10 @@ import { getAllGeminiMdFilenames, DEFAULT_CONTEXT_FILENAME, } from '../tools/memoryTool.js'; -import { PREVIEW_GEMINI_MODEL } from '../config/models.js'; +import { + PREVIEW_GEMINI_MODEL, + DEFAULT_GEMINI_MODEL, +} from '../config/models.js'; import { ApprovalMode } from '../policy/types.js'; import { DiscoveredMCPTool } from '../tools/mcp-tool.js'; import { MockTool } from '../test-utils/mock-tool.js'; @@ -301,5 +304,20 @@ describe('PromptProvider', () => { expect(prompt).toContain(`\`${UPDATE_TOPIC_TOOL_NAME}\``); }); + + it('should include topic update instructions in legacy model prompt when enabled', () => { + vi.mocked(mockConfig.getActiveModel).mockReturnValue( + DEFAULT_GEMINI_MODEL, + ); + vi.mocked(mockConfig.isTopicUpdateNarrationEnabled).mockReturnValue(true); + + const provider = new PromptProvider(); + const prompt = provider.getCoreSystemPrompt(mockConfig); + + expect(prompt).toContain('## Topic Updates'); + expect(prompt).toContain(UPDATE_TOPIC_TOOL_NAME); + expect(prompt).toContain('No Chitchat'); + expect(prompt).toContain('Topic Model'); + }); }); }); diff --git a/packages/core/src/prompts/snippets.legacy.ts b/packages/core/src/prompts/snippets.legacy.ts index f2930e07ca..ebe08847ed 100644 --- a/packages/core/src/prompts/snippets.legacy.ts +++ b/packages/core/src/prompts/snippets.legacy.ts @@ -20,6 +20,9 @@ import { TRACKER_CREATE_TASK_TOOL_NAME, TRACKER_LIST_TASKS_TOOL_NAME, TRACKER_UPDATE_TASK_TOOL_NAME, + UPDATE_TOPIC_TOOL_NAME, + TOPIC_PARAM_TITLE, + TOPIC_PARAM_SUMMARY, WRITE_FILE_TOOL_NAME, WRITE_TODOS_TOOL_NAME, } from '../tools/tool-names.js'; @@ -51,6 +54,7 @@ export interface CoreMandatesOptions { isGemini3: boolean; hasSkills: boolean; hasHierarchicalMemory: boolean; + topicUpdateNarration?: boolean; } export interface PrimaryWorkflowsOptions { @@ -60,6 +64,7 @@ export interface PrimaryWorkflowsOptions { enableEnterPlanModeTool: boolean; approvedPlan?: { path: string }; taskTracker?: boolean; + topicUpdateNarration?: boolean; } export interface OperationalGuidelinesOptions { @@ -67,6 +72,7 @@ export interface OperationalGuidelinesOptions { isGemini3: boolean; enableShellEfficiency: boolean; interactiveShellEnabled: boolean; + topicUpdateNarration?: boolean; memoryManagerEnabled: boolean; } @@ -177,7 +183,11 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - ${mandateConfirm(options.interactive)} - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. -- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.${mandateSkillGuidance(options.hasSkills)}${mandateExplainBeforeActing(options.isGemini3)}${mandateContinueWork(options.interactive)} +- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.${mandateSkillGuidance(options.hasSkills)}${ + options.topicUpdateNarration + ? mandateTopicUpdateModel() + : mandateExplainBeforeActing(options.isGemini3) + }${mandateContinueWork(options.interactive)} `.trim(); } @@ -272,7 +282,12 @@ ${shellEfficiencyGuidelines(options.enableShellEfficiency)} ## Tone and Style (CLI Interaction) - **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment. - **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. Focus strictly on the user's query. -- **Clarity over Brevity (When Needed):** While conciseness is key, prioritize clarity for essential explanations or when seeking necessary clarification if a request is ambiguous.${toneAndStyleNoChitchat(options.isGemini3)} +- **Clarity over Brevity (When Needed):** While conciseness is key, prioritize clarity for essential explanations or when seeking necessary clarification if a request is ambiguous.${ + options.topicUpdateNarration + ? ` +- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes...") unless they are part of the **Topic Model**.` + : toneAndStyleNoChitchat(options.isGemini3) + } - **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace. - **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls or code blocks unless specifically part of the required code/command itself. - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly (1-2 sentences) without excessive justification. Offer alternatives if appropriate. @@ -493,6 +508,23 @@ function mandateConfirm(interactive: boolean): string { : '**Handle Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, do not perform it automatically.'; } +function mandateTopicUpdateModel(): string { + return ` +## Topic Updates +As you work, the user follows along by reading topic updates that you publish with ${UPDATE_TOPIC_TOOL_NAME}. Keep them informed by doing the following: + +- Always call ${UPDATE_TOPIC_TOOL_NAME} in your first and last turn. The final turn should always recap what was done. +- Each topic update should give a concise description of what you are doing for the next few turns in the \`${TOPIC_PARAM_SUMMARY}\` parameter. +- Provide topic updates whenever you change "topics". A topic is typically a discrete subgoal and will be every 3 to 10 turns. Do not use ${UPDATE_TOPIC_TOOL_NAME} on every turn. +- The typical user message should call ${UPDATE_TOPIC_TOOL_NAME} 3 or more times. Each corresponds to a distinct phase of the task, such as "Researching X", "Researching Y", "Implementing Z with X", and "Testing Z". +- Remember to call ${UPDATE_TOPIC_TOOL_NAME} when you experience an unexpected event (e.g., a test failure, compilation error, environment issue, or unexpected learning) that requires a strategic detour. +- **Examples:** + - ${UPDATE_TOPIC_TOOL_NAME}(${TOPIC_PARAM_TITLE}="Researching Parser", ${TOPIC_PARAM_SUMMARY}="I am starting an investigation into the parser timeout bug. My goal is to first understand the current test coverage and then attempt to reproduce the failure. This phase will focus on identifying the bottleneck in the main loop before we move to implementation.") + - ${UPDATE_TOPIC_TOOL_NAME}(${TOPIC_PARAM_TITLE}="Implementing Buffer Fix", ${TOPIC_PARAM_SUMMARY}="I have completed the research phase and identified a race condition in the tokenizer's buffer management. I am now transitioning to implementation. This new chapter will focus on refactoring the buffer logic to handle async chunks safely, followed by unit testing the fix.") + +`; +} + function mandateSkillGuidance(hasSkills: boolean): string { if (!hasSkills) return ''; return ` From 46d6b119b6f1c7382c00ae215a3b0c6ac687639a Mon Sep 17 00:00:00 2001 From: Sandy Tao Date: Mon, 30 Mar 2026 18:32:15 -0700 Subject: [PATCH 012/146] feat(core): add project-level memory scope to save_memory tool (#24161) --- packages/cli/src/ui/AppContainer.tsx | 1 + packages/core/src/config/config.test.ts | 3 + packages/core/src/config/config.ts | 18 +++- packages/core/src/config/memory.ts | 7 ++ packages/core/src/config/storage.test.ts | 11 ++ packages/core/src/config/storage.ts | 5 + .../core/__snapshots__/prompts.test.ts.snap | 95 ++++++++++++---- .../prompts/snippets-memory-manager.test.ts | 2 +- packages/core/src/prompts/snippets.legacy.ts | 5 + packages/core/src/prompts/snippets.ts | 12 ++- .../core/src/services/contextManager.test.ts | 7 ++ packages/core/src/services/contextManager.ts | 27 ++++- .../coreToolsModelSnapshots.test.ts.snap | 30 ++++-- .../tools/definitions/base-declarations.ts | 1 + .../model-family-sets/default-legacy.ts | 17 ++- .../definitions/model-family-sets/gemini-3.ts | 11 +- packages/core/src/tools/memoryTool.test.ts | 101 +++++++++++++++++- packages/core/src/tools/memoryTool.ts | 60 ++++++++--- packages/core/src/utils/memoryDiscovery.ts | 32 +++++- 19 files changed, 382 insertions(+), 63 deletions(-) diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index 129bcb8fda..4da8acfdb7 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -1027,6 +1027,7 @@ Logging in with Google... Restarting Gemini CLI to continue. if (config.isJitContextEnabled()) { await config.getContextManager()?.refresh(); + config.updateSystemInstructionIfInitialized(); flattenedMemory = flattenMemory(config.getUserMemory()); fileCount = config.getGeminiMdFileCount(); } else { diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index 14ac3b7cf1..59133f6997 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -227,6 +227,7 @@ vi.mock('../services/contextManager.js', () => ({ getGlobalMemory: vi.fn().mockReturnValue(''), getExtensionMemory: vi.fn().mockReturnValue(''), getEnvironmentMemory: vi.fn().mockReturnValue(''), + getUserProjectMemory: vi.fn().mockReturnValue(''), getLoadedPaths: vi.fn().mockReturnValue(new Set()), })), })); @@ -2948,6 +2949,7 @@ describe('Config JIT Initialization', () => { getEnvironmentMemory: vi .fn() .mockReturnValue('Environment Memory\n\nMCP Instructions'), + getUserProjectMemory: vi.fn().mockReturnValue(''), getLoadedPaths: vi.fn().mockReturnValue(new Set(['/path/to/GEMINI.md'])), } as unknown as ContextManager; (ContextManager as unknown as Mock).mockImplementation( @@ -2975,6 +2977,7 @@ describe('Config JIT Initialization', () => { global: 'Global Memory', extension: 'Extension Memory', project: 'Environment Memory\n\nMCP Instructions', + userProjectMemory: '', }); // Tier 1: system instruction gets only global memory diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index ec39016933..f3e02510ed 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -2282,6 +2282,7 @@ export class Config implements McpContext, AgentLoopContext { global: this.contextManager.getGlobalMemory(), extension: this.contextManager.getExtensionMemory(), project: this.contextManager.getEnvironmentMemory(), + userProjectMemory: this.contextManager.getUserProjectMemory(), }; } return this.userMemory; @@ -2311,13 +2312,20 @@ export class Config implements McpContext, AgentLoopContext { /** * Returns memory for the system instruction. - * When JIT is enabled, only global memory (Tier 1) goes in the system - * instruction. Extension and project memory (Tier 2) are placed in the - * first user message instead, per the tiered context model. + * When JIT is enabled, global memory and user project memory (Tier 1) go + * in the system instruction. Extension and project memory (Tier 2) are + * placed in the first user message instead, per the tiered context model. + * User project memory is in Tier 1 so mid-session saves are reflected + * via system instruction updates. */ getSystemInstructionMemory(): string | HierarchicalMemory { if (this.experimentalJitContext && this.contextManager) { - return this.contextManager.getGlobalMemory(); + const global = this.contextManager.getGlobalMemory(); + const userProjectMemory = this.contextManager.getUserProjectMemory(); + if (userProjectMemory?.trim()) { + return { global, userProjectMemory }; + } + return global; } return this.userMemory; } @@ -3476,7 +3484,7 @@ export class Config implements McpContext, AgentLoopContext { ); if (!this.isMemoryManagerEnabled()) { maybeRegister(MemoryTool, () => - registry.registerTool(new MemoryTool(this.messageBus)), + registry.registerTool(new MemoryTool(this.messageBus, this.storage)), ); } maybeRegister(WebSearchTool, () => diff --git a/packages/core/src/config/memory.ts b/packages/core/src/config/memory.ts index 6ae902d5c6..146e38d0a6 100644 --- a/packages/core/src/config/memory.ts +++ b/packages/core/src/config/memory.ts @@ -8,6 +8,7 @@ export interface HierarchicalMemory { global?: string; extension?: string; project?: string; + userProjectMemory?: string; } /** @@ -21,6 +22,12 @@ export function flattenMemory(memory?: string | HierarchicalMemory): string { if (memory.global?.trim()) { sections.push({ name: 'Global', content: memory.global.trim() }); } + if (memory.userProjectMemory?.trim()) { + sections.push({ + name: 'User Project Memory', + content: memory.userProjectMemory.trim(), + }); + } if (memory.extension?.trim()) { sections.push({ name: 'Extension', content: memory.extension.trim() }); } diff --git a/packages/core/src/config/storage.test.ts b/packages/core/src/config/storage.test.ts index ea8fce6da3..b5b8c26841 100644 --- a/packages/core/src/config/storage.test.ts +++ b/packages/core/src/config/storage.test.ts @@ -147,6 +147,17 @@ describe('Storage – additional helpers', () => { expect(storage.getProjectAgentsDir()).toBe(expected); }); + it('getProjectMemoryDir returns ~/.gemini/memory/', async () => { + await storage.initialize(); + const expected = path.join( + os.homedir(), + GEMINI_DIR, + 'memory', + PROJECT_SLUG, + ); + expect(storage.getProjectMemoryDir()).toBe(expected); + }); + it('getMcpOAuthTokensPath returns ~/.gemini/mcp-oauth-tokens.json', () => { const expected = path.join( os.homedir(), diff --git a/packages/core/src/config/storage.ts b/packages/core/src/config/storage.ts index 38654346fa..cfbe6cf945 100644 --- a/packages/core/src/config/storage.ts +++ b/packages/core/src/config/storage.ts @@ -266,6 +266,11 @@ export class Storage { return path.join(historyDir, identifier); } + getProjectMemoryDir(): string { + const identifier = this.getProjectIdentifier(); + return path.join(Storage.getGlobalGeminiDir(), 'memory', identifier); + } + getWorkspaceSettingsPath(): string { return path.join(this.getGeminiDir(), 'settings.json'); } diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index 51f9a9e59e..b4e8dd4e7e 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -164,7 +164,10 @@ Use the \`exit_plan_mode\` tool to present the plan and formally request approva - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. If unsure, ask the user. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. If unsure whether a fact is worth remembering globally, ask the user. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. If unsure whether a fact is global or project-specific, ask the user. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -342,7 +345,10 @@ An approved plan is available for this task at \`/tmp/plans/feature-x.md\`. - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. If unsure, ask the user. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. If unsure whether a fact is worth remembering globally, ask the user. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. If unsure whether a fact is global or project-specific, ask the user. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -627,7 +633,10 @@ Use the \`exit_plan_mode\` tool to present the plan and formally request approva - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. If unsure, ask the user. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. If unsure whether a fact is worth remembering globally, ask the user. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. If unsure whether a fact is global or project-specific, ask the user. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -782,7 +791,10 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. If unsure, ask the user. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. If unsure whether a fact is worth remembering globally, ask the user. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. If unsure whether a fact is global or project-specific, ask the user. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -923,7 +935,10 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -1047,7 +1062,10 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -1688,7 +1706,10 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. If unsure, ask the user. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. If unsure whether a fact is worth remembering globally, ask the user. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. If unsure whether a fact is global or project-specific, ask the user. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -1843,7 +1864,10 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. If unsure, ask the user. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. If unsure whether a fact is worth remembering globally, ask the user. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. If unsure whether a fact is global or project-specific, ask the user. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -2002,7 +2026,10 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. If unsure, ask the user. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. If unsure whether a fact is worth remembering globally, ask the user. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. If unsure whether a fact is global or project-specific, ask the user. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -2161,7 +2188,10 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. If unsure, ask the user. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. If unsure whether a fact is worth remembering globally, ask the user. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. If unsure whether a fact is global or project-specific, ask the user. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -2316,7 +2346,10 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. If unsure, ask the user. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. If unsure whether a fact is worth remembering globally, ask the user. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. If unsure whether a fact is global or project-specific, ask the user. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -2465,7 +2498,10 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. If unsure, ask the user. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. If unsure whether a fact is worth remembering globally, ask the user. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. If unsure whether a fact is global or project-specific, ask the user. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -2619,7 +2655,10 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. If unsure, ask the user. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. If unsure whether a fact is worth remembering globally, ask the user. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. If unsure whether a fact is global or project-specific, ask the user. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -2774,7 +2813,10 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. If unsure, ask the user. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. If unsure whether a fact is worth remembering globally, ask the user. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. If unsure whether a fact is global or project-specific, ask the user. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -3064,7 +3106,10 @@ You are operating with a persistent file-based task tracking system located at \ - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. If unsure, ask the user. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. If unsure whether a fact is worth remembering globally, ask the user. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. If unsure whether a fact is global or project-specific, ask the user. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -3460,7 +3505,10 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. If unsure, ask the user. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. If unsure whether a fact is worth remembering globally, ask the user. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. If unsure whether a fact is global or project-specific, ask the user. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -3615,7 +3663,10 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. If unsure, ask the user. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. If unsure whether a fact is worth remembering globally, ask the user. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. If unsure whether a fact is global or project-specific, ask the user. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -3882,7 +3933,10 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. If unsure, ask the user. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. If unsure whether a fact is worth remembering globally, ask the user. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. If unsure whether a fact is global or project-specific, ask the user. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details @@ -4037,7 +4091,10 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi - **Command Execution:** Use the \`run_shell_command\` tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. If unsure, ask the user. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Memory Tool:** Use \`save_memory\` only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only. If unsure whether a fact is worth remembering globally, ask the user. +- **Memory Tool:** Use \`save_memory\` to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task. If unsure whether a fact is global or project-specific, ask the user. - **Confirmation Protocol:** If a tool call is declined or cancelled, respect the decision immediately. Do not re-attempt the action or "negotiate" for the same tool call unless the user explicitly directs you to. Offer an alternative technical path if possible. ## Interaction Details diff --git a/packages/core/src/prompts/snippets-memory-manager.test.ts b/packages/core/src/prompts/snippets-memory-manager.test.ts index 070e49f8c0..19aa8f478b 100644 --- a/packages/core/src/prompts/snippets-memory-manager.test.ts +++ b/packages/core/src/prompts/snippets-memory-manager.test.ts @@ -18,7 +18,7 @@ describe('renderOperationalGuidelines - memoryManagerEnabled', () => { it('should include standard memory tool guidance when memoryManagerEnabled is false', () => { const result = renderOperationalGuidelines(baseOptions); expect(result).toContain('save_memory'); - expect(result).toContain('persistent user-related information'); + expect(result).toContain('persist facts across sessions'); expect(result).not.toContain('subagent'); }); diff --git a/packages/core/src/prompts/snippets.legacy.ts b/packages/core/src/prompts/snippets.legacy.ts index ebe08847ed..0367596c69 100644 --- a/packages/core/src/prompts/snippets.legacy.ts +++ b/packages/core/src/prompts/snippets.legacy.ts @@ -405,6 +405,11 @@ ${trimmed} `\n${memory.global.trim()}\n`, ); } + if (memory.userProjectMemory?.trim()) { + sections.push( + `\n--- User's Project Memory (private, not committed to repo) ---\n${memory.userProjectMemory.trim()}\n--- End User's Project Memory ---\n`, + ); + } if (memory.extension?.trim()) { sections.push( `\n${memory.extension.trim()}\n`, diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index a16ef59461..d7e95a1f4e 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -517,6 +517,11 @@ ${trimmed} `\n${memory.global.trim()}\n`, ); } + if (memory.userProjectMemory?.trim()) { + sections.push( + `\n--- User's Project Memory (private, not committed to repo) ---\n${memory.userProjectMemory.trim()}\n--- End User's Project Memory ---\n`, + ); + } if (memory.extension?.trim()) { sections.push( `\n${memory.extension.trim()}\n`, @@ -798,9 +803,12 @@ function toolUsageRememberingFacts( - **Memory Tool:** You MUST use ${formatToolName(MEMORY_TOOL_NAME)} to proactively record facts, preferences, and workflows that apply across all sessions. Whenever the user explicitly tells you to "remember" something, or when they state a preference or workflow (like "always lint after editing"), you MUST immediately call the save_memory subagent. Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is strictly for persistent general knowledge.`; } const base = ` -- **Memory Tool:** Use ${formatToolName(MEMORY_TOOL_NAME)} only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only.`; +- **Memory Tool:** Use ${formatToolName(MEMORY_TOOL_NAME)} to persist facts across sessions. It supports two scopes via the \`scope\` parameter: + - \`"global"\` (default): Cross-project preferences and personal facts loaded in every workspace. + - \`"project"\`: Facts specific to the current workspace, private to the user (not committed to the repo). Use this for local dev setup notes, project-specific workflows, or personal reminders about this codebase. + Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task.`; const suffix = options.interactive - ? ' If unsure whether a fact is worth remembering globally, ask the user.' + ? ' If unsure whether a fact is global or project-specific, ask the user.' : ''; return base + suffix; } diff --git a/packages/core/src/services/contextManager.test.ts b/packages/core/src/services/contextManager.test.ts index a6a3c8cd0f..3d06e2485d 100644 --- a/packages/core/src/services/contextManager.test.ts +++ b/packages/core/src/services/contextManager.test.ts @@ -17,6 +17,7 @@ vi.mock('../utils/memoryDiscovery.js', async (importOriginal) => { return { ...actual, getGlobalMemoryPaths: vi.fn(), + getUserProjectMemoryPaths: vi.fn(), getExtensionMemoryPaths: vi.fn(), getEnvironmentMemoryPaths: vi.fn(), readGeminiMdFiles: vi.fn(), @@ -47,12 +48,18 @@ describe('ContextManager', () => { }), isTrustedFolder: vi.fn().mockReturnValue(true), getMemoryBoundaryMarkers: vi.fn().mockReturnValue(['.git']), + storage: { + getProjectMemoryDir: vi + .fn() + .mockReturnValue('/home/user/.gemini/memory/test-project'), + }, } as unknown as Config; contextManager = new ContextManager(mockConfig); vi.clearAllMocks(); vi.spyOn(coreEvents, 'emit'); vi.mocked(memoryDiscovery.getExtensionMemoryPaths).mockReturnValue([]); + vi.mocked(memoryDiscovery.getUserProjectMemoryPaths).mockResolvedValue([]); // default mock: deduplication returns paths as-is (no deduplication) vi.mocked( memoryDiscovery.deduplicatePathsByFileIdentity, diff --git a/packages/core/src/services/contextManager.ts b/packages/core/src/services/contextManager.ts index 3d7400c747..43ae627796 100644 --- a/packages/core/src/services/contextManager.ts +++ b/packages/core/src/services/contextManager.ts @@ -8,6 +8,7 @@ import { loadJitSubdirectoryMemory, concatenateInstructions, getGlobalMemoryPaths, + getUserProjectMemoryPaths, getExtensionMemoryPaths, getEnvironmentMemoryPaths, readGeminiMdFiles, @@ -25,6 +26,7 @@ export class ContextManager { private globalMemory: string = ''; private extensionMemory: string = ''; private projectMemory: string = ''; + private userProjectMemoryContent: string = ''; constructor(config: Config) { this.config = config; @@ -45,7 +47,7 @@ export class ContextManager { } private async discoverMemoryPaths() { - const [global, extension, project] = await Promise.all([ + const [global, extension, project, userProjectMemory] = await Promise.all([ getGlobalMemoryPaths(), Promise.resolve( getExtensionMemoryPaths(this.config.getExtensionLoader()), @@ -56,18 +58,25 @@ export class ContextManager { this.config.getMemoryBoundaryMarkers(), ) : Promise.resolve([]), + getUserProjectMemoryPaths(this.config.storage.getProjectMemoryDir()), ]); - return { global, extension, project }; + return { global, extension, project, userProjectMemory }; } private async loadMemoryContents(paths: { global: string[]; extension: string[]; project: string[]; + userProjectMemory: string[]; }) { const allPathsStringDeduped = Array.from( - new Set([...paths.global, ...paths.extension, ...paths.project]), + new Set([ + ...paths.global, + ...paths.extension, + ...paths.project, + ...paths.userProjectMemory, + ]), ); // deduplicate by file identity to handle case-insensitive filesystems @@ -97,13 +106,19 @@ export class ContextManager { } private categorizeMemoryContents( - paths: { global: string[]; extension: string[]; project: string[] }, + paths: { + global: string[]; + extension: string[]; + project: string[]; + userProjectMemory: string[]; + }, contentsMap: Map, ) { const hierarchicalMemory = categorizeAndConcatenate(paths, contentsMap); this.globalMemory = hierarchicalMemory.global || ''; this.extensionMemory = hierarchicalMemory.extension || ''; + this.userProjectMemoryContent = hierarchicalMemory.userProjectMemory || ''; const mcpInstructions = this.config.getMcpClientManager()?.getMcpInstructions() || ''; @@ -174,6 +189,10 @@ export class ContextManager { return this.projectMemory; } + getUserProjectMemory(): string { + return this.userProjectMemoryContent; + } + private markAsLoaded(paths: string[]): void { paths.forEach((p) => this.loadedPaths.add(p)); } diff --git a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap index 5a8291bcfc..dbaad2d1f8 100644 --- a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap +++ b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap @@ -640,13 +640,13 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: save_memory 1`] = ` { "description": " -Saves concise global user context (preferences, facts) for use across ALL workspaces. +Saves concise user context (preferences, facts) for use across future sessions. -### CRITICAL: GLOBAL CONTEXT ONLY -NEVER save workspace-specific context, local paths, or commands (e.g. "The entry point is src/index.js", "The test command is npm test"). These are local to the current workspace and must NOT be saved globally. EXCLUSIVELY for context relevant across ALL workspaces. +Supports two scopes: +- **global** (default): Cross-project preferences loaded in every workspace. Use for "Remember X" or clear personal facts. +- **project**: Facts specific to the current workspace, private to the user (not committed to the repo). Use for local dev setup notes, project-specific workflows, or personal reminders about this codebase. -- Use for "Remember X" or clear personal facts. -- Do NOT use for session context.", +Do NOT use for session-specific context or temporary data.", "name": "save_memory", "parametersJsonSchema": { "additionalProperties": false, @@ -655,6 +655,14 @@ NEVER save workspace-specific context, local paths, or commands (e.g. "The entry "description": "The specific fact or piece of information to remember. Should be a clear, self-contained statement.", "type": "string", }, + "scope": { + "description": "Where to save the memory. 'global' (default) saves to a file loaded in every workspace. 'project' saves to a project-specific file private to the user, not committed to the repo.", + "enum": [ + "global", + "project", + ], + "type": "string", + }, }, "required": [ "fact", @@ -1433,13 +1441,21 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: save_memory 1`] = ` { - "description": "Persists global preferences or facts across ALL future sessions. Use this for recurring instructions like coding styles or tool aliases. Unlike 'write_file', which is for project-specific files, this appends to a global memory file loaded in every workspace. If you are unsure whether a fact should be remembered globally, ask the user first. CRITICAL: Do not use for session-specific context or temporary data.", + "description": "Persists preferences or facts across ALL future sessions. Supports two scopes: 'global' (default) for cross-project preferences loaded in every workspace, and 'project' for facts specific to the current workspace that are private to the user (not committed to the repo). Use 'project' scope for things like local dev setup notes, project-specific workflows, or personal reminders about this codebase. CRITICAL: Do not use for session-specific context or temporary data.", "name": "save_memory", "parametersJsonSchema": { "additionalProperties": false, "properties": { "fact": { - "description": "A concise, global fact or preference (e.g., 'I prefer using tabs'). Do not include local paths or project-specific names.", + "description": "A concise fact or preference to remember. Should be a clear, self-contained statement.", + "type": "string", + }, + "scope": { + "description": "Where to save the memory. 'global' (default) saves to a file loaded in every workspace. 'project' saves to a project-specific file private to the user, not committed to the repo.", + "enum": [ + "global", + "project", + ], "type": "string", }, }, diff --git a/packages/core/src/tools/definitions/base-declarations.ts b/packages/core/src/tools/definitions/base-declarations.ts index 08b14ce6cb..13f31aa2bb 100644 --- a/packages/core/src/tools/definitions/base-declarations.ts +++ b/packages/core/src/tools/definitions/base-declarations.ts @@ -92,6 +92,7 @@ export const READ_MANY_PARAM_USE_DEFAULT_EXCLUDES = 'useDefaultExcludes'; // -- save_memory -- export const MEMORY_TOOL_NAME = 'save_memory'; export const MEMORY_PARAM_FACT = 'fact'; +export const MEMORY_PARAM_SCOPE = 'scope'; // -- get_internal_docs -- export const GET_INTERNAL_DOCS_TOOL_NAME = 'get_internal_docs'; diff --git a/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts b/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts index cd79694f78..dcf9e6e86e 100644 --- a/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts +++ b/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts @@ -59,6 +59,7 @@ import { READ_MANY_PARAM_RECURSIVE, READ_MANY_PARAM_USE_DEFAULT_EXCLUDES, MEMORY_PARAM_FACT, + MEMORY_PARAM_SCOPE, TODOS_PARAM_TODOS, TODOS_ITEM_PARAM_DESCRIPTION, TODOS_ITEM_PARAM_STATUS, @@ -513,13 +514,13 @@ Use this tool when the user's query implies needing the content of several files save_memory: { name: MEMORY_TOOL_NAME, description: ` -Saves concise global user context (preferences, facts) for use across ALL workspaces. +Saves concise user context (preferences, facts) for use across future sessions. -### CRITICAL: GLOBAL CONTEXT ONLY -NEVER save workspace-specific context, local paths, or commands (e.g. "The entry point is src/index.js", "The test command is npm test"). These are local to the current workspace and must NOT be saved globally. EXCLUSIVELY for context relevant across ALL workspaces. +Supports two scopes: +- **global** (default): Cross-project preferences loaded in every workspace. Use for "Remember X" or clear personal facts. +- **project**: Facts specific to the current workspace, private to the user (not committed to the repo). Use for local dev setup notes, project-specific workflows, or personal reminders about this codebase. -- Use for "Remember X" or clear personal facts. -- Do NOT use for session context.`, +Do NOT use for session-specific context or temporary data.`, parametersJsonSchema: { type: 'object', properties: { @@ -528,6 +529,12 @@ NEVER save workspace-specific context, local paths, or commands (e.g. "The entry description: 'The specific fact or piece of information to remember. Should be a clear, self-contained statement.', }, + [MEMORY_PARAM_SCOPE]: { + type: 'string', + enum: ['global', 'project'], + description: + "Where to save the memory. 'global' (default) saves to a file loaded in every workspace. 'project' saves to a project-specific file private to the user, not committed to the repo.", + }, }, required: [MEMORY_PARAM_FACT], additionalProperties: false, diff --git a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts index b19c157f22..b69ca43e5a 100644 --- a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts +++ b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts @@ -59,6 +59,7 @@ import { READ_MANY_PARAM_RECURSIVE, READ_MANY_PARAM_USE_DEFAULT_EXCLUDES, MEMORY_PARAM_FACT, + MEMORY_PARAM_SCOPE, TODOS_PARAM_TODOS, TODOS_ITEM_PARAM_DESCRIPTION, TODOS_ITEM_PARAM_STATUS, @@ -495,14 +496,20 @@ Use this tool when the user's query implies needing the content of several files save_memory: { name: MEMORY_TOOL_NAME, - description: `Persists global preferences or facts across ALL future sessions. Use this for recurring instructions like coding styles or tool aliases. Unlike '${WRITE_FILE_TOOL_NAME}', which is for project-specific files, this appends to a global memory file loaded in every workspace. If you are unsure whether a fact should be remembered globally, ask the user first. CRITICAL: Do not use for session-specific context or temporary data.`, + description: `Persists preferences or facts across ALL future sessions. Supports two scopes: 'global' (default) for cross-project preferences loaded in every workspace, and 'project' for facts specific to the current workspace that are private to the user (not committed to the repo). Use 'project' scope for things like local dev setup notes, project-specific workflows, or personal reminders about this codebase. CRITICAL: Do not use for session-specific context or temporary data.`, parametersJsonSchema: { type: 'object', properties: { [MEMORY_PARAM_FACT]: { type: 'string', description: - "A concise, global fact or preference (e.g., 'I prefer using tabs'). Do not include local paths or project-specific names.", + 'A concise fact or preference to remember. Should be a clear, self-contained statement.', + }, + [MEMORY_PARAM_SCOPE]: { + type: 'string', + enum: ['global', 'project'], + description: + "Where to save the memory. 'global' (default) saves to a file loaded in every workspace. 'project' saves to a project-specific file private to the user, not committed to the repo.", }, }, required: [MEMORY_PARAM_FACT], diff --git a/packages/core/src/tools/memoryTool.test.ts b/packages/core/src/tools/memoryTool.test.ts index 4b0aa1b616..8b306c9fb6 100644 --- a/packages/core/src/tools/memoryTool.test.ts +++ b/packages/core/src/tools/memoryTool.test.ts @@ -19,7 +19,9 @@ import { getCurrentGeminiMdFilename, getAllGeminiMdFilenames, DEFAULT_CONTEXT_FILENAME, + getProjectMemoryFilePath, } from './memoryTool.js'; +import type { Storage } from '../config/storage.js'; import * as fs from 'node:fs/promises'; import * as path from 'node:path'; import * as os from 'node:os'; @@ -113,9 +115,7 @@ describe('MemoryTool', () => { it('should have correct name, displayName, description, and schema', () => { expect(memoryTool.name).toBe('save_memory'); expect(memoryTool.displayName).toBe('SaveMemory'); - expect(memoryTool.description).toContain( - 'Saves concise global user context', - ); + expect(memoryTool.description).toContain('Saves concise user context'); expect(memoryTool.schema).toBeDefined(); expect(memoryTool.schema.name).toBe('save_memory'); expect(memoryTool.schema.parametersJsonSchema).toStrictEqual({ @@ -127,6 +127,12 @@ describe('MemoryTool', () => { description: 'The specific fact or piece of information to remember. Should be a clear, self-contained statement.', }, + scope: { + type: 'string', + enum: ['global', 'project'], + description: + "Where to save the memory. 'global' (default) saves to a file loaded in every workspace. 'project' saves to a project-specific file private to the user, not committed to the repo.", + }, }, required: ['fact'], }); @@ -378,4 +384,93 @@ describe('MemoryTool', () => { expect(() => memoryTool.build(attackParams)).toThrow(); }); }); + + describe('project-scope memory', () => { + const mockProjectMemoryDir = path.join( + '/mock', + '.gemini', + 'memory', + 'test-project', + ); + + function createMockStorage(): Storage { + return { + getProjectMemoryDir: () => mockProjectMemoryDir, + } as unknown as Storage; + } + + it('should reject scope=project when storage is not initialized', () => { + const bus = createMockMessageBus(); + const memoryToolNoStorage = new MemoryTool(bus); + const params = { fact: 'project fact', scope: 'project' as const }; + + expect(memoryToolNoStorage.validateToolParams(params)).toBe( + 'Project-level memory is not available: storage is not initialized.', + ); + }); + + it('should write to global path when scope is not specified', async () => { + const bus = createMockMessageBus(); + getMockMessageBusInstance(bus).defaultToolDecision = 'ask_user'; + const memoryToolWithStorage = new MemoryTool(bus, createMockStorage()); + const params = { fact: 'global fact' }; + const invocation = memoryToolWithStorage.build(params); + await invocation.execute(mockAbortSignal); + + const expectedFilePath = path.join( + os.homedir(), + GEMINI_DIR, + getCurrentGeminiMdFilename(), + ); + expect(fs.writeFile).toHaveBeenCalledWith( + expectedFilePath, + expect.any(String), + 'utf-8', + ); + }); + + it('should write to project memory path when scope is project', async () => { + const bus = createMockMessageBus(); + getMockMessageBusInstance(bus).defaultToolDecision = 'ask_user'; + const memoryToolWithStorage = new MemoryTool(bus, createMockStorage()); + const params = { + fact: 'project-specific fact', + scope: 'project' as const, + }; + const invocation = memoryToolWithStorage.build(params); + await invocation.execute(mockAbortSignal); + + const expectedFilePath = path.join( + mockProjectMemoryDir, + getCurrentGeminiMdFilename(), + ); + expect(fs.mkdir).toHaveBeenCalledWith(mockProjectMemoryDir, { + recursive: true, + }); + expect(fs.writeFile).toHaveBeenCalledWith( + expectedFilePath, + expect.stringContaining('- project-specific fact'), + 'utf-8', + ); + }); + + it('should use project path in confirmation details when scope is project', async () => { + const bus = createMockMessageBus(); + getMockMessageBusInstance(bus).defaultToolDecision = 'ask_user'; + const memoryToolWithStorage = new MemoryTool(bus, createMockStorage()); + const params = { fact: 'project fact', scope: 'project' as const }; + const invocation = memoryToolWithStorage.build(params); + const result = await invocation.shouldConfirmExecute(mockAbortSignal); + + expect(result).toBeDefined(); + expect(result).not.toBe(false); + + if (result && result.type === 'edit') { + expect(result.fileName).toBe( + getProjectMemoryFilePath(createMockStorage()), + ); + expect(result.newContent).toContain('- project fact'); + } + }); + }); }); diff --git a/packages/core/src/tools/memoryTool.ts b/packages/core/src/tools/memoryTool.ts index 68a0942a53..fa6a478d7d 100644 --- a/packages/core/src/tools/memoryTool.ts +++ b/packages/core/src/tools/memoryTool.ts @@ -61,6 +61,7 @@ export function getAllGeminiMdFilenames(): string[] { interface SaveMemoryParams { fact: string; + scope?: 'global' | 'project'; modified_by_user?: boolean; modified_content?: string; } @@ -69,6 +70,10 @@ export function getGlobalMemoryFilePath(): string { return path.join(Storage.getGlobalGeminiDir(), getCurrentGeminiMdFilename()); } +export function getProjectMemoryFilePath(storage: Storage): string { + return path.join(storage.getProjectMemoryDir(), getCurrentGeminiMdFilename()); +} + /** * Ensures proper newline separation before appending content. */ @@ -82,11 +87,11 @@ function ensureNewlineSeparation(currentContent: string): string { } /** - * Reads the current content of the memory file + * Reads the current content of a memory file at the given path. */ -async function readMemoryFileContent(): Promise { +async function readMemoryFileContent(filePath: string): Promise { try { - return await fs.readFile(getGlobalMemoryFilePath(), 'utf-8'); + return await fs.readFile(filePath, 'utf-8'); } catch (err) { // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion const error = err as Error & { code?: string }; @@ -146,32 +151,42 @@ class MemoryToolInvocation extends BaseToolInvocation< > { private static readonly allowlist: Set = new Set(); private proposedNewContent: string | undefined; + private readonly storage: Storage | undefined; constructor( params: SaveMemoryParams, messageBus: MessageBus, toolName?: string, displayName?: string, + storage?: Storage, ) { super(params, messageBus, toolName, displayName); + this.storage = storage; + } + + private getMemoryFilePath(): string { + if (this.params.scope === 'project' && this.storage) { + return getProjectMemoryFilePath(this.storage); + } + return getGlobalMemoryFilePath(); } getDescription(): string { - const memoryFilePath = getGlobalMemoryFilePath(); + const memoryFilePath = this.getMemoryFilePath(); return `in ${tildeifyPath(memoryFilePath)}`; } protected override async getConfirmationDetails( _abortSignal: AbortSignal, ): Promise { - const memoryFilePath = getGlobalMemoryFilePath(); + const memoryFilePath = this.getMemoryFilePath(); const allowlistKey = memoryFilePath; if (MemoryToolInvocation.allowlist.has(allowlistKey)) { return false; } - const currentContent = await readMemoryFileContent(); + const currentContent = await readMemoryFileContent(memoryFilePath); const { fact, modified_by_user, modified_content } = this.params; // If an attacker injects modified_content, use it for the diff @@ -213,6 +228,7 @@ class MemoryToolInvocation extends BaseToolInvocation< async execute(_signal: AbortSignal): Promise { const { fact, modified_by_user, modified_content } = this.params; + const memoryFilePath = this.getMemoryFilePath(); try { let contentToWrite: string; @@ -233,17 +249,17 @@ class MemoryToolInvocation extends BaseToolInvocation< // This case can be hit in flows without a confirmation step (e.g., --auto-confirm). // As a fallback, we recompute the content now. This is safe because // computeNewContent sanitizes the input. - const currentContent = await readMemoryFileContent(); + const currentContent = await readMemoryFileContent(memoryFilePath); this.proposedNewContent = computeNewContent(currentContent, fact); } contentToWrite = this.proposedNewContent; successMessage = `Okay, I've remembered that: "${sanitizedFact}"`; } - await fs.mkdir(path.dirname(getGlobalMemoryFilePath()), { + await fs.mkdir(path.dirname(memoryFilePath), { recursive: true, }); - await fs.writeFile(getGlobalMemoryFilePath(), contentToWrite, 'utf-8'); + await fs.writeFile(memoryFilePath, contentToWrite, 'utf-8'); return { llmContent: JSON.stringify({ @@ -275,8 +291,9 @@ export class MemoryTool implements ModifiableDeclarativeTool { static readonly Name = MEMORY_TOOL_NAME; + private readonly storage: Storage | undefined; - constructor(messageBus: MessageBus) { + constructor(messageBus: MessageBus, storage?: Storage) { super( MemoryTool.Name, 'SaveMemory', @@ -287,6 +304,14 @@ export class MemoryTool true, false, ); + this.storage = storage; + } + + private resolveMemoryFilePath(params: SaveMemoryParams): string { + if (params.scope === 'project' && this.storage) { + return getProjectMemoryFilePath(this.storage); + } + return getGlobalMemoryFilePath(); } protected override validateToolParamValues( @@ -296,6 +321,10 @@ export class MemoryTool return 'Parameter "fact" must be a non-empty string.'; } + if (params.scope === 'project' && !this.storage) { + return 'Project-level memory is not available: storage is not initialized.'; + } + return null; } @@ -310,6 +339,7 @@ export class MemoryTool messageBus, toolName ?? this.name, displayName ?? this.displayName, + this.storage, ); } @@ -319,11 +349,13 @@ export class MemoryTool getModifyContext(_abortSignal: AbortSignal): ModifyContext { return { - getFilePath: (_params: SaveMemoryParams) => getGlobalMemoryFilePath(), - getCurrentContent: async (_params: SaveMemoryParams): Promise => - readMemoryFileContent(), + getFilePath: (params: SaveMemoryParams) => + this.resolveMemoryFilePath(params), + getCurrentContent: async (params: SaveMemoryParams): Promise => + readMemoryFileContent(this.resolveMemoryFilePath(params)), getProposedContent: async (params: SaveMemoryParams): Promise => { - const currentContent = await readMemoryFileContent(); + const filePath = this.resolveMemoryFilePath(params); + const currentContent = await readMemoryFileContent(filePath); const { fact, modified_by_user, modified_content } = params; // Ensure the editor is populated with the same content // that the confirmation diff would show. diff --git a/packages/core/src/utils/memoryDiscovery.ts b/packages/core/src/utils/memoryDiscovery.ts index 01b9f9fb5a..cc61da78ec 100644 --- a/packages/core/src/utils/memoryDiscovery.ts +++ b/packages/core/src/utils/memoryDiscovery.ts @@ -485,6 +485,30 @@ export async function getGlobalMemoryPaths(): Promise { ); } +export async function getUserProjectMemoryPaths( + projectMemoryDir: string, +): Promise { + const geminiMdFilenames = getAllGeminiMdFilenames(); + + const accessChecks = geminiMdFilenames.map(async (filename) => { + const memoryPath = normalizePath(path.join(projectMemoryDir, filename)); + try { + await fs.access(memoryPath, fsSync.constants.R_OK); + debugLogger.debug( + '[DEBUG] [MemoryDiscovery] Found user project memory file:', + memoryPath, + ); + return memoryPath; + } catch { + return null; + } + }); + + return (await Promise.all(accessChecks)).filter( + (p): p is string => p !== null, + ); +} + export function getExtensionMemoryPaths( extensionLoader: ExtensionLoader, ): string[] { @@ -526,7 +550,12 @@ export async function getEnvironmentMemoryPaths( } export function categorizeAndConcatenate( - paths: { global: string[]; extension: string[]; project: string[] }, + paths: { + global: string[]; + extension: string[]; + project: string[]; + userProjectMemory?: string[]; + }, contentsMap: Map, ): HierarchicalMemory { const getConcatenated = (pList: string[]) => @@ -540,6 +569,7 @@ export function categorizeAndConcatenate( global: getConcatenated(paths.global), extension: getConcatenated(paths.extension), project: getConcatenated(paths.project), + userProjectMemory: getConcatenated(paths.userProjectMemory ?? []), }; } From 2f7f967189253bd9631cb06266fe4bac9e102440 Mon Sep 17 00:00:00 2001 From: Sehoon Shon Date: Mon, 30 Mar 2026 23:17:36 -0400 Subject: [PATCH 013/146] test(integration): fix plan mode write denial test false positive (#24299) --- integration-tests/plan-mode.test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration-tests/plan-mode.test.ts b/integration-tests/plan-mode.test.ts index 5a1c91e4e1..2467b6027e 100644 --- a/integration-tests/plan-mode.test.ts +++ b/integration-tests/plan-mode.test.ts @@ -113,7 +113,7 @@ describe('Plan Mode', () => { ).toBe(true); }); - it.skip('should deny write_file to non-plans directory in plan mode', async () => { + it('should deny write_file to non-plans directory in plan mode', async () => { const plansDir = '.gemini/tmp/foo/123/plans'; const testName = 'should deny write_file to non-plans directory in plan mode'; @@ -135,7 +135,7 @@ describe('Plan Mode', () => { await rig.run({ approvalMode: 'plan', - args: 'Create a file called hello.txt in the current directory.', + args: 'Attempt to create a file named "hello.txt" in the current directory. Do not create a plan file, try to write hello.txt directly.', }); const toolLogs = rig.readToolLogs(); From d0d3639e16725f898001f792cab2528d126ad4a1 Mon Sep 17 00:00:00 2001 From: Adib234 <30782825+Adib234@users.noreply.github.com> Date: Mon, 30 Mar 2026 23:33:55 -0400 Subject: [PATCH 014/146] feat(plan): support `Plan` mode in untrusted folders (#17586) --- packages/core/src/config/config.test.ts | 6 ++++++ packages/core/src/config/config.ts | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index 59133f6997..f6eab55d0c 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -1682,6 +1682,12 @@ describe('setApprovalMode with folder trust', () => { expect(() => config.setApprovalMode(ApprovalMode.DEFAULT)).not.toThrow(); }); + it('should NOT throw an error when setting PLAN mode in an untrusted folder', () => { + const config = new Config(baseParams); + vi.spyOn(config, 'isTrustedFolder').mockReturnValue(false); + expect(() => config.setApprovalMode(ApprovalMode.PLAN)).not.toThrow(); + }); + it('should NOT throw an error when setting any mode in a trusted folder', () => { const config = new Config(baseParams); vi.spyOn(config, 'isTrustedFolder').mockReturnValue(true); diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index f3e02510ed..012d0e6b4e 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -2517,7 +2517,11 @@ export class Config implements McpContext, AgentLoopContext { } setApprovalMode(mode: ApprovalMode): void { - if (!this.isTrustedFolder() && mode !== ApprovalMode.DEFAULT) { + if ( + !this.isTrustedFolder() && + mode !== ApprovalMode.DEFAULT && + mode !== ApprovalMode.PLAN + ) { throw new Error( 'Cannot enable privileged approval modes in an untrusted folder.', ); From 561418c554ad17385ac92e813816988c7f5e5977 Mon Sep 17 00:00:00 2001 From: Sehoon Shon Date: Tue, 31 Mar 2026 02:25:21 -0400 Subject: [PATCH 015/146] fix(core): enable mid-stream retries for all models and re-enable compression test (#24302) --- integration-tests/api-resilience.responses | 2 +- .../context-compress-interactive.test.ts | 2 +- packages/core/src/core/client.test.ts | 42 ------------------- packages/core/src/core/client.ts | 11 +---- packages/core/src/core/geminiChat.test.ts | 35 ---------------- packages/core/src/core/geminiChat.ts | 11 +---- 6 files changed, 6 insertions(+), 97 deletions(-) diff --git a/integration-tests/api-resilience.responses b/integration-tests/api-resilience.responses index d30d29906e..d0520047f7 100644 --- a/integration-tests/api-resilience.responses +++ b/integration-tests/api-resilience.responses @@ -1 +1 @@ -{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Part 1. "}],"role":"model"},"index":0}]},{"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":10,"totalTokenCount":110}},{"candidates":[{"content":{"parts":[{"text":"Part 2."}],"role":"model"},"index":0}],"finishReason":"STOP"}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Part 1. "}],"role":"model"},"index":0}]},{"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":10,"totalTokenCount":110}},{"candidates":[{"content":{"parts":[{"text":"Part 2."}],"role":"model"},"index":0,"finishReason":"STOP"}]}]} diff --git a/integration-tests/context-compress-interactive.test.ts b/integration-tests/context-compress-interactive.test.ts index 348d984c31..c7e04c6c23 100644 --- a/integration-tests/context-compress-interactive.test.ts +++ b/integration-tests/context-compress-interactive.test.ts @@ -19,7 +19,7 @@ describe('Interactive Mode', () => { await rig.cleanup(); }); - it.skip('should trigger chat compression with /compress command', async () => { + it('should trigger chat compression with /compress command', async () => { await rig.setup('interactive-compress-success', { fakeResponsesPath: join( import.meta.dirname, diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index 033b674cb5..584ae5412c 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -2157,48 +2157,6 @@ ${JSON.stringify( expect(mockTurnRunFn).toHaveBeenCalledTimes(1); }); - it('should not retry with "Please continue." when InvalidStream event is received for non-Gemini-2 models', async () => { - vi.spyOn(client['config'], 'getContinueOnFailedApiCall').mockReturnValue( - true, - ); - // Arrange - router returns a non-Gemini-2 model - mockRouterService.route.mockResolvedValue({ - model: 'gemini-3.0-pro', - reason: 'test', - }); - - const mockStream1 = (async function* () { - yield { type: GeminiEventType.InvalidStream }; - })(); - - mockTurnRunFn.mockReturnValueOnce(mockStream1); - - const mockChat: Partial = { - addHistory: vi.fn(), - setTools: vi.fn(), - getHistory: vi.fn().mockReturnValue([]), - getLastPromptTokenCount: vi.fn(), - }; - client['chat'] = mockChat as GeminiChat; - - const initialRequest = [{ text: 'Hi' }]; - const promptId = 'prompt-id-invalid-stream-non-g2'; - const signal = new AbortController().signal; - - // Act - const stream = client.sendMessageStream(initialRequest, signal, promptId); - const events = await fromAsync(stream); - - // Assert - expect(events).toEqual([ - { type: GeminiEventType.ModelInfo, value: 'gemini-3.0-pro' }, - { type: GeminiEventType.InvalidStream }, - ]); - - // Verify that turn.run was called only once (no retry) - expect(mockTurnRunFn).toHaveBeenCalledTimes(1); - }); - it('should stop recursing after one retry when InvalidStream events are repeatedly received', async () => { vi.spyOn(client['config'], 'getContinueOnFailedApiCall').mockReturnValue( true, diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 765ea6df45..4863c7aa2e 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -71,11 +71,7 @@ import { applyModelSelection, createAvailabilityContextProvider, } from '../availability/policyHelpers.js'; -import { - getDisplayString, - resolveModel, - isGemini2Model, -} from '../config/models.js'; +import { getDisplayString, resolveModel } from '../config/models.js'; import { partToString } from '../utils/partUtils.js'; import { coreEvents, CoreEvent } from '../utils/events.js'; @@ -820,10 +816,7 @@ export class GeminiClient { } if (isInvalidStream) { - if ( - this.config.getContinueOnFailedApiCall() && - isGemini2Model(modelToUse) - ) { + if (this.config.getContinueOnFailedApiCall()) { if (isInvalidStreamRetry) { logContentRetryFailure( this.config, diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index adc50d5979..aad2054ad0 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -1140,41 +1140,6 @@ describe('GeminiChat', () => { }); describe('sendMessageStream with retries', () => { - it('should not retry on invalid content if model does not start with gemini-2', async () => { - // Mock the stream to fail. - vi.mocked(mockContentGenerator.generateContentStream).mockImplementation( - async () => - (async function* () { - yield { - candidates: [{ content: { parts: [{ text: '' }] } }], - } as unknown as GenerateContentResponse; - })(), - ); - - const stream = await chat.sendMessageStream( - { model: 'gemini-1.5-pro' }, - 'test', - 'prompt-id-no-retry', - new AbortController().signal, - LlmRole.MAIN, - ); - - await expect( - (async () => { - for await (const _ of stream) { - // Must loop to trigger the internal logic that throws. - } - })(), - ).rejects.toThrow(InvalidStreamError); - - // Should be called only 1 time (no retry) - expect(mockContentGenerator.generateContentStream).toHaveBeenCalledTimes( - 1, - ); - expect(mockLogContentRetry).not.toHaveBeenCalled(); - expect(mockLogContentRetryFailure).toHaveBeenCalledTimes(1); - }); - it('should yield a RETRY event when an invalid stream is encountered', async () => { // ARRANGE: Mock the stream to fail once, then succeed. vi.mocked(mockContentGenerator.generateContentStream) diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index 00ff64a398..b96baa1c6a 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -25,11 +25,7 @@ import { getRetryErrorType, } from '../utils/retry.js'; import type { ValidationRequiredError } from '../utils/googleQuotaErrors.js'; -import { - resolveModel, - isGemini2Model, - supportsModernFeatures, -} from '../config/models.js'; +import { resolveModel, supportsModernFeatures } from '../config/models.js'; import { hasCycleInSchema } from '../tools/tools.js'; import type { StructuredError } from './turn.js'; import type { CompletedToolCall } from '../scheduler/types.js'; @@ -423,10 +419,7 @@ export class GeminiChat { ? error.type : getRetryErrorType(error); - if ( - (isContentError && isGemini2Model(model)) || - (isRetryable && !signal.aborted) - ) { + if (isContentError || (isRetryable && !signal.aborted)) { // The issue requests exactly 3 retries (4 attempts) for API errors during stream iteration. // Regardless of the global maxAttempts (e.g. 10), we only want to retry these mid-stream API errors // up to 3 times before finally throwing the error to the user. From 21ad42f6778e03b09cad9d68ff1862e8e57dd41e Mon Sep 17 00:00:00 2001 From: gemini-cli-robot Date: Mon, 30 Mar 2026 23:35:26 -0700 Subject: [PATCH 016/146] Changelog for v0.36.0-preview.6 (#24082) Co-authored-by: gemini-cli-robot <224641728+gemini-cli-robot@users.noreply.github.com> --- docs/changelogs/preview.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/changelogs/preview.md b/docs/changelogs/preview.md index 6c31a64679..5568191d73 100644 --- a/docs/changelogs/preview.md +++ b/docs/changelogs/preview.md @@ -1,6 +1,6 @@ -# Preview release: v0.36.0-preview.5 +# Preview release: v0.36.0-preview.6 -Released: March 27, 2026 +Released: March 28, 2026 Our preview release includes the latest, new, and experimental features. This release may not be as stable as our [latest weekly release](latest.md). @@ -31,6 +31,10 @@ npm install -g @google/gemini-cli@preview ## What's Changed +- fix(patch): cherry-pick 765fb67 to release/v0.36.0-preview.5-pr-24055 to patch + version v0.36.0-preview.5 and create version 0.36.0-preview.6 by + @gemini-cli-robot in + [#24061](https://github.com/google-gemini/gemini-cli/pull/24061) - fix(a2a-server): A2A server should execute ask policies in interactive mode by @kschaab in [#23831](https://github.com/google-gemini/gemini-cli/pull/23831) - docs(core): document agent_card_json string literal options for remote agents @@ -386,4 +390,4 @@ npm install -g @google/gemini-cli@preview [#23666](https://github.com/google-gemini/gemini-cli/pull/23666) **Full Changelog**: -https://github.com/google-gemini/gemini-cli/compare/v0.35.0-preview.5...v0.36.0-preview.5 +https://github.com/google-gemini/gemini-cli/compare/v0.35.0-preview.5...v0.36.0-preview.6 From e293424bb4973fc2a281f67c43594222263b240a Mon Sep 17 00:00:00 2001 From: gemini-cli-robot Date: Mon, 30 Mar 2026 23:35:30 -0700 Subject: [PATCH 017/146] Changelog for v0.35.3 (#24083) Co-authored-by: gemini-cli-robot <224641728+gemini-cli-robot@users.noreply.github.com> --- docs/changelogs/latest.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/changelogs/latest.md b/docs/changelogs/latest.md index 6df33c78d6..3d3cf07f7a 100644 --- a/docs/changelogs/latest.md +++ b/docs/changelogs/latest.md @@ -1,6 +1,6 @@ -# Latest stable release: v0.35.2 +# Latest stable release: v0.35.3 -Released: March 26, 2026 +Released: March 28, 2026 For most users, our latest stable release is the recommended release. Install the latest stable version with: @@ -29,6 +29,9 @@ npm install -g @google/gemini-cli ## What's Changed +- fix(patch): cherry-pick 765fb67 to release/v0.35.2-pr-24055 [CONFLICTS] by + @gemini-cli-robot in + [#24063](https://github.com/google-gemini/gemini-cli/pull/24063) - fix(core): allow disabling environment variable redaction by @galz10 in [#23927](https://github.com/google-gemini/gemini-cli/pull/23927) - fix(a2a-server): A2A server should execute ask policies in interactive mode by @@ -385,4 +388,4 @@ npm install -g @google/gemini-cli [#23585](https://github.com/google-gemini/gemini-cli/pull/23585) **Full Changelog**: -https://github.com/google-gemini/gemini-cli/compare/v0.34.0...v0.35.2 +https://github.com/google-gemini/gemini-cli/compare/v0.34.0...v0.35.3 From 3982a252bb2de964e2d6582f2dbe658d493efc98 Mon Sep 17 00:00:00 2001 From: cynthialong0-0 <82900738+cynthialong0-0@users.noreply.github.com> Date: Tue, 31 Mar 2026 08:23:19 -0700 Subject: [PATCH 018/146] fix(browser): reset action counter for each agent session and let it ignore internal actions (#24228) --- .../src/agents/browser/automationOverlay.ts | 2 ++ .../src/agents/browser/browserManager.test.ts | 24 +++++++++++++++++++ .../core/src/agents/browser/browserManager.ts | 20 ++++++++++------ .../src/agents/browser/inputBlocker.test.ts | 6 +++++ .../core/src/agents/browser/inputBlocker.ts | 4 ++++ .../src/agents/browser/mcpToolWrapper.test.ts | 2 ++ 6 files changed, 51 insertions(+), 7 deletions(-) diff --git a/packages/core/src/agents/browser/automationOverlay.ts b/packages/core/src/agents/browser/automationOverlay.ts index a1aa40d58b..e87a70b6da 100644 --- a/packages/core/src/agents/browser/automationOverlay.ts +++ b/packages/core/src/agents/browser/automationOverlay.ts @@ -94,6 +94,7 @@ export async function injectAutomationOverlay( 'evaluate_script', { function: buildInjectionScript() }, signal, + true, ); if (result.isError) { @@ -120,6 +121,7 @@ export async function removeAutomationOverlay( 'evaluate_script', { function: buildRemovalScript() }, signal, + true, ); if (result.isError) { diff --git a/packages/core/src/agents/browser/browserManager.test.ts b/packages/core/src/agents/browser/browserManager.test.ts index 9813fd721f..edf3c46664 100644 --- a/packages/core/src/agents/browser/browserManager.test.ts +++ b/packages/core/src/agents/browser/browserManager.test.ts @@ -980,5 +980,29 @@ describe('BrowserManager', () => { /maximum action limit \(3\)/, ); }); + + it('should NOT increment action counter when shouldCount is false', async () => { + const limitedConfig = makeFakeConfig({ + agents: { + browser: { + maxActionsPerTask: 1, + }, + }, + }); + const manager = new BrowserManager(limitedConfig); + + // Multiple calls with isInternal: true should NOT exhaust the limit + await manager.callTool('evaluate_script', {}, undefined, true); + await manager.callTool('evaluate_script', {}, undefined, true); + await manager.callTool('evaluate_script', {}, undefined, true); + + // This should still work + await manager.callTool('take_snapshot', {}); + + // Next one should throw (limit 1 allows exactly 1 call with >= check) + await expect(manager.callTool('take_snapshot', {})).rejects.toThrow( + /maximum action limit \(1\)/, + ); + }); }); }); diff --git a/packages/core/src/agents/browser/browserManager.ts b/packages/core/src/agents/browser/browserManager.ts index 81f9db8250..5aebc93823 100644 --- a/packages/core/src/agents/browser/browserManager.ts +++ b/packages/core/src/agents/browser/browserManager.ts @@ -215,26 +215,30 @@ export class BrowserManager { * @param toolName The name of the tool to call * @param args Arguments to pass to the tool * @param signal Optional AbortSignal to cancel the call + * @param isInternal Determine if the tool is for internal execution * @returns The result from the MCP server */ async callTool( toolName: string, args: Record, signal?: AbortSignal, + isInternal: boolean = false, ): Promise { if (signal?.aborted) { throw signal.reason ?? new Error('Operation cancelled'); } // Hard enforcement of per-action rate limit - if (this.actionCounter > this.maxActionsPerTask) { - const error = new Error( - `Browser agent reached maximum action limit (${this.maxActionsPerTask}). ` + - `Task terminated to prevent runaway execution. To config the limit, use maxActionsPerTask in the settings.`, - ); - throw error; + if (!isInternal) { + if (this.actionCounter >= this.maxActionsPerTask) { + const error = new Error( + `Browser agent reached maximum action limit (${this.maxActionsPerTask}). ` + + `Task terminated to prevent runaway execution. To config the limit, use maxActionsPerTask in the settings.`, + ); + throw error; + } + this.actionCounter++; } - this.actionCounter++; const errorMessage = this.checkNavigationRestrictions(toolName, args); if (errorMessage) { @@ -588,6 +592,8 @@ export class BrowserManager { debugLogger.log('MCP client connected to chrome-devtools-mcp'); await this.discoverTools(); this.registerInputBlockerHandler(); + // clear the action counter for each connection + this.actionCounter = 0; })(), new Promise((_, reject) => { timeoutId = setTimeout( diff --git a/packages/core/src/agents/browser/inputBlocker.test.ts b/packages/core/src/agents/browser/inputBlocker.test.ts index abccac70c3..4723b9b607 100644 --- a/packages/core/src/agents/browser/inputBlocker.test.ts +++ b/packages/core/src/agents/browser/inputBlocker.test.ts @@ -34,6 +34,7 @@ describe('inputBlocker', () => { function: expect.stringContaining('__gemini_input_blocker'), }, undefined, + true, ); }); @@ -96,6 +97,7 @@ describe('inputBlocker', () => { function: expect.stringContaining('__gemini_input_blocker'), }), undefined, + true, ); expect(mockBrowserManager.callTool).toHaveBeenNthCalledWith( 2, @@ -104,6 +106,7 @@ describe('inputBlocker', () => { function: expect.stringContaining('__gemini_input_blocker'), }), undefined, + true, ); }); }); @@ -118,6 +121,7 @@ describe('inputBlocker', () => { function: expect.stringContaining('__gemini_input_blocker'), }, undefined, + true, ); }); @@ -163,6 +167,7 @@ describe('inputBlocker', () => { function: expect.stringContaining('__gemini_input_blocker'), }), undefined, + true, ); expect(mockBrowserManager.callTool).toHaveBeenNthCalledWith( 2, @@ -171,6 +176,7 @@ describe('inputBlocker', () => { function: expect.stringContaining('__gemini_input_blocker'), }), undefined, + true, ); }); }); diff --git a/packages/core/src/agents/browser/inputBlocker.ts b/packages/core/src/agents/browser/inputBlocker.ts index 0d6b9610cf..d7c6d8ce16 100644 --- a/packages/core/src/agents/browser/inputBlocker.ts +++ b/packages/core/src/agents/browser/inputBlocker.ts @@ -205,6 +205,7 @@ export async function injectInputBlocker( 'evaluate_script', { function: INPUT_BLOCKER_FUNCTION }, signal, + true, ); debugLogger.log('Input blocker injected successfully'); } catch (error) { @@ -232,6 +233,7 @@ export async function removeInputBlocker( 'evaluate_script', { function: REMOVE_BLOCKER_FUNCTION }, signal, + true, ); debugLogger.log('Input blocker removed successfully'); } catch (error) { @@ -257,6 +259,7 @@ export async function suspendInputBlocker( 'evaluate_script', { function: SUSPEND_BLOCKER_FUNCTION }, signal, + true, ); } catch { // Non-critical — tool call will still attempt to proceed @@ -276,6 +279,7 @@ export async function resumeInputBlocker( 'evaluate_script', { function: RESUME_BLOCKER_FUNCTION }, signal, + true, ); } catch { // Non-critical diff --git a/packages/core/src/agents/browser/mcpToolWrapper.test.ts b/packages/core/src/agents/browser/mcpToolWrapper.test.ts index fa9aa228a5..7a03a1daec 100644 --- a/packages/core/src/agents/browser/mcpToolWrapper.test.ts +++ b/packages/core/src/agents/browser/mcpToolWrapper.test.ts @@ -225,6 +225,7 @@ describe('mcpToolWrapper', () => { function: expect.stringContaining('__gemini_input_blocker'), }), expect.any(AbortSignal), + true, ); // Second call: click @@ -243,6 +244,7 @@ describe('mcpToolWrapper', () => { function: expect.stringContaining('__gemini_input_blocker'), }), expect.any(AbortSignal), + true, ); }); From f9a93a133768cc6da7e726e55651f675bc0a2e29 Mon Sep 17 00:00:00 2001 From: Sehoon Shon Date: Tue, 31 Mar 2026 12:10:13 -0400 Subject: [PATCH 019/146] feat(cli): add auth info to footer (#24042) --- packages/cli/src/config/footerItems.test.ts | 220 ++++++++++++------ packages/cli/src/config/footerItems.ts | 19 +- .../cli/src/ui/components/Footer.test.tsx | 82 ++++++- packages/cli/src/ui/components/Footer.tsx | 35 +++ .../ui/components/FooterConfigDialog.test.tsx | 2 +- .../src/ui/components/FooterConfigDialog.tsx | 1 + .../cli/src/ui/components/UserIdentity.tsx | 11 +- ...ts-the-active-item-in-the-preview.snap.svg | 101 ++++---- ...s-correctly-with-default-settings.snap.svg | 69 +++--- ...Show-footer-labels-is-toggled-off.snap.svg | 73 +++--- .../FooterConfigDialog.test.tsx.snap | 8 + 11 files changed, 423 insertions(+), 198 deletions(-) diff --git a/packages/cli/src/config/footerItems.test.ts b/packages/cli/src/config/footerItems.test.ts index 420246811b..d9ef9bc3f2 100644 --- a/packages/cli/src/config/footerItems.test.ts +++ b/packages/cli/src/config/footerItems.test.ts @@ -5,87 +5,153 @@ */ import { describe, it, expect } from 'vitest'; -import { deriveItemsFromLegacySettings } from './footerItems.js'; +import { + deriveItemsFromLegacySettings, + resolveFooterState, +} from './footerItems.js'; import { createMockSettings } from '../test-utils/settings.js'; -describe('deriveItemsFromLegacySettings', () => { - it('returns defaults when no legacy settings are customized', () => { - const settings = createMockSettings({ - ui: { footer: { hideContextPercentage: true } }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).toEqual([ - 'workspace', - 'git-branch', - 'sandbox', - 'model-name', - 'quota', - ]); - }); +describe('footerItems', () => { + describe('deriveItemsFromLegacySettings', () => { + it('returns defaults when no legacy settings are customized', () => { + const settings = createMockSettings({ + ui: { footer: { hideContextPercentage: true } }, + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).toEqual([ + 'workspace', + 'git-branch', + 'sandbox', + 'model-name', + 'quota', + ]); + }); - it('removes workspace when hideCWD is true', () => { - const settings = createMockSettings({ - ui: { footer: { hideCWD: true, hideContextPercentage: true } }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).not.toContain('workspace'); - }); + it('removes workspace when hideCWD is true', () => { + const settings = createMockSettings({ + ui: { footer: { hideCWD: true, hideContextPercentage: true } }, + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).not.toContain('workspace'); + }); - it('removes sandbox when hideSandboxStatus is true', () => { - const settings = createMockSettings({ - ui: { footer: { hideSandboxStatus: true, hideContextPercentage: true } }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).not.toContain('sandbox'); - }); - - it('removes model-name, context-used, and quota when hideModelInfo is true', () => { - const settings = createMockSettings({ - ui: { footer: { hideModelInfo: true, hideContextPercentage: true } }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).not.toContain('model-name'); - expect(items).not.toContain('context-used'); - expect(items).not.toContain('quota'); - }); - - it('includes context-used when hideContextPercentage is false', () => { - const settings = createMockSettings({ - ui: { footer: { hideContextPercentage: false } }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).toContain('context-used'); - // Should be after model-name - const modelIdx = items.indexOf('model-name'); - const contextIdx = items.indexOf('context-used'); - expect(contextIdx).toBe(modelIdx + 1); - }); - - it('includes memory-usage when showMemoryUsage is true', () => { - const settings = createMockSettings({ - ui: { showMemoryUsage: true, footer: { hideContextPercentage: true } }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).toContain('memory-usage'); - }); - - it('handles combination of settings', () => { - const settings = createMockSettings({ - ui: { - showMemoryUsage: true, - footer: { - hideCWD: true, - hideModelInfo: true, - hideContextPercentage: false, + it('removes sandbox when hideSandboxStatus is true', () => { + const settings = createMockSettings({ + ui: { + footer: { hideSandboxStatus: true, hideContextPercentage: true }, }, - }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).toEqual([ - 'git-branch', - 'sandbox', - 'context-used', - 'memory-usage', - ]); + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).not.toContain('sandbox'); + }); + + it('removes model-name, context-used, and quota when hideModelInfo is true', () => { + const settings = createMockSettings({ + ui: { footer: { hideModelInfo: true, hideContextPercentage: true } }, + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).not.toContain('model-name'); + expect(items).not.toContain('context-used'); + expect(items).not.toContain('quota'); + }); + + it('includes context-used when hideContextPercentage is false', () => { + const settings = createMockSettings({ + ui: { footer: { hideContextPercentage: false } }, + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).toContain('context-used'); + // Should be after model-name + const modelIdx = items.indexOf('model-name'); + const contextIdx = items.indexOf('context-used'); + expect(contextIdx).toBe(modelIdx + 1); + }); + + it('includes memory-usage when showMemoryUsage is true', () => { + const settings = createMockSettings({ + ui: { showMemoryUsage: true, footer: { hideContextPercentage: true } }, + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).toContain('memory-usage'); + }); + + it('handles combination of settings', () => { + const settings = createMockSettings({ + ui: { + showMemoryUsage: true, + footer: { + hideCWD: true, + hideModelInfo: true, + hideContextPercentage: false, + }, + }, + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).toEqual([ + 'git-branch', + 'sandbox', + 'context-used', + 'memory-usage', + ]); + }); + }); + + describe('resolveFooterState', () => { + it('filters out auth item when showUserIdentity is false', () => { + const settings = createMockSettings({ + ui: { + showUserIdentity: false, + footer: { + items: ['workspace', 'auth', 'model-name'], + }, + }, + }).merged; + + const state = resolveFooterState(settings); + expect(state.orderedIds).not.toContain('auth'); + expect(state.selectedIds.has('auth')).toBe(false); + // It should also not be in the 'others' part of orderedIds + expect(state.orderedIds).toEqual([ + 'workspace', + 'model-name', + 'git-branch', + 'sandbox', + 'context-used', + 'quota', + 'memory-usage', + 'session-id', + 'code-changes', + 'token-count', + ]); + }); + + it('includes auth item when showUserIdentity is true', () => { + const settings = createMockSettings({ + ui: { + showUserIdentity: true, + footer: { + items: ['workspace', 'auth', 'model-name'], + }, + }, + }).merged; + + const state = resolveFooterState(settings); + expect(state.orderedIds).toContain('auth'); + expect(state.selectedIds.has('auth')).toBe(true); + }); + + it('includes auth item by default when showUserIdentity is undefined (defaults to true)', () => { + const settings = createMockSettings({ + ui: { + footer: { + items: ['workspace', 'auth', 'model-name'], + }, + }, + }).merged; + + const state = resolveFooterState(settings); + expect(state.orderedIds).toContain('auth'); + expect(state.selectedIds.has('auth')).toBe(true); + }); }); }); diff --git a/packages/cli/src/config/footerItems.ts b/packages/cli/src/config/footerItems.ts index 8410d0b5ec..9f3943b692 100644 --- a/packages/cli/src/config/footerItems.ts +++ b/packages/cli/src/config/footerItems.ts @@ -47,6 +47,11 @@ export const ALL_ITEMS = [ header: 'session', description: 'Unique identifier for the current session', }, + { + id: 'auth', + header: '/auth', + description: 'Current authentication info', + }, { id: 'code-changes', header: 'diff', @@ -70,6 +75,7 @@ export const DEFAULT_ORDER = [ 'quota', 'memory-usage', 'session-id', + 'auth', 'code-changes', 'token-count', ]; @@ -121,10 +127,19 @@ export function resolveFooterState(settings: MergedSettings): { orderedIds: string[]; selectedIds: Set; } { + const showUserIdentity = settings.ui?.showUserIdentity !== false; + const filteredValidIds = showUserIdentity + ? VALID_IDS + : new Set([...VALID_IDS].filter((id) => id !== 'auth')); + const source = ( settings.ui?.footer?.items ?? deriveItemsFromLegacySettings(settings) - ).filter((id: string) => VALID_IDS.has(id)); - const others = DEFAULT_ORDER.filter((id) => !source.includes(id)); + ).filter((id: string) => filteredValidIds.has(id)); + + const others = DEFAULT_ORDER.filter( + (id) => !source.includes(id) && filteredValidIds.has(id), + ); + return { orderedIds: [...source, ...others], selectedIds: new Set(source), diff --git a/packages/cli/src/ui/components/Footer.test.tsx b/packages/cli/src/ui/components/Footer.test.tsx index c0a52af868..1ed20a42b5 100644 --- a/packages/cli/src/ui/components/Footer.test.tsx +++ b/packages/cli/src/ui/components/Footer.test.tsx @@ -8,7 +8,11 @@ import { describe, it, expect, vi, afterEach, beforeEach } from 'vitest'; import { renderWithProviders } from '../../test-utils/render.js'; import { Footer } from './Footer.js'; import { createMockSettings } from '../../test-utils/settings.js'; -import { type Config } from '@google/gemini-cli-core'; +import { + type Config, + UserAccountManager, + AuthType, +} from '@google/gemini-cli-core'; import path from 'node:path'; // Normalize paths to POSIX slashes for stable cross-platform snapshots. @@ -69,14 +73,17 @@ const defaultProps = { branchName: 'main', }; -const mockConfig = { +const mockConfigPlain = { getTargetDir: () => defaultProps.targetDir, getDebugMode: () => false, getModel: () => defaultProps.model, getIdeMode: () => false, isTrustedFolder: () => true, getExtensionRegistryURI: () => undefined, -} as unknown as Config; + getContentGeneratorConfig: () => ({ authType: undefined }), +}; + +const mockConfig = mockConfigPlain as unknown as Config; const mockSessionStats = { sessionId: 'test-session-id', @@ -675,6 +682,75 @@ describe('