diff --git a/.gemini/settings.json b/.gemini/settings.json new file mode 100644 index 0000000000..f84c17e60a --- /dev/null +++ b/.gemini/settings.json @@ -0,0 +1,7 @@ +{ + "experimental": { + "toolOutputMasking": { + "enabled": true + } + } +} diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index ee8e1d9a7d..9669dcfb4a 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -806,6 +806,7 @@ export async function loadCliConfig( skillsSupport: settings.skills?.enabled ?? true, disabledSkills: settings.skills?.disabled, experimentalJitContext: settings.experimental?.jitContext, + toolOutputMasking: settings.experimental?.toolOutputMasking, noBrowser: !!process.env['NO_BROWSER'], summarizeToolOutput: settings.model?.summarizeToolOutput, ideMode, diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 2a67685239..376fba2d49 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -1462,6 +1462,58 @@ const SETTINGS_SCHEMA = { description: 'Setting to enable experimental features', showInDialog: false, properties: { + toolOutputMasking: { + type: 'object', + label: 'Tool Output Masking', + category: 'Experimental', + requiresRestart: true, + ignoreInDocs: true, + default: {}, + description: + 'Advanced settings for tool output masking to manage context window efficiency.', + showInDialog: false, + properties: { + enabled: { + type: 'boolean', + label: 'Enable Tool Output Masking', + category: 'Experimental', + requiresRestart: true, + default: false, + description: 'Enables tool output masking to save tokens.', + showInDialog: false, + }, + toolProtectionThreshold: { + type: 'number', + label: 'Tool Protection Threshold', + category: 'Experimental', + requiresRestart: true, + default: 50000, + description: + 'Minimum number of tokens to protect from masking (most recent tool outputs).', + showInDialog: false, + }, + minPrunableTokensThreshold: { + type: 'number', + label: 'Min Prunable Tokens Threshold', + category: 'Experimental', + requiresRestart: true, + default: 30000, + description: + 'Minimum prunable tokens required to trigger a masking pass.', + showInDialog: false, + }, + protectLatestTurn: { + type: 'boolean', + label: 'Protect Latest Turn', + category: 'Experimental', + requiresRestart: true, + default: true, + description: + 'Ensures the absolute latest turn is never masked, regardless of token count.', + showInDialog: false, + }, + }, + }, enableAgents: { type: 'boolean', label: 'Enable Agents', diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 7bcf9434cc..7de07b748b 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -149,6 +149,13 @@ export interface OutputSettings { format?: OutputFormat; } +export interface ToolOutputMaskingConfig { + enabled: boolean; + toolProtectionThreshold: number; + minPrunableTokensThreshold: number; + protectLatestTurn: boolean; +} + export interface ExtensionSetting { name: string; description: string; @@ -273,6 +280,11 @@ import { DEFAULT_FILE_FILTERING_OPTIONS, DEFAULT_MEMORY_FILE_FILTERING_OPTIONS, } from './constants.js'; +import { + DEFAULT_TOOL_PROTECTION_THRESHOLD, + DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD, + DEFAULT_PROTECT_LATEST_TURN, +} from '../services/toolOutputMaskingService.js'; import { type ExtensionLoader, @@ -462,6 +474,7 @@ export interface ConfigParameters { disabledSkills?: string[]; adminSkillsEnabled?: boolean; experimentalJitContext?: boolean; + toolOutputMasking?: Partial; disableLLMCorrection?: boolean; plan?: boolean; onModelChange?: (model: string) => void; @@ -599,6 +612,7 @@ export class Config { private pendingIncludeDirectories: string[]; private readonly enableHooks: boolean; private readonly enableHooksUI: boolean; + private readonly toolOutputMasking: ToolOutputMaskingConfig; private hooks: { [K in HookEventName]?: HookDefinition[] } | undefined; private projectHooks: | ({ [K in HookEventName]?: HookDefinition[] } & { disabled?: string[] }) @@ -721,6 +735,18 @@ export class Config { this.modelAvailabilityService = new ModelAvailabilityService(); this.previewFeatures = params.previewFeatures ?? undefined; this.experimentalJitContext = params.experimentalJitContext ?? false; + this.toolOutputMasking = { + enabled: params.toolOutputMasking?.enabled ?? false, + toolProtectionThreshold: + params.toolOutputMasking?.toolProtectionThreshold ?? + DEFAULT_TOOL_PROTECTION_THRESHOLD, + minPrunableTokensThreshold: + params.toolOutputMasking?.minPrunableTokensThreshold ?? + DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD, + protectLatestTurn: + params.toolOutputMasking?.protectLatestTurn ?? + DEFAULT_PROTECT_LATEST_TURN, + }; this.maxSessionTurns = params.maxSessionTurns ?? -1; this.experimentalZedIntegration = params.experimentalZedIntegration ?? false; @@ -1445,6 +1471,14 @@ export class Config { return this.experimentalJitContext; } + getToolOutputMaskingEnabled(): boolean { + return this.toolOutputMasking.enabled; + } + + getToolOutputMaskingConfig(): ToolOutputMaskingConfig { + return this.toolOutputMasking; + } + getGeminiMdFileCount(): number { if (this.experimentalJitContext && this.contextManager) { return this.contextManager.getLoadedPaths().size; diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index b7323dfee8..98c7dd0b30 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -213,6 +213,7 @@ describe('Gemini Client (client.ts)', () => { getGlobalMemory: vi.fn().mockReturnValue(''), getEnvironmentMemory: vi.fn().mockReturnValue(''), isJitContextEnabled: vi.fn().mockReturnValue(false), + getToolOutputMaskingEnabled: vi.fn().mockReturnValue(false), getDisableLoopDetection: vi.fn().mockReturnValue(false), getSessionId: vi.fn().mockReturnValue('test-session-id'), diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index d6c3bb8520..91434d12b3 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -54,6 +54,7 @@ import { handleFallback } from '../fallback/handler.js'; import type { RoutingContext } from '../routing/routingStrategy.js'; import { debugLogger } from '../utils/debugLogger.js'; import type { ModelConfigKey } from '../services/modelConfigService.js'; +import { ToolOutputMaskingService } from '../services/toolOutputMaskingService.js'; import { calculateRequestTokenCount } from '../utils/tokenCalculation.js'; import { applyModelSelection, @@ -84,6 +85,7 @@ export class GeminiClient { private readonly loopDetector: LoopDetectionService; private readonly compressionService: ChatCompressionService; + private readonly toolOutputMaskingService: ToolOutputMaskingService; private lastPromptId: string; private currentSequenceModel: string | null = null; private lastSentIdeContext: IdeContext | undefined; @@ -98,6 +100,7 @@ export class GeminiClient { constructor(private readonly config: Config) { this.loopDetector = new LoopDetectionService(config); this.compressionService = new ChatCompressionService(); + this.toolOutputMaskingService = new ToolOutputMaskingService(); this.lastPromptId = this.config.getSessionId(); coreEvents.on(CoreEvent.ModelChanged, this.handleModelChanged); @@ -562,6 +565,8 @@ export class GeminiClient { const remainingTokenCount = tokenLimit(modelForLimitCheck) - this.getChat().getLastPromptTokenCount(); + await this.tryMaskToolOutputs(this.getHistory()); + // Estimate tokens. For text-only requests, we estimate based on character length. // For requests with non-text parts (like images, tools), we use the countTokens API. const estimatedRequestTokenCount = await calculateRequestTokenCount( @@ -1056,4 +1061,20 @@ export class GeminiClient { return info; } + + /** + * Masks bulky tool outputs to save context window space. + */ + private async tryMaskToolOutputs(history: Content[]): Promise { + if (!this.config.getToolOutputMaskingEnabled()) { + return; + } + const result = await this.toolOutputMaskingService.mask( + history, + this.config, + ); + if (result.maskedCount > 0) { + this.getChat().setHistory(result.newHistory); + } + } } diff --git a/packages/core/src/services/__snapshots__/toolOutputMaskingService.test.ts.snap b/packages/core/src/services/__snapshots__/toolOutputMaskingService.test.ts.snap new file mode 100644 index 0000000000..c99f06b4eb --- /dev/null +++ b/packages/core/src/services/__snapshots__/toolOutputMaskingService.test.ts.snap @@ -0,0 +1,31 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`ToolOutputMaskingService > should match the expected snapshot for a masked tool output 1`] = ` +" +Line +Line +Line +Line +Line +Line +Line +Line +Line +Line + +... [6 lines omitted] ... + +Line +Line +Line +Line +Line +Line +Line +Line +Line + + +Output too large. Full output available at: /mock/history/tool-outputs/run_shell_command_deterministic.txt +" +`; diff --git a/packages/core/src/services/toolOutputMaskingService.test.ts b/packages/core/src/services/toolOutputMaskingService.test.ts new file mode 100644 index 0000000000..05ab181f0d --- /dev/null +++ b/packages/core/src/services/toolOutputMaskingService.test.ts @@ -0,0 +1,500 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { + ToolOutputMaskingService, + MASKING_INDICATOR_TAG, +} from './toolOutputMaskingService.js'; +import { SHELL_TOOL_NAME } from '../tools/tool-names.js'; +import { estimateTokenCountSync } from '../utils/tokenCalculation.js'; +import type { Config } from '../config/config.js'; +import type { Content, Part } from '@google/genai'; + +vi.mock('../utils/tokenCalculation.js', () => ({ + estimateTokenCountSync: vi.fn(), +})); + +vi.mock('node:fs/promises', () => ({ + mkdir: vi.fn().mockResolvedValue(undefined), + writeFile: vi.fn().mockResolvedValue(undefined), +})); + +describe('ToolOutputMaskingService', () => { + let service: ToolOutputMaskingService; + let mockConfig: Config; + + const mockedEstimateTokenCountSync = vi.mocked(estimateTokenCountSync); + + beforeEach(() => { + service = new ToolOutputMaskingService(); + mockConfig = { + storage: { + getHistoryDir: () => '/mock/history', + }, + getUsageStatisticsEnabled: () => false, + getToolOutputMaskingConfig: () => ({ + enabled: true, + toolProtectionThreshold: 50000, + minPrunableTokensThreshold: 30000, + protectLatestTurn: true, + }), + } as unknown as Config; + vi.clearAllMocks(); + }); + + it('should not mask if total tool tokens are below protection threshold', async () => { + const history: Content[] = [ + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'test_tool', + response: { output: 'small output' }, + }, + }, + ], + }, + ]; + + mockedEstimateTokenCountSync.mockReturnValue(100); + + const result = await service.mask(history, mockConfig); + + expect(result.maskedCount).toBe(0); + expect(result.newHistory).toEqual(history); + }); + + const getToolResponse = (part: Part | undefined): string => { + const resp = part?.functionResponse?.response as + | { output: string } + | undefined; + return resp?.output ?? (resp as unknown as string) ?? ''; + }; + + it('should protect the latest turn and mask older outputs beyond 50k window if total > 30k', async () => { + // History: + // Turn 1: 60k (Oldest) + // Turn 2: 20k + // Turn 3: 10k (Latest) - Protected because PROTECT_LATEST_TURN is true + const history: Content[] = [ + { + role: 'user', + parts: [ + { + functionResponse: { + name: 't1', + response: { output: 'A'.repeat(60000) }, + }, + }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 't2', + response: { output: 'B'.repeat(20000) }, + }, + }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 't3', + response: { output: 'C'.repeat(10000) }, + }, + }, + ], + }, + ]; + + mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => { + const toolName = parts[0].functionResponse?.name; + const resp = parts[0].functionResponse?.response as Record< + string, + unknown + >; + const content = (resp?.['output'] as string) ?? JSON.stringify(resp); + if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100; + + if (toolName === 't1') return 60000; + if (toolName === 't2') return 20000; + if (toolName === 't3') return 10000; + return 0; + }); + + // Scanned: Turn 2 (20k), Turn 1 (60k). Total = 80k. + // Turn 2: Cumulative = 20k. Protected (<= 50k). + // Turn 1: Cumulative = 80k. Crossed 50k boundary. Prunabled. + // Total Prunable = 60k (> 30k trigger). + const result = await service.mask(history, mockConfig); + + expect(result.maskedCount).toBe(1); + expect(getToolResponse(result.newHistory[0].parts?.[0])).toContain( + `<${MASKING_INDICATOR_TAG}`, + ); + expect(getToolResponse(result.newHistory[1].parts?.[0])).toEqual( + 'B'.repeat(20000), + ); + expect(getToolResponse(result.newHistory[2].parts?.[0])).toEqual( + 'C'.repeat(10000), + ); + }); + + it('should perform global aggregation for many small parts once boundary is hit', async () => { + // history.length = 12. Skip index 11 (latest). + // Indices 0-10: 10k each. + // Index 10: 10k (Sum 10k) + // Index 9: 10k (Sum 20k) + // Index 8: 10k (Sum 30k) + // Index 7: 10k (Sum 40k) + // Index 6: 10k (Sum 50k) - Boundary hit here? + // Actually, Boundary is 50k. So Index 6 crosses it. + // Index 6, 5, 4, 3, 2, 1, 0 are all prunable. (7 * 10k = 70k). + const history: Content[] = Array.from({ length: 12 }, (_, i) => ({ + role: 'user', + parts: [ + { + functionResponse: { + name: `tool${i}`, + response: { output: 'A'.repeat(10000) }, + }, + }, + ], + })); + + mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => { + const resp = parts[0].functionResponse?.response as + | { output?: string; result?: string } + | string + | undefined; + const content = + typeof resp === 'string' + ? resp + : resp?.output || resp?.result || JSON.stringify(resp); + if (content?.includes(`<${MASKING_INDICATOR_TAG}`)) return 100; + return content?.length || 0; + }); + + const result = await service.mask(history, mockConfig); + + expect(result.maskedCount).toBe(6); // boundary at 50k protects 0-5 + expect(result.tokensSaved).toBeGreaterThan(0); + }); + + it('should verify tool-aware previews (shell vs generic)', async () => { + const shellHistory: Content[] = [ + { + role: 'user', + parts: [ + { + functionResponse: { + name: SHELL_TOOL_NAME, + response: { + output: + 'Output: line1\nline2\nline3\nline4\nline5\nError: failed\nExit Code: 1', + }, + }, + }, + ], + }, + // Protection buffer + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'p', + response: { output: 'p'.repeat(60000) }, + }, + }, + ], + }, + // Latest turn + { + role: 'user', + parts: [{ functionResponse: { name: 'l', response: { output: 'l' } } }], + }, + ]; + + mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => { + const name = parts[0].functionResponse?.name; + const resp = parts[0].functionResponse?.response as Record< + string, + unknown + >; + const content = (resp?.['output'] as string) ?? JSON.stringify(resp); + if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100; + + if (name === SHELL_TOOL_NAME) return 100000; + if (name === 'p') return 60000; + return 100; + }); + + const result = await service.mask(shellHistory, mockConfig); + const maskedBash = getToolResponse(result.newHistory[0].parts?.[0]); + + expect(maskedBash).toContain('Output: line1\nline2\nline3\nline4\nline5'); + expect(maskedBash).toContain('Exit Code: 1'); + expect(maskedBash).toContain('Error: failed'); + }); + + it('should skip already masked content and not count it towards totals', async () => { + const history: Content[] = [ + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'tool1', + response: { + output: `<${MASKING_INDICATOR_TAG}>...`, + }, + }, + }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'tool2', + response: { output: 'A'.repeat(60000) }, + }, + }, + ], + }, + ]; + mockedEstimateTokenCountSync.mockReturnValue(60000); + + const result = await service.mask(history, mockConfig); + expect(result.maskedCount).toBe(0); // tool1 skipped, tool2 is the "latest" which is protected + }); + + it('should handle different response keys in masked update', async () => { + const history: Content[] = [ + { + role: 'model', + parts: [ + { + functionResponse: { + name: 't1', + response: { result: 'A'.repeat(60000) }, + }, + }, + ], + }, + { + role: 'model', + parts: [ + { + functionResponse: { + name: 'p', + response: { output: 'P'.repeat(60000) }, + }, + }, + ], + }, + { role: 'user', parts: [{ text: 'latest' }] }, + ]; + + mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => { + const resp = parts[0].functionResponse?.response as Record< + string, + unknown + >; + const content = + (resp?.['output'] as string) ?? + (resp?.['result'] as string) ?? + JSON.stringify(resp); + if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100; + return 60000; + }); + + const result = await service.mask(history, mockConfig); + expect(result.maskedCount).toBe(2); // both t1 and p are prunable (cumulative 60k and 120k) + const responseObj = result.newHistory[0].parts?.[0].functionResponse + ?.response as Record; + expect(Object.keys(responseObj)).toEqual(['output']); + }); + + it('should preserve multimodal parts while masking tool responses', async () => { + const history: Content[] = [ + { + role: 'user', + parts: [ + { + functionResponse: { + name: 't1', + response: { output: 'A'.repeat(60000) }, + }, + }, + { + inlineData: { + data: 'base64data', + mimeType: 'image/png', + }, + }, + ], + }, + // Protection buffer + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'p', + response: { output: 'p'.repeat(60000) }, + }, + }, + ], + }, + // Latest turn + { role: 'user', parts: [{ text: 'latest' }] }, + ]; + + mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => { + const resp = parts[0].functionResponse?.response as Record< + string, + unknown + >; + const content = (resp?.['output'] as string) ?? JSON.stringify(resp); + if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100; + + if (parts[0].functionResponse?.name === 't1') return 60000; + if (parts[0].functionResponse?.name === 'p') return 60000; + return 100; + }); + + const result = await service.mask(history, mockConfig); + + expect(result.maskedCount).toBe(2); //Both t1 and p are prunable (cumulative 60k each > 50k protection) + expect(result.newHistory[0].parts).toHaveLength(2); + expect(result.newHistory[0].parts?.[0].functionResponse).toBeDefined(); + expect( + ( + result.newHistory[0].parts?.[0].functionResponse?.response as Record< + string, + unknown + > + )['output'], + ).toContain(`<${MASKING_INDICATOR_TAG}`); + expect(result.newHistory[0].parts?.[1].inlineData).toEqual({ + data: 'base64data', + mimeType: 'image/png', + }); + }); + + it('should match the expected snapshot for a masked tool output', async () => { + const history: Content[] = [ + { + role: 'user', + parts: [ + { + functionResponse: { + name: SHELL_TOOL_NAME, + response: { + output: 'Line\n'.repeat(25), + exitCode: 0, + }, + }, + }, + ], + }, + // Buffer to push shell_tool into prunable territory + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'padding', + response: { output: 'B'.repeat(60000) }, + }, + }, + ], + }, + { role: 'user', parts: [{ text: 'latest' }] }, + ]; + + mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => { + const resp = parts[0].functionResponse?.response as Record< + string, + unknown + >; + const content = (resp?.['output'] as string) ?? JSON.stringify(resp); + if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100; + + if (parts[0].functionResponse?.name === SHELL_TOOL_NAME) return 1000; + if (parts[0].functionResponse?.name === 'padding') return 60000; + return 10; + }); + + const result = await service.mask(history, mockConfig); + + // Verify complete masking: only 'output' key should exist + const responseObj = result.newHistory[0].parts?.[0].functionResponse + ?.response as Record; + expect(Object.keys(responseObj)).toEqual(['output']); + + const response = responseObj['output'] as string; + + // We replace the random part of the filename for deterministic snapshots + // and normalize path separators for cross-platform compatibility + const deterministicResponse = response + .replace( + new RegExp(`${SHELL_TOOL_NAME}_[^\\s"]+\\.txt`, 'g'), + `${SHELL_TOOL_NAME}_deterministic.txt`, + ) + .replace(/\\/g, '/'); + + expect(deterministicResponse).toMatchSnapshot(); + }); + + it('should not mask if masking increases token count (due to overhead)', async () => { + const history: Content[] = [ + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'tiny_tool', + response: { output: 'tiny' }, + }, + }, + ], + }, + // Protection buffer to push tiny_tool into prunable territory + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'padding', + response: { output: 'B'.repeat(60000) }, + }, + }, + ], + }, + { role: 'user', parts: [{ text: 'latest' }] }, + ]; + + mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => { + if (parts[0].functionResponse?.name === 'tiny_tool') return 5; + if (parts[0].functionResponse?.name === 'padding') return 60000; + return 1000; // The masked version would be huge due to boilerplate + }); + + const result = await service.mask(history, mockConfig); + expect(result.maskedCount).toBe(0); // padding is protected, tiny_tool would increase size + }); +}); diff --git a/packages/core/src/services/toolOutputMaskingService.ts b/packages/core/src/services/toolOutputMaskingService.ts new file mode 100644 index 0000000000..76827da37a --- /dev/null +++ b/packages/core/src/services/toolOutputMaskingService.ts @@ -0,0 +1,344 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Content, Part } from '@google/genai'; +import path from 'node:path'; +import * as fsPromises from 'node:fs/promises'; +import { estimateTokenCountSync } from '../utils/tokenCalculation.js'; +import { debugLogger } from '../utils/debugLogger.js'; +import { sanitizeFilenamePart } from '../utils/fileUtils.js'; +import type { Config } from '../config/config.js'; +import { logToolOutputMasking } from '../telemetry/loggers.js'; +import { SHELL_TOOL_NAME } from '../tools/tool-names.js'; +import { ToolOutputMaskingEvent } from '../telemetry/types.js'; + +// Tool output masking defaults +export const DEFAULT_TOOL_PROTECTION_THRESHOLD = 50000; +export const DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD = 30000; +export const DEFAULT_PROTECT_LATEST_TURN = true; +export const MASKING_INDICATOR_TAG = 'tool_output_masked'; + +export const TOOL_OUTPUTS_DIR = 'tool-outputs'; + +export interface MaskingResult { + newHistory: Content[]; + maskedCount: number; + tokensSaved: number; +} + +/** + * Service to manage context window efficiency by masking bulky tool outputs (Tool Output Masking). + * + * It implements a "Hybrid Backward Scanned FIFO" algorithm to balance context relevance with + * token savings: + * 1. **Protection Window**: Protects the newest `toolProtectionThreshold` (default 50k) tool tokens + * from pruning. Optionally skips the entire latest conversation turn to ensure full context for + * the model's next response. + * 2. **Global Aggregation**: Scans backwards past the protection window to identify all remaining + * tool outputs that haven't been masked yet. + * 3. **Batch Trigger**: Trigger masking only if the total prunable tokens exceed + * `minPrunableTokensThreshold` (default 30k). + * + * @remarks + * Effectively, this means masking only starts once the conversation contains approximately 80k + * tokens of prunable tool outputs (50k protected + 30k prunable buffer). Small tool outputs + * are preserved until they collectively reach the threshold. + */ +export class ToolOutputMaskingService { + async mask(history: Content[], config: Config): Promise { + if (history.length === 0) { + return { newHistory: history, maskedCount: 0, tokensSaved: 0 }; + } + + let cumulativeToolTokens = 0; + let protectionBoundaryReached = false; + let totalPrunableTokens = 0; + let maskedCount = 0; + + const prunableParts: Array<{ + contentIndex: number; + partIndex: number; + tokens: number; + content: string; + originalPart: Part; + }> = []; + + const maskingConfig = config.getToolOutputMaskingConfig(); + + // Decide where to start scanning. + // If PROTECT_LATEST_TURN is true, we skip the most recent message (index history.length - 1). + const scanStartIdx = maskingConfig.protectLatestTurn + ? history.length - 2 + : history.length - 1; + + // Backward scan to identify prunable tool outputs + for (let i = scanStartIdx; i >= 0; i--) { + const content = history[i]; + const parts = content.parts || []; + + for (let j = parts.length - 1; j >= 0; j--) { + const part = parts[j]; + + // Tool outputs (functionResponse) are the primary targets for pruning because + // they often contain voluminous data (e.g., shell logs, file content) that + // can exceed context limits. We preserve other parts—such as user text, + // model reasoning, and multimodal data—because they define the conversation's + // core intent and logic, which are harder for the model to recover if lost. + if (!part.functionResponse) continue; + + const toolOutputContent = this.getToolOutputContent(part); + if (!toolOutputContent || this.isAlreadyMasked(toolOutputContent)) { + continue; + } + + const partTokens = estimateTokenCountSync([part]); + + if (!protectionBoundaryReached) { + cumulativeToolTokens += partTokens; + if (cumulativeToolTokens > maskingConfig.toolProtectionThreshold) { + protectionBoundaryReached = true; + // The part that crossed the boundary is prunable. + totalPrunableTokens += partTokens; + prunableParts.push({ + contentIndex: i, + partIndex: j, + tokens: partTokens, + content: toolOutputContent, + originalPart: part, + }); + } + } else { + totalPrunableTokens += partTokens; + prunableParts.push({ + contentIndex: i, + partIndex: j, + tokens: partTokens, + content: toolOutputContent, + originalPart: part, + }); + } + } + } + + // Trigger pruning only if we have accumulated enough savings to justify the + // overhead of masking and file I/O (batch pruning threshold). + if (totalPrunableTokens < maskingConfig.minPrunableTokensThreshold) { + return { newHistory: history, maskedCount: 0, tokensSaved: 0 }; + } + + debugLogger.debug( + `[ToolOutputMasking] Triggering masking. Prunable tool tokens: ${totalPrunableTokens.toLocaleString()} (> ${maskingConfig.minPrunableTokensThreshold.toLocaleString()})`, + ); + + // Perform masking and offloading + const newHistory = [...history]; // Shallow copy of history + let actualTokensSaved = 0; + const toolOutputsDir = path.join( + config.storage.getHistoryDir(), + TOOL_OUTPUTS_DIR, + ); + await fsPromises.mkdir(toolOutputsDir, { recursive: true }); + + for (const item of prunableParts) { + const { contentIndex, partIndex, content, tokens } = item; + const contentRecord = newHistory[contentIndex]; + const part = contentRecord.parts![partIndex]; + + if (!part.functionResponse) continue; + + const toolName = part.functionResponse.name || 'unknown_tool'; + const callId = part.functionResponse.id || Date.now().toString(); + const safeToolName = sanitizeFilenamePart(toolName).toLowerCase(); + const safeCallId = sanitizeFilenamePart(callId).toLowerCase(); + const fileName = `${safeToolName}_${safeCallId}_${Math.random() + .toString(36) + .substring(7)}.txt`; + const filePath = path.join(toolOutputsDir, fileName); + + await fsPromises.writeFile(filePath, content, 'utf-8'); + + const originalResponse = + (part.functionResponse.response as Record) || {}; + + const totalLines = content.split('\n').length; + const fileSizeMB = ( + Buffer.byteLength(content, 'utf8') / + 1024 / + 1024 + ).toFixed(2); + + let preview = ''; + if (toolName === SHELL_TOOL_NAME) { + preview = this.formatShellPreview(originalResponse); + } else { + // General tools: Head + Tail preview (250 chars each) + if (content.length > 500) { + preview = `${content.slice(0, 250)}\n... [TRUNCATED] ...\n${content.slice(-250)}`; + } else { + preview = content; + } + } + + const maskedSnippet = this.formatMaskedSnippet({ + toolName, + filePath, + fileSizeMB, + totalLines, + tokens, + preview, + }); + + const maskedPart = { + ...part, + functionResponse: { + ...part.functionResponse, + response: { output: maskedSnippet }, + }, + }; + + const newTaskTokens = estimateTokenCountSync([maskedPart]); + const savings = tokens - newTaskTokens; + + if (savings > 0) { + const newParts = [...contentRecord.parts!]; + newParts[partIndex] = maskedPart; + newHistory[contentIndex] = { ...contentRecord, parts: newParts }; + actualTokensSaved += savings; + maskedCount++; + } + } + + debugLogger.debug( + `[ToolOutputMasking] Masked ${maskedCount} tool outputs. Saved ~${actualTokensSaved.toLocaleString()} tokens.`, + ); + + const result = { + newHistory, + maskedCount, + tokensSaved: actualTokensSaved, + }; + + if (actualTokensSaved <= 0) { + return result; + } + + logToolOutputMasking( + config, + new ToolOutputMaskingEvent({ + tokens_before: totalPrunableTokens, + tokens_after: totalPrunableTokens - actualTokensSaved, + masked_count: maskedCount, + total_prunable_tokens: totalPrunableTokens, + }), + ); + + return result; + } + + private getToolOutputContent(part: Part): string | null { + if (!part.functionResponse) return null; + const response = part.functionResponse.response as Record; + if (!response) return null; + + // Stringify the entire response for saving. + // This handles any tool output schema automatically. + const content = JSON.stringify(response, null, 2); + + // Multimodal safety check: Sibling parts (inlineData, etc.) are handled by mask() + // by keeping the original part structure and only replacing the functionResponse content. + + return content; + } + + private isAlreadyMasked(content: string): boolean { + return content.includes(`<${MASKING_INDICATOR_TAG}`); + } + + private formatShellPreview(response: Record): string { + const content = (response['output'] || response['stdout'] || '') as string; + if (typeof content !== 'string') { + return typeof content === 'object' + ? JSON.stringify(content) + : String(content); + } + + // The shell tool output is structured in shell.ts with specific section prefixes: + const sectionRegex = + /^(Output|Error|Exit Code|Signal|Background PIDs|Process Group PGID): /m; + const parts = content.split(sectionRegex); + + if (parts.length < 3) { + // Fallback to simple head/tail if not in expected shell.ts format + return this.formatSimplePreview(content); + } + + const previewParts: string[] = []; + if (parts[0].trim()) { + previewParts.push(this.formatSimplePreview(parts[0].trim())); + } + + for (let i = 1; i < parts.length; i += 2) { + const name = parts[i]; + const sectionContent = parts[i + 1]?.trim() || ''; + + if (name === 'Output') { + previewParts.push( + `Output: ${this.formatSimplePreview(sectionContent)}`, + ); + } else { + // Keep other sections (Error, Exit Code, etc.) in full as they are usually high-signal and small + previewParts.push(`${name}: ${sectionContent}`); + } + } + + let preview = previewParts.join('\n'); + + // Also check root levels just in case some tool uses them or for future-proofing + const exitCode = response['exitCode'] ?? response['exit_code']; + const error = response['error']; + if ( + exitCode !== undefined && + exitCode !== 0 && + exitCode !== null && + !content.includes(`Exit Code: ${exitCode}`) + ) { + preview += `\n[Exit Code: ${exitCode}]`; + } + if (error && !content.includes(`Error: ${error}`)) { + preview += `\n[Error: ${error}]`; + } + + return preview; + } + + private formatSimplePreview(content: string): string { + const lines = content.split('\n'); + if (lines.length <= 20) return content; + const head = lines.slice(0, 10); + const tail = lines.slice(-10); + return `${head.join('\n')}\n\n... [${ + lines.length - head.length - tail.length + } lines omitted] ...\n\n${tail.join('\n')}`; + } + + private formatMaskedSnippet(params: MaskedSnippetParams): string { + const { filePath, preview } = params; + return `<${MASKING_INDICATOR_TAG}> +${preview} + +Output too large. Full output available at: ${filePath} +`; + } +} + +interface MaskedSnippetParams { + toolName: string; + filePath: string; + fileSizeMB: string; + totalLines: number; + tokens: number; + preview: string; +} diff --git a/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts b/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts index 83fe62006e..2afe9cf356 100644 --- a/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts +++ b/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts @@ -46,6 +46,7 @@ import type { ApprovalModeSwitchEvent, ApprovalModeDurationEvent, PlanExecutionEvent, + ToolOutputMaskingEvent, } from '../types.js'; import { EventMetadataKey } from './event-metadata-key.js'; import type { Config } from '../../config/config.js'; @@ -108,6 +109,7 @@ export enum EventNames { APPROVAL_MODE_SWITCH = 'approval_mode_switch', APPROVAL_MODE_DURATION = 'approval_mode_duration', PLAN_EXECUTION = 'plan_execution', + TOOL_OUTPUT_MASKING = 'tool_output_masking', } export interface LogResponse { @@ -1217,8 +1219,40 @@ export class ClearcutLogger { }, ]; + const logEvent = this.createLogEvent( + EventNames.TOOL_OUTPUT_TRUNCATED, + data, + ); + this.enqueueLogEvent(logEvent); + this.flushIfNeeded(); + } + + logToolOutputMaskingEvent(event: ToolOutputMaskingEvent): void { + const data: EventValue[] = [ + { + gemini_cli_key: + EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_BEFORE, + value: event.tokens_before.toString(), + }, + { + gemini_cli_key: + EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_AFTER, + value: event.tokens_after.toString(), + }, + { + gemini_cli_key: + EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_MASKED_COUNT, + value: event.masked_count.toString(), + }, + { + gemini_cli_key: + EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_TOTAL_PRUNABLE_TOKENS, + value: event.total_prunable_tokens.toString(), + }, + ]; + this.enqueueLogEvent( - this.createLogEvent(EventNames.TOOL_OUTPUT_TRUNCATED, data), + this.createLogEvent(EventNames.TOOL_OUTPUT_MASKING, data), ); this.flushIfNeeded(); } diff --git a/packages/core/src/telemetry/clearcut-logger/event-metadata-key.ts b/packages/core/src/telemetry/clearcut-logger/event-metadata-key.ts index 43535f6fa4..25e6e18d13 100644 --- a/packages/core/src/telemetry/clearcut-logger/event-metadata-key.ts +++ b/packages/core/src/telemetry/clearcut-logger/event-metadata-key.ts @@ -7,7 +7,7 @@ // Defines valid event metadata keys for Clearcut logging. export enum EventMetadataKey { // Deleted enums: 24 - // Next ID: 148 + // Next ID: 152 GEMINI_CLI_KEY_UNKNOWN = 0, @@ -561,4 +561,20 @@ export enum EventMetadataKey { // Logs the classifier threshold used. GEMINI_CLI_ROUTING_CLASSIFIER_THRESHOLD = 147, + + // ========================================================================== + // Tool Output Masking Event Keys + // ========================================================================== + + // Logs the total tokens in the prunable block before masking. + GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_BEFORE = 148, + + // Logs the total tokens in the masked remnants after masking. + GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_AFTER = 149, + + // Logs the number of tool outputs masked in this operation. + GEMINI_CLI_TOOL_OUTPUT_MASKING_MASKED_COUNT = 150, + + // Logs the total prunable tokens identified at the trigger point. + GEMINI_CLI_TOOL_OUTPUT_MASKING_TOTAL_PRUNABLE_TOKENS = 151, } diff --git a/packages/core/src/telemetry/loggers.ts b/packages/core/src/telemetry/loggers.ts index 945e3a2f6b..c5ab6887d1 100644 --- a/packages/core/src/telemetry/loggers.ts +++ b/packages/core/src/telemetry/loggers.ts @@ -56,6 +56,7 @@ import type { StartupStatsEvent, LlmLoopCheckEvent, PlanExecutionEvent, + ToolOutputMaskingEvent, } from './types.js'; import { recordApiErrorMetrics, @@ -163,6 +164,21 @@ export function logToolOutputTruncated( }); } +export function logToolOutputMasking( + config: Config, + event: ToolOutputMaskingEvent, +): void { + ClearcutLogger.getInstance(config)?.logToolOutputMaskingEvent(event); + bufferTelemetryEvent(() => { + const logger = logs.getLogger(SERVICE_NAME); + const logRecord: LogRecord = { + body: event.toLogBody(), + attributes: event.toOpenTelemetryAttributes(config), + }; + logger.emit(logRecord); + }); +} + export function logFileOperation( config: Config, event: FileOperationEvent, diff --git a/packages/core/src/telemetry/types.ts b/packages/core/src/telemetry/types.ts index e12b2847eb..0271aa4344 100644 --- a/packages/core/src/telemetry/types.ts +++ b/packages/core/src/telemetry/types.ts @@ -1376,6 +1376,49 @@ export class ToolOutputTruncatedEvent implements BaseTelemetryEvent { } } +export const EVENT_TOOL_OUTPUT_MASKING = 'gemini_cli.tool_output_masking'; + +export class ToolOutputMaskingEvent implements BaseTelemetryEvent { + 'event.name': 'tool_output_masking'; + 'event.timestamp': string; + tokens_before: number; + tokens_after: number; + masked_count: number; + total_prunable_tokens: number; + + constructor(details: { + tokens_before: number; + tokens_after: number; + masked_count: number; + total_prunable_tokens: number; + }) { + this['event.name'] = 'tool_output_masking'; + this['event.timestamp'] = new Date().toISOString(); + this.tokens_before = details.tokens_before; + this.tokens_after = details.tokens_after; + this.masked_count = details.masked_count; + this.total_prunable_tokens = details.total_prunable_tokens; + } + + toOpenTelemetryAttributes(config: Config): LogAttributes { + return { + ...getCommonAttributes(config), + 'event.name': EVENT_TOOL_OUTPUT_MASKING, + 'event.timestamp': this['event.timestamp'], + tokens_before: this.tokens_before, + tokens_after: this.tokens_after, + masked_count: this.masked_count, + total_prunable_tokens: this.total_prunable_tokens, + }; + } + + toLogBody(): string { + return `Tool output masking (Masked ${this.masked_count} tool outputs. Saved ${ + this.tokens_before - this.tokens_after + } tokens)`; + } +} + export const EVENT_EXTENSION_UNINSTALL = 'gemini_cli.extension_uninstall'; export class ExtensionUninstallEvent implements BaseTelemetryEvent { 'event.name': 'extension_uninstall'; @@ -1602,6 +1645,7 @@ export type TelemetryEvent = | LlmLoopCheckEvent | StartupStatsEvent | WebFetchFallbackAttemptEvent + | ToolOutputMaskingEvent | EditStrategyEvent | PlanExecutionEvent | RewindEvent diff --git a/packages/core/src/utils/fileUtils.ts b/packages/core/src/utils/fileUtils.ts index 6689467277..bb13fc01e0 100644 --- a/packages/core/src/utils/fileUtils.ts +++ b/packages/core/src/utils/fileUtils.ts @@ -572,6 +572,14 @@ export async function fileExists(filePath: string): Promise { const MAX_TRUNCATED_LINE_WIDTH = 1000; const MAX_TRUNCATED_CHARS = 4000; +/** + * Sanitizes a string for use as a filename part by removing path traversal + * characters and other non-alphanumeric characters. + */ +export function sanitizeFilenamePart(part: string): string { + return part.replace(/[^a-zA-Z0-9_-]/g, '_'); +} + /** * Formats a truncated message for tool output, handling multi-line and single-line (elephant) cases. */ @@ -623,11 +631,8 @@ export async function saveTruncatedToolOutput( id: string | number, // Accept string (callId) or number (truncationId) projectTempDir: string, ): Promise<{ outputFile: string; totalLines: number }> { - const safeToolName = toolName.replace(/[^a-z0-9]/gi, '_').toLowerCase(); - const safeId = id - .toString() - .replace(/[^a-z0-9]/gi, '_') - .toLowerCase(); + const safeToolName = sanitizeFilenamePart(toolName).toLowerCase(); + const safeId = sanitizeFilenamePart(id.toString()).toLowerCase(); const fileName = `${safeToolName}_${safeId}.txt`; const toolOutputDir = path.join(projectTempDir, TOOL_OUTPUT_DIR); const outputFile = path.join(toolOutputDir, fileName); diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index 5ee3d21b04..2098c26faf 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -1428,6 +1428,44 @@ "default": {}, "type": "object", "properties": { + "toolOutputMasking": { + "title": "Tool Output Masking", + "description": "Advanced settings for tool output masking to manage context window efficiency.", + "markdownDescription": "Advanced settings for tool output masking to manage context window efficiency.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `{}`", + "default": {}, + "type": "object", + "properties": { + "enabled": { + "title": "Enable Tool Output Masking", + "description": "Enables tool output masking to save tokens.", + "markdownDescription": "Enables tool output masking to save tokens.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`", + "default": false, + "type": "boolean" + }, + "toolProtectionThreshold": { + "title": "Tool Protection Threshold", + "description": "Minimum number of tokens to protect from masking (most recent tool outputs).", + "markdownDescription": "Minimum number of tokens to protect from masking (most recent tool outputs).\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `50000`", + "default": 50000, + "type": "number" + }, + "minPrunableTokensThreshold": { + "title": "Min Prunable Tokens Threshold", + "description": "Minimum prunable tokens required to trigger a masking pass.", + "markdownDescription": "Minimum prunable tokens required to trigger a masking pass.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `30000`", + "default": 30000, + "type": "number" + }, + "protectLatestTurn": { + "title": "Protect Latest Turn", + "description": "Ensures the absolute latest turn is never masked, regardless of token count.", + "markdownDescription": "Ensures the absolute latest turn is never masked, regardless of token count.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `true`", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false + }, "enableAgents": { "title": "Enable Agents", "description": "Enable local and remote subagents. Warning: Experimental feature, uses YOLO mode for subagents",