feat(context): implement observation masking for tool outputs (#18389)

This commit is contained in:
Abhi
2026-02-05 20:53:11 -05:00
committed by GitHub
parent 289769f544
commit 8ec176e005
15 changed files with 1151 additions and 7 deletions

7
.gemini/settings.json Normal file
View File

@@ -0,0 +1,7 @@
{
"experimental": {
"toolOutputMasking": {
"enabled": true
}
}
}

View File

@@ -806,6 +806,7 @@ export async function loadCliConfig(
skillsSupport: settings.skills?.enabled ?? true,
disabledSkills: settings.skills?.disabled,
experimentalJitContext: settings.experimental?.jitContext,
toolOutputMasking: settings.experimental?.toolOutputMasking,
noBrowser: !!process.env['NO_BROWSER'],
summarizeToolOutput: settings.model?.summarizeToolOutput,
ideMode,

View File

@@ -1462,6 +1462,58 @@ const SETTINGS_SCHEMA = {
description: 'Setting to enable experimental features',
showInDialog: false,
properties: {
toolOutputMasking: {
type: 'object',
label: 'Tool Output Masking',
category: 'Experimental',
requiresRestart: true,
ignoreInDocs: true,
default: {},
description:
'Advanced settings for tool output masking to manage context window efficiency.',
showInDialog: false,
properties: {
enabled: {
type: 'boolean',
label: 'Enable Tool Output Masking',
category: 'Experimental',
requiresRestart: true,
default: false,
description: 'Enables tool output masking to save tokens.',
showInDialog: false,
},
toolProtectionThreshold: {
type: 'number',
label: 'Tool Protection Threshold',
category: 'Experimental',
requiresRestart: true,
default: 50000,
description:
'Minimum number of tokens to protect from masking (most recent tool outputs).',
showInDialog: false,
},
minPrunableTokensThreshold: {
type: 'number',
label: 'Min Prunable Tokens Threshold',
category: 'Experimental',
requiresRestart: true,
default: 30000,
description:
'Minimum prunable tokens required to trigger a masking pass.',
showInDialog: false,
},
protectLatestTurn: {
type: 'boolean',
label: 'Protect Latest Turn',
category: 'Experimental',
requiresRestart: true,
default: true,
description:
'Ensures the absolute latest turn is never masked, regardless of token count.',
showInDialog: false,
},
},
},
enableAgents: {
type: 'boolean',
label: 'Enable Agents',

View File

@@ -149,6 +149,13 @@ export interface OutputSettings {
format?: OutputFormat;
}
/**
 * Fully-resolved settings that control tool output masking.
 */
export interface ToolOutputMaskingConfig {
/** Enables tool output masking to save tokens. */
enabled: boolean;
/**
 * Number of tokens of the most recent tool outputs that are protected from
 * masking; only outputs older than this window are candidates.
 */
toolProtectionThreshold: number;
/** Minimum prunable tokens required to trigger a masking pass. */
minPrunableTokensThreshold: number;
/** When true, the latest history entry is never scanned for masking. */
protectLatestTurn: boolean;
}
export interface ExtensionSetting {
name: string;
description: string;
@@ -273,6 +280,11 @@ import {
DEFAULT_FILE_FILTERING_OPTIONS,
DEFAULT_MEMORY_FILE_FILTERING_OPTIONS,
} from './constants.js';
import {
DEFAULT_TOOL_PROTECTION_THRESHOLD,
DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD,
DEFAULT_PROTECT_LATEST_TURN,
} from '../services/toolOutputMaskingService.js';
import {
type ExtensionLoader,
@@ -462,6 +474,7 @@ export interface ConfigParameters {
disabledSkills?: string[];
adminSkillsEnabled?: boolean;
experimentalJitContext?: boolean;
toolOutputMasking?: Partial<ToolOutputMaskingConfig>;
disableLLMCorrection?: boolean;
plan?: boolean;
onModelChange?: (model: string) => void;
@@ -599,6 +612,7 @@ export class Config {
private pendingIncludeDirectories: string[];
private readonly enableHooks: boolean;
private readonly enableHooksUI: boolean;
private readonly toolOutputMasking: ToolOutputMaskingConfig;
private hooks: { [K in HookEventName]?: HookDefinition[] } | undefined;
private projectHooks:
| ({ [K in HookEventName]?: HookDefinition[] } & { disabled?: string[] })
@@ -721,6 +735,18 @@ export class Config {
this.modelAvailabilityService = new ModelAvailabilityService();
this.previewFeatures = params.previewFeatures ?? undefined;
this.experimentalJitContext = params.experimentalJitContext ?? false;
this.toolOutputMasking = {
enabled: params.toolOutputMasking?.enabled ?? false,
toolProtectionThreshold:
params.toolOutputMasking?.toolProtectionThreshold ??
DEFAULT_TOOL_PROTECTION_THRESHOLD,
minPrunableTokensThreshold:
params.toolOutputMasking?.minPrunableTokensThreshold ??
DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD,
protectLatestTurn:
params.toolOutputMasking?.protectLatestTurn ??
DEFAULT_PROTECT_LATEST_TURN,
};
this.maxSessionTurns = params.maxSessionTurns ?? -1;
this.experimentalZedIntegration =
params.experimentalZedIntegration ?? false;
@@ -1445,6 +1471,14 @@ export class Config {
return this.experimentalJitContext;
}
/**
 * Reports whether tool output masking is switched on for this configuration.
 *
 * @returns The `enabled` flag of the resolved tool output masking settings.
 */
getToolOutputMaskingEnabled(): boolean {
  const { enabled } = this.toolOutputMasking;
  return enabled;
}
/**
 * Returns the resolved tool output masking settings held by this config.
 *
 * The stored object itself is returned (not a copy).
 */
getToolOutputMaskingConfig(): ToolOutputMaskingConfig {
return this.toolOutputMasking;
}
getGeminiMdFileCount(): number {
if (this.experimentalJitContext && this.contextManager) {
return this.contextManager.getLoadedPaths().size;

View File

@@ -213,6 +213,7 @@ describe('Gemini Client (client.ts)', () => {
getGlobalMemory: vi.fn().mockReturnValue(''),
getEnvironmentMemory: vi.fn().mockReturnValue(''),
isJitContextEnabled: vi.fn().mockReturnValue(false),
getToolOutputMaskingEnabled: vi.fn().mockReturnValue(false),
getDisableLoopDetection: vi.fn().mockReturnValue(false),
getSessionId: vi.fn().mockReturnValue('test-session-id'),

View File

@@ -54,6 +54,7 @@ import { handleFallback } from '../fallback/handler.js';
import type { RoutingContext } from '../routing/routingStrategy.js';
import { debugLogger } from '../utils/debugLogger.js';
import type { ModelConfigKey } from '../services/modelConfigService.js';
import { ToolOutputMaskingService } from '../services/toolOutputMaskingService.js';
import { calculateRequestTokenCount } from '../utils/tokenCalculation.js';
import {
applyModelSelection,
@@ -84,6 +85,7 @@ export class GeminiClient {
private readonly loopDetector: LoopDetectionService;
private readonly compressionService: ChatCompressionService;
private readonly toolOutputMaskingService: ToolOutputMaskingService;
private lastPromptId: string;
private currentSequenceModel: string | null = null;
private lastSentIdeContext: IdeContext | undefined;
@@ -98,6 +100,7 @@ export class GeminiClient {
constructor(private readonly config: Config) {
this.loopDetector = new LoopDetectionService(config);
this.compressionService = new ChatCompressionService();
this.toolOutputMaskingService = new ToolOutputMaskingService();
this.lastPromptId = this.config.getSessionId();
coreEvents.on(CoreEvent.ModelChanged, this.handleModelChanged);
@@ -562,6 +565,8 @@ export class GeminiClient {
const remainingTokenCount =
tokenLimit(modelForLimitCheck) - this.getChat().getLastPromptTokenCount();
await this.tryMaskToolOutputs(this.getHistory());
// Estimate tokens. For text-only requests, we estimate based on character length.
// For requests with non-text parts (like images, tools), we use the countTokens API.
const estimatedRequestTokenCount = await calculateRequestTokenCount(
@@ -1056,4 +1061,20 @@ export class GeminiClient {
return info;
}
/**
 * Masks bulky tool outputs to save context window space.
 *
 * Masking is a best-effort optimization: the masking service offloads tool
 * outputs to disk, so it can fail for reasons unrelated to the conversation
 * (for example an unwritable history directory). Such a failure must not abort
 * the request that triggered it, so errors are logged and the history is left
 * exactly as it was.
 *
 * @param history The current chat history to scan for maskable tool outputs.
 */
private async tryMaskToolOutputs(history: Content[]): Promise<void> {
  if (!this.config.getToolOutputMaskingEnabled()) {
    return;
  }

  try {
    const result = await this.toolOutputMaskingService.mask(
      history,
      this.config,
    );

    // Only replace the chat history when something was actually masked.
    if (result.maskedCount > 0) {
      this.getChat().setHistory(result.newHistory);
    }
  } catch (error: unknown) {
    debugLogger.warn(
      '[ToolOutputMasking] Masking failed; continuing with unmasked history.',
      error,
    );
  }
}
}

View File

@@ -0,0 +1,31 @@
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
exports[`ToolOutputMaskingService > should match the expected snapshot for a masked tool output 1`] = `
"<tool_output_masked>
Line
Line
Line
Line
Line
Line
Line
Line
Line
Line
... [6 lines omitted] ...
Line
Line
Line
Line
Line
Line
Line
Line
Line
Output too large. Full output available at: /mock/history/tool-outputs/run_shell_command_deterministic.txt
</tool_output_masked>"
`;

View File

@@ -0,0 +1,500 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi, beforeEach } from 'vitest';
import {
ToolOutputMaskingService,
MASKING_INDICATOR_TAG,
} from './toolOutputMaskingService.js';
import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
import { estimateTokenCountSync } from '../utils/tokenCalculation.js';
import type { Config } from '../config/config.js';
import type { Content, Part } from '@google/genai';
vi.mock('../utils/tokenCalculation.js', () => ({
estimateTokenCountSync: vi.fn(),
}));
vi.mock('node:fs/promises', () => ({
mkdir: vi.fn().mockResolvedValue(undefined),
writeFile: vi.fn().mockResolvedValue(undefined),
}));
describe('ToolOutputMaskingService', () => {
let service: ToolOutputMaskingService;
let mockConfig: Config;
const mockedEstimateTokenCountSync = vi.mocked(estimateTokenCountSync);
beforeEach(() => {
service = new ToolOutputMaskingService();
mockConfig = {
storage: {
getHistoryDir: () => '/mock/history',
},
getUsageStatisticsEnabled: () => false,
getToolOutputMaskingConfig: () => ({
enabled: true,
toolProtectionThreshold: 50000,
minPrunableTokensThreshold: 30000,
protectLatestTurn: true,
}),
} as unknown as Config;
vi.clearAllMocks();
});
it('should not mask if total tool tokens are below protection threshold', async () => {
const history: Content[] = [
{
role: 'user',
parts: [
{
functionResponse: {
name: 'test_tool',
response: { output: 'small output' },
},
},
],
},
];
mockedEstimateTokenCountSync.mockReturnValue(100);
const result = await service.mask(history, mockConfig);
expect(result.maskedCount).toBe(0);
expect(result.newHistory).toEqual(history);
});
/**
 * Test helper: extracts the textual payload of a tool response part.
 *
 * Yields the `output` field when present, the raw response value when there is
 * no `output`, and an empty string when the part carries no response at all.
 */
const getToolResponse = (part: Part | undefined): string => {
  const response = part?.functionResponse?.response as
    | { output: string }
    | undefined;
  if (response === undefined || response === null) {
    return '';
  }
  const { output } = response;
  return output ?? (response as unknown as string);
};
it('should protect the latest turn and mask older outputs beyond 50k window if total > 30k', async () => {
// History:
// Turn 1: 60k (Oldest)
// Turn 2: 20k
// Turn 3: 10k (Latest) - Protected because PROTECT_LATEST_TURN is true
const history: Content[] = [
{
role: 'user',
parts: [
{
functionResponse: {
name: 't1',
response: { output: 'A'.repeat(60000) },
},
},
],
},
{
role: 'user',
parts: [
{
functionResponse: {
name: 't2',
response: { output: 'B'.repeat(20000) },
},
},
],
},
{
role: 'user',
parts: [
{
functionResponse: {
name: 't3',
response: { output: 'C'.repeat(10000) },
},
},
],
},
];
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
const toolName = parts[0].functionResponse?.name;
const resp = parts[0].functionResponse?.response as Record<
string,
unknown
>;
const content = (resp?.['output'] as string) ?? JSON.stringify(resp);
if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
if (toolName === 't1') return 60000;
if (toolName === 't2') return 20000;
if (toolName === 't3') return 10000;
return 0;
});
// Scanned: Turn 2 (20k), Turn 1 (60k). Total = 80k.
// Turn 2: Cumulative = 20k. Protected (<= 50k).
// Turn 1: Cumulative = 80k. Crossed 50k boundary. Prunable.
// Total Prunable = 60k (> 30k trigger).
const result = await service.mask(history, mockConfig);
expect(result.maskedCount).toBe(1);
expect(getToolResponse(result.newHistory[0].parts?.[0])).toContain(
`<${MASKING_INDICATOR_TAG}`,
);
expect(getToolResponse(result.newHistory[1].parts?.[0])).toEqual(
'B'.repeat(20000),
);
expect(getToolResponse(result.newHistory[2].parts?.[0])).toEqual(
'C'.repeat(10000),
);
});
it('should perform global aggregation for many small parts once boundary is hit', async () => {
// history.length = 12. Index 11 (latest turn) is skipped; indices 0-10 are 10k tokens each.
// Backward scan:
// Index 10: 10k (Sum 10k)
// Index 9: 10k (Sum 20k)
// Index 8: 10k (Sum 30k)
// Index 7: 10k (Sum 40k)
// Index 6: 10k (Sum 50k) - still protected: the boundary check is strictly "> 50k".
// Index 5: 10k (Sum 60k) - crosses the boundary, so it is the first prunable part.
// Indices 5, 4, 3, 2, 1, 0 are all prunable (6 * 10k = 60k, above the 30k trigger).
const history: Content[] = Array.from({ length: 12 }, (_, i) => ({
role: 'user',
parts: [
{
functionResponse: {
name: `tool${i}`,
response: { output: 'A'.repeat(10000) },
},
},
],
}));
// Token count equals the output length, except masked snippets which count as 100.
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
const resp = parts[0].functionResponse?.response as
| { output?: string; result?: string }
| string
| undefined;
const content =
typeof resp === 'string'
? resp
: resp?.output || resp?.result || JSON.stringify(resp);
if (content?.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
return content?.length || 0;
});
const result = await service.mask(history, mockConfig);
expect(result.maskedCount).toBe(6); // indices 6-10 stay protected; indices 0-5 are masked
expect(result.tokensSaved).toBeGreaterThan(0);
});
it('should verify tool-aware previews (shell vs generic)', async () => {
const shellHistory: Content[] = [
{
role: 'user',
parts: [
{
functionResponse: {
name: SHELL_TOOL_NAME,
response: {
output:
'Output: line1\nline2\nline3\nline4\nline5\nError: failed\nExit Code: 1',
},
},
},
],
},
// Protection buffer
{
role: 'user',
parts: [
{
functionResponse: {
name: 'p',
response: { output: 'p'.repeat(60000) },
},
},
],
},
// Latest turn
{
role: 'user',
parts: [{ functionResponse: { name: 'l', response: { output: 'l' } } }],
},
];
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
const name = parts[0].functionResponse?.name;
const resp = parts[0].functionResponse?.response as Record<
string,
unknown
>;
const content = (resp?.['output'] as string) ?? JSON.stringify(resp);
if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
if (name === SHELL_TOOL_NAME) return 100000;
if (name === 'p') return 60000;
return 100;
});
const result = await service.mask(shellHistory, mockConfig);
const maskedBash = getToolResponse(result.newHistory[0].parts?.[0]);
expect(maskedBash).toContain('Output: line1\nline2\nline3\nline4\nline5');
expect(maskedBash).toContain('Exit Code: 1');
expect(maskedBash).toContain('Error: failed');
});
it('should skip already masked content and not count it towards totals', async () => {
const history: Content[] = [
{
role: 'user',
parts: [
{
functionResponse: {
name: 'tool1',
response: {
output: `<${MASKING_INDICATOR_TAG}>...</${MASKING_INDICATOR_TAG}>`,
},
},
},
],
},
{
role: 'user',
parts: [
{
functionResponse: {
name: 'tool2',
response: { output: 'A'.repeat(60000) },
},
},
],
},
];
mockedEstimateTokenCountSync.mockReturnValue(60000);
const result = await service.mask(history, mockConfig);
expect(result.maskedCount).toBe(0); // tool1 skipped, tool2 is the "latest" which is protected
});
it('should handle different response keys in masked update', async () => {
const history: Content[] = [
{
role: 'model',
parts: [
{
functionResponse: {
name: 't1',
response: { result: 'A'.repeat(60000) },
},
},
],
},
{
role: 'model',
parts: [
{
functionResponse: {
name: 'p',
response: { output: 'P'.repeat(60000) },
},
},
],
},
{ role: 'user', parts: [{ text: 'latest' }] },
];
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
const resp = parts[0].functionResponse?.response as Record<
string,
unknown
>;
const content =
(resp?.['output'] as string) ??
(resp?.['result'] as string) ??
JSON.stringify(resp);
if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
return 60000;
});
const result = await service.mask(history, mockConfig);
expect(result.maskedCount).toBe(2); // both t1 and p are prunable (cumulative 60k and 120k)
const responseObj = result.newHistory[0].parts?.[0].functionResponse
?.response as Record<string, unknown>;
expect(Object.keys(responseObj)).toEqual(['output']);
});
it('should preserve multimodal parts while masking tool responses', async () => {
const history: Content[] = [
{
role: 'user',
parts: [
{
functionResponse: {
name: 't1',
response: { output: 'A'.repeat(60000) },
},
},
{
inlineData: {
data: 'base64data',
mimeType: 'image/png',
},
},
],
},
// Protection buffer
{
role: 'user',
parts: [
{
functionResponse: {
name: 'p',
response: { output: 'p'.repeat(60000) },
},
},
],
},
// Latest turn
{ role: 'user', parts: [{ text: 'latest' }] },
];
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
const resp = parts[0].functionResponse?.response as Record<
string,
unknown
>;
const content = (resp?.['output'] as string) ?? JSON.stringify(resp);
if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
if (parts[0].functionResponse?.name === 't1') return 60000;
if (parts[0].functionResponse?.name === 'p') return 60000;
return 100;
});
const result = await service.mask(history, mockConfig);
expect(result.maskedCount).toBe(2); //Both t1 and p are prunable (cumulative 60k each > 50k protection)
expect(result.newHistory[0].parts).toHaveLength(2);
expect(result.newHistory[0].parts?.[0].functionResponse).toBeDefined();
expect(
(
result.newHistory[0].parts?.[0].functionResponse?.response as Record<
string,
unknown
>
)['output'],
).toContain(`<${MASKING_INDICATOR_TAG}`);
expect(result.newHistory[0].parts?.[1].inlineData).toEqual({
data: 'base64data',
mimeType: 'image/png',
});
});
it('should match the expected snapshot for a masked tool output', async () => {
const history: Content[] = [
{
role: 'user',
parts: [
{
functionResponse: {
name: SHELL_TOOL_NAME,
response: {
output: 'Line\n'.repeat(25),
exitCode: 0,
},
},
},
],
},
// Buffer to push shell_tool into prunable territory
{
role: 'user',
parts: [
{
functionResponse: {
name: 'padding',
response: { output: 'B'.repeat(60000) },
},
},
],
},
{ role: 'user', parts: [{ text: 'latest' }] },
];
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
const resp = parts[0].functionResponse?.response as Record<
string,
unknown
>;
const content = (resp?.['output'] as string) ?? JSON.stringify(resp);
if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
if (parts[0].functionResponse?.name === SHELL_TOOL_NAME) return 1000;
if (parts[0].functionResponse?.name === 'padding') return 60000;
return 10;
});
const result = await service.mask(history, mockConfig);
// Verify complete masking: only 'output' key should exist
const responseObj = result.newHistory[0].parts?.[0].functionResponse
?.response as Record<string, unknown>;
expect(Object.keys(responseObj)).toEqual(['output']);
const response = responseObj['output'] as string;
// We replace the random part of the filename for deterministic snapshots
// and normalize path separators for cross-platform compatibility
const deterministicResponse = response
.replace(
new RegExp(`${SHELL_TOOL_NAME}_[^\\s"]+\\.txt`, 'g'),
`${SHELL_TOOL_NAME}_deterministic.txt`,
)
.replace(/\\/g, '/');
expect(deterministicResponse).toMatchSnapshot();
});
it('should not mask if masking increases token count (due to overhead)', async () => {
const history: Content[] = [
{
role: 'user',
parts: [
{
functionResponse: {
name: 'tiny_tool',
response: { output: 'tiny' },
},
},
],
},
// Padding that crosses the 50k protection boundary so tiny_tool lands in prunable territory
{
role: 'user',
parts: [
{
functionResponse: {
name: 'padding',
response: { output: 'B'.repeat(60000) },
},
},
],
},
{ role: 'user', parts: [{ text: 'latest' }] },
];
// The mock keys on the tool name only, so a masked part reports the same token
// count as its original and masking never yields a positive saving.
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
if (parts[0].functionResponse?.name === 'tiny_tool') return 5;
if (parts[0].functionResponse?.name === 'padding') return 60000;
return 1000; // Fallback for any other part; not reached by the parts in this history
});
const result = await service.mask(history, mockConfig);
expect(result.maskedCount).toBe(0); // both parts are prunable, but neither masked version saves tokens
});
});

View File

@@ -0,0 +1,344 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { Content, Part } from '@google/genai';
import path from 'node:path';
import * as fsPromises from 'node:fs/promises';
import { estimateTokenCountSync } from '../utils/tokenCalculation.js';
import { debugLogger } from '../utils/debugLogger.js';
import { sanitizeFilenamePart } from '../utils/fileUtils.js';
import type { Config } from '../config/config.js';
import { logToolOutputMasking } from '../telemetry/loggers.js';
import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
import { ToolOutputMaskingEvent } from '../telemetry/types.js';
// Tool output masking defaults
export const DEFAULT_TOOL_PROTECTION_THRESHOLD = 50000;
export const DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD = 30000;
export const DEFAULT_PROTECT_LATEST_TURN = true;
export const MASKING_INDICATOR_TAG = 'tool_output_masked';
export const TOOL_OUTPUTS_DIR = 'tool-outputs';
export interface MaskingResult {
newHistory: Content[];
maskedCount: number;
tokensSaved: number;
}
/**
 * Service to manage context window efficiency by masking bulky tool outputs (Tool Output Masking).
 *
 * It implements a "Hybrid Backward Scanned FIFO" algorithm to balance context relevance with
 * token savings:
 * 1. **Protection Window**: Protects the newest `toolProtectionThreshold` (default 50k) tool tokens
 *    from pruning. Optionally skips the entire latest conversation turn to ensure full context for
 *    the model's next response.
 * 2. **Global Aggregation**: Scans backwards past the protection window to identify all remaining
 *    tool outputs that haven't been masked yet.
 * 3. **Batch Trigger**: Trigger masking only if the total prunable tokens exceed
 *    `minPrunableTokensThreshold` (default 30k).
 *
 * @remarks
 * Effectively, this means masking only starts once the conversation contains approximately 80k
 * tokens of prunable tool outputs (50k protected + 30k prunable buffer). Small tool outputs
 * are preserved until they collectively reach the threshold.
 */
export class ToolOutputMaskingService {
  /**
   * Replaces old, bulky tool outputs in `history` with a short preview that
   * points at a file holding the full output.
   *
   * The input array and its entries are never mutated; changed entries are
   * replaced by copies in the returned `newHistory`.
   *
   * @param history Conversation history to scan.
   * @param config Provides the masking settings, the history directory used for
   *   offloaded outputs, and the telemetry context.
   * @returns The (possibly updated) history, the number of tool outputs masked
   *   and the estimated number of tokens saved.
   * @throws Propagates file-system errors from creating the output directory or
   *   writing an offloaded output.
   */
  async mask(history: Content[], config: Config): Promise<MaskingResult> {
    if (history.length === 0) {
      return { newHistory: history, maskedCount: 0, tokensSaved: 0 };
    }

    const maskingConfig = config.getToolOutputMaskingConfig();
    const { prunableParts, totalPrunableTokens } = this.collectPrunableParts(
      history,
      maskingConfig,
    );

    // Trigger pruning only if we have accumulated enough savings to justify the
    // overhead of masking and file I/O (batch pruning threshold). The explicit
    // empty check keeps a zero/negative threshold from creating the output
    // directory when there is nothing to offload.
    if (
      prunableParts.length === 0 ||
      totalPrunableTokens < maskingConfig.minPrunableTokensThreshold
    ) {
      return { newHistory: history, maskedCount: 0, tokensSaved: 0 };
    }

    debugLogger.debug(
      `[ToolOutputMasking] Triggering masking. Prunable tool tokens: ${totalPrunableTokens.toLocaleString()} (> ${maskingConfig.minPrunableTokensThreshold.toLocaleString()})`,
    );

    // Perform masking and offloading on a shallow copy of the history.
    const newHistory = [...history];
    const toolOutputsDir = path.join(
      config.storage.getHistoryDir(),
      TOOL_OUTPUTS_DIR,
    );
    await fsPromises.mkdir(toolOutputsDir, { recursive: true });

    let actualTokensSaved = 0;
    let maskedCount = 0;

    for (const { contentIndex, partIndex, content, tokens } of prunableParts) {
      // Read from newHistory (not history) so several masked parts inside the
      // same entry accumulate instead of overwriting each other.
      const contentRecord = newHistory[contentIndex];
      const currentParts = contentRecord?.parts;
      const part = currentParts?.[partIndex];
      const functionResponse = part?.functionResponse;
      if (!contentRecord || !currentParts || !part || !functionResponse) {
        continue;
      }

      const toolName = functionResponse.name || 'unknown_tool';
      const filePath = path.join(
        toolOutputsDir,
        this.buildOutputFileName(toolName, functionResponse.id),
      );

      const originalResponse =
        (functionResponse.response as Record<string, unknown> | undefined) ??
        {};
      const preview =
        toolName === SHELL_TOOL_NAME
          ? this.formatShellPreview(originalResponse)
          : this.formatHeadTailPreview(content);

      const maskedSnippet = this.formatMaskedSnippet({
        toolName,
        filePath,
        fileSizeMB: (Buffer.byteLength(content, 'utf8') / 1024 / 1024).toFixed(
          2,
        ),
        totalLines: content.split('\n').length,
        tokens,
        preview,
      });

      // Keep sibling fields of the part; only the response payload is replaced.
      const maskedPart: Part = {
        ...part,
        functionResponse: {
          ...functionResponse,
          response: { output: maskedSnippet },
        },
      };

      const savings = tokens - estimateTokenCountSync([maskedPart]);
      if (savings <= 0) {
        // The snippet is not smaller than the original: keep the original and
        // do not leave an orphan file behind.
        continue;
      }

      // Offload first, so the history never references a file that was not
      // written successfully.
      await fsPromises.writeFile(filePath, content, 'utf-8');

      const newParts = [...currentParts];
      newParts[partIndex] = maskedPart;
      newHistory[contentIndex] = { ...contentRecord, parts: newParts };
      actualTokensSaved += savings;
      maskedCount++;
    }

    debugLogger.debug(
      `[ToolOutputMasking] Masked ${maskedCount} tool outputs. Saved ~${actualTokensSaved.toLocaleString()} tokens.`,
    );

    const result: MaskingResult = {
      newHistory,
      maskedCount,
      tokensSaved: actualTokensSaved,
    };

    // Nothing was saved, so there is nothing worth reporting.
    if (actualTokensSaved <= 0) {
      return result;
    }

    logToolOutputMasking(
      config,
      new ToolOutputMaskingEvent({
        tokens_before: totalPrunableTokens,
        tokens_after: totalPrunableTokens - actualTokensSaved,
        masked_count: maskedCount,
        total_prunable_tokens: totalPrunableTokens,
      }),
    );

    return result;
  }

  /**
   * Scans the history backwards and collects every unmasked tool output that
   * lies beyond the protection window.
   */
  private collectPrunableParts(
    history: Content[],
    options: { protectLatestTurn: boolean; toolProtectionThreshold: number },
  ): { prunableParts: PrunablePart[]; totalPrunableTokens: number } {
    const prunableParts: PrunablePart[] = [];
    let cumulativeToolTokens = 0;
    let protectionBoundaryReached = false;
    let totalPrunableTokens = 0;

    // When the latest turn is protected, skip the most recent message
    // (index history.length - 1).
    const scanStartIdx = options.protectLatestTurn
      ? history.length - 2
      : history.length - 1;

    for (let i = scanStartIdx; i >= 0; i--) {
      const parts = history[i]?.parts ?? [];
      for (let j = parts.length - 1; j >= 0; j--) {
        const part = parts[j];
        // Tool outputs (functionResponse) are the primary targets for pruning because
        // they often contain voluminous data (e.g., shell logs, file content) that
        // can exceed context limits. We preserve other parts—such as user text,
        // model reasoning, and multimodal data—because they define the conversation's
        // core intent and logic, which are harder for the model to recover if lost.
        if (!part?.functionResponse) continue;

        const toolOutputContent = this.getToolOutputContent(part);
        if (!toolOutputContent || this.isAlreadyMasked(toolOutputContent)) {
          continue;
        }

        const partTokens = estimateTokenCountSync([part]);
        if (!protectionBoundaryReached) {
          cumulativeToolTokens += partTokens;
          if (!(cumulativeToolTokens > options.toolProtectionThreshold)) {
            // Still inside the protection window.
            continue;
          }
          // The part that crossed the boundary is itself prunable.
          protectionBoundaryReached = true;
        }

        totalPrunableTokens += partTokens;
        prunableParts.push({
          contentIndex: i,
          partIndex: j,
          tokens: partTokens,
          content: toolOutputContent,
        });
      }
    }

    return { prunableParts, totalPrunableTokens };
  }

  /**
   * Builds a file name for an offloaded tool output from the sanitized tool
   * name, the call id (or the current time when there is none) and a random
   * suffix.
   */
  private buildOutputFileName(toolName: string, callId?: string): string {
    const safeToolName = sanitizeFilenamePart(toolName).toLowerCase();
    const safeCallId = sanitizeFilenamePart(
      callId || Date.now().toString(),
    ).toLowerCase();
    // Skip the leading "0." and pad, so the suffix always has 8 characters even
    // when the base-36 expansion of the random number is short.
    const randomSuffix = Math.random()
      .toString(36)
      .slice(2, 10)
      .padEnd(8, '0');
    return `${safeToolName}_${safeCallId}_${randomSuffix}.txt`;
  }

  /**
   * Serializes the whole tool response so it can be saved verbatim, whatever
   * schema the tool uses. Returns null when the part has no response.
   */
  private getToolOutputContent(part: Part): string | null {
    if (!part.functionResponse) return null;
    const response = part.functionResponse.response as
      | Record<string, unknown>
      | undefined;
    if (!response) return null;

    // Sibling parts (inlineData, etc.) are untouched: mask() keeps the original
    // part structure and only replaces the functionResponse content.
    return JSON.stringify(response, null, 2);
  }

  /** True when the serialized response already contains the masking tag. */
  private isAlreadyMasked(content: string): boolean {
    return content.includes(`<${MASKING_INDICATOR_TAG}`);
  }

  /** Generic preview: first and last 250 characters of long content. */
  private formatHeadTailPreview(content: string): string {
    if (content.length <= 500) {
      return content;
    }
    return `${content.slice(0, 250)}\n... [TRUNCATED] ...\n${content.slice(-250)}`;
  }

  /**
   * Shell-aware preview: shortens the `Output` section and keeps the other
   * sections (Error, Exit Code, ...) in full.
   */
  private formatShellPreview(response: Record<string, unknown>): string {
    const content: unknown = response['output'] || response['stdout'] || '';
    if (typeof content !== 'string') {
      return typeof content === 'object'
        ? JSON.stringify(content)
        : String(content);
    }

    // The shell tool output is structured in shell.ts with specific section prefixes:
    const sectionRegex =
      /^(Output|Error|Exit Code|Signal|Background PIDs|Process Group PGID): /m;
    // With a capturing group, split() yields [leading, name, body, name, body, ...].
    const parts = content.split(sectionRegex);
    if (parts.length < 3) {
      // Fallback to simple head/tail if not in expected shell.ts format
      return this.formatSimplePreview(content);
    }

    const previewParts: string[] = [];
    const leading = parts[0]?.trim();
    if (leading) {
      previewParts.push(this.formatSimplePreview(leading));
    }

    for (let i = 1; i < parts.length; i += 2) {
      const name = parts[i];
      const sectionContent = parts[i + 1]?.trim() || '';
      if (name === 'Output') {
        previewParts.push(
          `Output: ${this.formatSimplePreview(sectionContent)}`,
        );
      } else {
        // Keep other sections (Error, Exit Code, etc.) in full as they are usually high-signal and small
        previewParts.push(`${name}: ${sectionContent}`);
      }
    }

    let preview = previewParts.join('\n');

    // Also check root levels just in case some tool uses them or for future-proofing
    const exitCode = response['exitCode'] ?? response['exit_code'];
    const error = response['error'];
    if (
      exitCode !== undefined &&
      exitCode !== 0 &&
      exitCode !== null &&
      !content.includes(`Exit Code: ${exitCode}`)
    ) {
      preview += `\n[Exit Code: ${exitCode}]`;
    }
    if (error && !content.includes(`Error: ${error}`)) {
      preview += `\n[Error: ${error}]`;
    }
    return preview;
  }

  /** Line-based preview: first and last 10 lines of content over 20 lines. */
  private formatSimplePreview(content: string): string {
    const lines = content.split('\n');
    if (lines.length <= 20) return content;

    const head = lines.slice(0, 10);
    const tail = lines.slice(-10);
    const omitted = lines.length - head.length - tail.length;
    return `${head.join('\n')}\n\n... [${omitted} lines omitted] ...\n\n${tail.join('\n')}`;
  }

  /** Wraps the preview and the offload path in the masking indicator tag. */
  private formatMaskedSnippet(params: MaskedSnippetParams): string {
    const { filePath, preview } = params;
    return [
      `<${MASKING_INDICATOR_TAG}>`,
      preview,
      `Output too large. Full output available at: ${filePath}`,
      `</${MASKING_INDICATOR_TAG}>`,
    ].join('\n');
  }
}

/** Location, size and serialized content of one tool output eligible for masking. */
interface PrunablePart {
  contentIndex: number;
  partIndex: number;
  tokens: number;
  content: string;
}
interface MaskedSnippetParams {
toolName: string;
filePath: string;
fileSizeMB: string;
totalLines: number;
tokens: number;
preview: string;
}

View File

@@ -46,6 +46,7 @@ import type {
ApprovalModeSwitchEvent,
ApprovalModeDurationEvent,
PlanExecutionEvent,
ToolOutputMaskingEvent,
} from '../types.js';
import { EventMetadataKey } from './event-metadata-key.js';
import type { Config } from '../../config/config.js';
@@ -108,6 +109,7 @@ export enum EventNames {
APPROVAL_MODE_SWITCH = 'approval_mode_switch',
APPROVAL_MODE_DURATION = 'approval_mode_duration',
PLAN_EXECUTION = 'plan_execution',
TOOL_OUTPUT_MASKING = 'tool_output_masking',
}
export interface LogResponse {
@@ -1217,8 +1219,40 @@ export class ClearcutLogger {
},
];
const logEvent = this.createLogEvent(
EventNames.TOOL_OUTPUT_TRUNCATED,
data,
);
this.enqueueLogEvent(logEvent);
this.flushIfNeeded();
}
/**
 * Queues a Clearcut `tool_output_masking` event carrying the token counts and
 * masked-output count of one masking pass, then flushes if needed.
 *
 * @param event The masking telemetry event to report.
 */
logToolOutputMaskingEvent(event: ToolOutputMaskingEvent): void {
  const data: EventValue[] = [
    {
      gemini_cli_key:
        EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_BEFORE,
      value: event.tokens_before.toString(),
    },
    {
      gemini_cli_key:
        EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_AFTER,
      value: event.tokens_after.toString(),
    },
    {
      gemini_cli_key:
        EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_MASKED_COUNT,
      value: event.masked_count.toString(),
    },
    {
      gemini_cli_key:
        EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_TOTAL_PRUNABLE_TOKENS,
      value: event.total_prunable_tokens.toString(),
    },
  ];

  // Report under the masking event name, not the truncation one.
  this.enqueueLogEvent(
    this.createLogEvent(EventNames.TOOL_OUTPUT_MASKING, data),
  );
  this.flushIfNeeded();
}

View File

@@ -7,7 +7,7 @@
// Defines valid event metadata keys for Clearcut logging.
export enum EventMetadataKey {
// Deleted enums: 24
// Next ID: 148
// Next ID: 152
GEMINI_CLI_KEY_UNKNOWN = 0,
@@ -561,4 +561,20 @@ export enum EventMetadataKey {
// Logs the classifier threshold used.
GEMINI_CLI_ROUTING_CLASSIFIER_THRESHOLD = 147,
// ==========================================================================
// Tool Output Masking Event Keys
// ==========================================================================
// Logs the total tokens in the prunable block before masking.
GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_BEFORE = 148,
// Logs the total tokens in the masked remnants after masking.
GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_AFTER = 149,
// Logs the number of tool outputs masked in this operation.
GEMINI_CLI_TOOL_OUTPUT_MASKING_MASKED_COUNT = 150,
// Logs the total prunable tokens identified at the trigger point.
GEMINI_CLI_TOOL_OUTPUT_MASKING_TOTAL_PRUNABLE_TOKENS = 151,
}

View File

@@ -56,6 +56,7 @@ import type {
StartupStatsEvent,
LlmLoopCheckEvent,
PlanExecutionEvent,
ToolOutputMaskingEvent,
} from './types.js';
import {
recordApiErrorMetrics,
@@ -163,6 +164,21 @@ export function logToolOutputTruncated(
});
}
/**
 * Reports a tool-output-masking event to both telemetry sinks.
 *
 * The event is first forwarded to the Clearcut logger, if
 * `ClearcutLogger.getInstance` returns one for this config. A callback is then
 * handed to `bufferTelemetryEvent`; when that callback runs it emits a log
 * record built from the event's log body and OpenTelemetry attributes.
 *
 * @param config Configuration used to look up the Clearcut logger and to build
 *   the record's attributes.
 * @param event The masking event to report.
 */
export function logToolOutputMasking(
  config: Config,
  event: ToolOutputMaskingEvent,
): void {
  const clearcut = ClearcutLogger.getInstance(config);
  clearcut?.logToolOutputMaskingEvent(event);

  const emitRecord = (): void => {
    const otelLogger = logs.getLogger(SERVICE_NAME);
    const record: LogRecord = {
      body: event.toLogBody(),
      attributes: event.toOpenTelemetryAttributes(config),
    };
    otelLogger.emit(record);
  };
  bufferTelemetryEvent(emitRecord);
}
export function logFileOperation(
config: Config,
event: FileOperationEvent,

View File

@@ -1376,6 +1376,49 @@ export class ToolOutputTruncatedEvent implements BaseTelemetryEvent {
}
}
/** Event name written into the OpenTelemetry attributes of a masking event. */
export const EVENT_TOOL_OUTPUT_MASKING = 'gemini_cli.tool_output_masking';

/**
 * Telemetry event carrying the statistics of one tool-output masking pass.
 *
 * The timestamp is captured (as an ISO-8601 string) when the instance is
 * constructed.
 */
export class ToolOutputMaskingEvent implements BaseTelemetryEvent {
  'event.name': 'tool_output_masking';
  'event.timestamp': string;
  /** Token count before masking. */
  tokens_before: number;
  /** Token count after masking. */
  tokens_after: number;
  /** Number of tool outputs that were masked. */
  masked_count: number;
  /** Total prunable tokens reported for this pass. */
  total_prunable_tokens: number;

  /**
   * @param details The four counters to record; each is copied onto the
   *   instance unchanged.
   */
  constructor(details: {
    tokens_before: number;
    tokens_after: number;
    masked_count: number;
    total_prunable_tokens: number;
  }) {
    const {
      tokens_before,
      tokens_after,
      masked_count,
      total_prunable_tokens,
    } = details;

    this['event.name'] = 'tool_output_masking';
    this['event.timestamp'] = new Date().toISOString();
    this.tokens_before = tokens_before;
    this.tokens_after = tokens_after;
    this.masked_count = masked_count;
    this.total_prunable_tokens = total_prunable_tokens;
  }

  /**
   * Builds the attribute bag for the OpenTelemetry log record: the common
   * attributes for `config`, overlaid with this event's name, timestamp and
   * counters.
   */
  toOpenTelemetryAttributes(config: Config): LogAttributes {
    const attributes: LogAttributes = {
      ...getCommonAttributes(config),
      'event.name': EVENT_TOOL_OUTPUT_MASKING,
      'event.timestamp': this['event.timestamp'],
      tokens_before: this.tokens_before,
      tokens_after: this.tokens_after,
      masked_count: this.masked_count,
      total_prunable_tokens: this.total_prunable_tokens,
    };
    return attributes;
  }

  /**
   * Returns a one-line human-readable summary: how many outputs were masked
   * and how many tokens were saved (`tokens_before - tokens_after`).
   */
  toLogBody(): string {
    const savedTokens = this.tokens_before - this.tokens_after;
    return `Tool output masking (Masked ${this.masked_count} tool outputs. Saved ${savedTokens} tokens)`;
  }
}
export const EVENT_EXTENSION_UNINSTALL = 'gemini_cli.extension_uninstall';
export class ExtensionUninstallEvent implements BaseTelemetryEvent {
'event.name': 'extension_uninstall';
@@ -1602,6 +1645,7 @@ export type TelemetryEvent =
| LlmLoopCheckEvent
| StartupStatsEvent
| WebFetchFallbackAttemptEvent
| ToolOutputMaskingEvent
| EditStrategyEvent
| PlanExecutionEvent
| RewindEvent

View File

@@ -572,6 +572,14 @@ export async function fileExists(filePath: string): Promise<boolean> {
const MAX_TRUNCATED_LINE_WIDTH = 1000;
const MAX_TRUNCATED_CHARS = 4000;
/**
 * Makes a string safe to embed in a filename.
 *
 * Every UTF-16 code unit that is not an ASCII letter, digit, underscore or
 * hyphen is replaced with an underscore. Path separators and dots are
 * therefore replaced as well, so the result cannot contain `..` or `/`.
 * The length of the string is preserved.
 *
 * @param part Raw text to sanitize.
 * @returns The sanitized text; an empty input yields an empty string.
 */
export function sanitizeFilenamePart(part: string): string {
  // Without the `i`/`u` flags, `\w` is exactly [A-Za-z0-9_].
  const unsafeChar = /[^\w-]/g;
  return part.replace(unsafeChar, '_');
}
/**
* Formats a truncated message for tool output, handling multi-line and single-line (elephant) cases.
*/
@@ -623,11 +631,8 @@ export async function saveTruncatedToolOutput(
id: string | number, // Accept string (callId) or number (truncationId)
projectTempDir: string,
): Promise<{ outputFile: string; totalLines: number }> {
const safeToolName = toolName.replace(/[^a-z0-9]/gi, '_').toLowerCase();
const safeId = id
.toString()
.replace(/[^a-z0-9]/gi, '_')
.toLowerCase();
const safeToolName = sanitizeFilenamePart(toolName).toLowerCase();
const safeId = sanitizeFilenamePart(id.toString()).toLowerCase();
const fileName = `${safeToolName}_${safeId}.txt`;
const toolOutputDir = path.join(projectTempDir, TOOL_OUTPUT_DIR);
const outputFile = path.join(toolOutputDir, fileName);

View File

@@ -1428,6 +1428,44 @@
"default": {},
"type": "object",
"properties": {
"toolOutputMasking": {
"title": "Tool Output Masking",
"description": "Advanced settings for tool output masking to manage context window efficiency.",
"markdownDescription": "Advanced settings for tool output masking to manage context window efficiency.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `{}`",
"default": {},
"type": "object",
"properties": {
"enabled": {
"title": "Enable Tool Output Masking",
"description": "Enables tool output masking to save tokens.",
"markdownDescription": "Enables tool output masking to save tokens.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`",
"default": false,
"type": "boolean"
},
"toolProtectionThreshold": {
"title": "Tool Protection Threshold",
"description": "Minimum number of tokens to protect from masking (most recent tool outputs).",
"markdownDescription": "Minimum number of tokens to protect from masking (most recent tool outputs).\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `50000`",
"default": 50000,
"type": "number"
},
"minPrunableTokensThreshold": {
"title": "Min Prunable Tokens Threshold",
"description": "Minimum prunable tokens required to trigger a masking pass.",
"markdownDescription": "Minimum prunable tokens required to trigger a masking pass.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `30000`",
"default": 30000,
"type": "number"
},
"protectLatestTurn": {
"title": "Protect Latest Turn",
"description": "Ensures the absolute latest turn is never masked, regardless of token count.",
"markdownDescription": "Ensures the absolute latest turn is never masked, regardless of token count.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `true`",
"default": true,
"type": "boolean"
}
},
"additionalProperties": false
},
"enableAgents": {
"title": "Enable Agents",
"description": "Enable local and remote subagents. Warning: Experimental feature, uses YOLO mode for subagents",