mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-13 05:12:55 -07:00
feat(context): implement observation masking for tool outputs (#18389)
This commit is contained in:
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"experimental": {
|
||||||
|
"toolOutputMasking": {
|
||||||
|
"enabled": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -806,6 +806,7 @@ export async function loadCliConfig(
|
|||||||
skillsSupport: settings.skills?.enabled ?? true,
|
skillsSupport: settings.skills?.enabled ?? true,
|
||||||
disabledSkills: settings.skills?.disabled,
|
disabledSkills: settings.skills?.disabled,
|
||||||
experimentalJitContext: settings.experimental?.jitContext,
|
experimentalJitContext: settings.experimental?.jitContext,
|
||||||
|
toolOutputMasking: settings.experimental?.toolOutputMasking,
|
||||||
noBrowser: !!process.env['NO_BROWSER'],
|
noBrowser: !!process.env['NO_BROWSER'],
|
||||||
summarizeToolOutput: settings.model?.summarizeToolOutput,
|
summarizeToolOutput: settings.model?.summarizeToolOutput,
|
||||||
ideMode,
|
ideMode,
|
||||||
|
|||||||
@@ -1462,6 +1462,58 @@ const SETTINGS_SCHEMA = {
|
|||||||
description: 'Setting to enable experimental features',
|
description: 'Setting to enable experimental features',
|
||||||
showInDialog: false,
|
showInDialog: false,
|
||||||
properties: {
|
properties: {
|
||||||
|
toolOutputMasking: {
|
||||||
|
type: 'object',
|
||||||
|
label: 'Tool Output Masking',
|
||||||
|
category: 'Experimental',
|
||||||
|
requiresRestart: true,
|
||||||
|
ignoreInDocs: true,
|
||||||
|
default: {},
|
||||||
|
description:
|
||||||
|
'Advanced settings for tool output masking to manage context window efficiency.',
|
||||||
|
showInDialog: false,
|
||||||
|
properties: {
|
||||||
|
enabled: {
|
||||||
|
type: 'boolean',
|
||||||
|
label: 'Enable Tool Output Masking',
|
||||||
|
category: 'Experimental',
|
||||||
|
requiresRestart: true,
|
||||||
|
default: false,
|
||||||
|
description: 'Enables tool output masking to save tokens.',
|
||||||
|
showInDialog: false,
|
||||||
|
},
|
||||||
|
toolProtectionThreshold: {
|
||||||
|
type: 'number',
|
||||||
|
label: 'Tool Protection Threshold',
|
||||||
|
category: 'Experimental',
|
||||||
|
requiresRestart: true,
|
||||||
|
default: 50000,
|
||||||
|
description:
|
||||||
|
'Minimum number of tokens to protect from masking (most recent tool outputs).',
|
||||||
|
showInDialog: false,
|
||||||
|
},
|
||||||
|
minPrunableTokensThreshold: {
|
||||||
|
type: 'number',
|
||||||
|
label: 'Min Prunable Tokens Threshold',
|
||||||
|
category: 'Experimental',
|
||||||
|
requiresRestart: true,
|
||||||
|
default: 30000,
|
||||||
|
description:
|
||||||
|
'Minimum prunable tokens required to trigger a masking pass.',
|
||||||
|
showInDialog: false,
|
||||||
|
},
|
||||||
|
protectLatestTurn: {
|
||||||
|
type: 'boolean',
|
||||||
|
label: 'Protect Latest Turn',
|
||||||
|
category: 'Experimental',
|
||||||
|
requiresRestart: true,
|
||||||
|
default: true,
|
||||||
|
description:
|
||||||
|
'Ensures the absolute latest turn is never masked, regardless of token count.',
|
||||||
|
showInDialog: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
enableAgents: {
|
enableAgents: {
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
label: 'Enable Agents',
|
label: 'Enable Agents',
|
||||||
|
|||||||
@@ -149,6 +149,13 @@ export interface OutputSettings {
|
|||||||
format?: OutputFormat;
|
format?: OutputFormat;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fully-resolved settings for tool output masking, as stored on `Config`
 * after defaults have been applied to the user's partial settings.
 */
export interface ToolOutputMaskingConfig {
|
||||||
|
/** Enables tool output masking to save tokens. */
enabled: boolean;
|
||||||
|
/** Minimum number of tokens (most recent tool outputs) to protect from masking. */
toolProtectionThreshold: number;
|
||||||
|
/** Minimum prunable tokens required to trigger a masking pass. */
minPrunableTokensThreshold: number;
|
||||||
|
/** When true, the latest turn is never masked, regardless of token count. */
protectLatestTurn: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
export interface ExtensionSetting {
|
export interface ExtensionSetting {
|
||||||
name: string;
|
name: string;
|
||||||
description: string;
|
description: string;
|
||||||
@@ -273,6 +280,11 @@ import {
|
|||||||
DEFAULT_FILE_FILTERING_OPTIONS,
|
DEFAULT_FILE_FILTERING_OPTIONS,
|
||||||
DEFAULT_MEMORY_FILE_FILTERING_OPTIONS,
|
DEFAULT_MEMORY_FILE_FILTERING_OPTIONS,
|
||||||
} from './constants.js';
|
} from './constants.js';
|
||||||
|
import {
|
||||||
|
DEFAULT_TOOL_PROTECTION_THRESHOLD,
|
||||||
|
DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD,
|
||||||
|
DEFAULT_PROTECT_LATEST_TURN,
|
||||||
|
} from '../services/toolOutputMaskingService.js';
|
||||||
|
|
||||||
import {
|
import {
|
||||||
type ExtensionLoader,
|
type ExtensionLoader,
|
||||||
@@ -462,6 +474,7 @@ export interface ConfigParameters {
|
|||||||
disabledSkills?: string[];
|
disabledSkills?: string[];
|
||||||
adminSkillsEnabled?: boolean;
|
adminSkillsEnabled?: boolean;
|
||||||
experimentalJitContext?: boolean;
|
experimentalJitContext?: boolean;
|
||||||
|
toolOutputMasking?: Partial<ToolOutputMaskingConfig>;
|
||||||
disableLLMCorrection?: boolean;
|
disableLLMCorrection?: boolean;
|
||||||
plan?: boolean;
|
plan?: boolean;
|
||||||
onModelChange?: (model: string) => void;
|
onModelChange?: (model: string) => void;
|
||||||
@@ -599,6 +612,7 @@ export class Config {
|
|||||||
private pendingIncludeDirectories: string[];
|
private pendingIncludeDirectories: string[];
|
||||||
private readonly enableHooks: boolean;
|
private readonly enableHooks: boolean;
|
||||||
private readonly enableHooksUI: boolean;
|
private readonly enableHooksUI: boolean;
|
||||||
|
private readonly toolOutputMasking: ToolOutputMaskingConfig;
|
||||||
private hooks: { [K in HookEventName]?: HookDefinition[] } | undefined;
|
private hooks: { [K in HookEventName]?: HookDefinition[] } | undefined;
|
||||||
private projectHooks:
|
private projectHooks:
|
||||||
| ({ [K in HookEventName]?: HookDefinition[] } & { disabled?: string[] })
|
| ({ [K in HookEventName]?: HookDefinition[] } & { disabled?: string[] })
|
||||||
@@ -721,6 +735,18 @@ export class Config {
|
|||||||
this.modelAvailabilityService = new ModelAvailabilityService();
|
this.modelAvailabilityService = new ModelAvailabilityService();
|
||||||
this.previewFeatures = params.previewFeatures ?? undefined;
|
this.previewFeatures = params.previewFeatures ?? undefined;
|
||||||
this.experimentalJitContext = params.experimentalJitContext ?? false;
|
this.experimentalJitContext = params.experimentalJitContext ?? false;
|
||||||
|
this.toolOutputMasking = {
|
||||||
|
enabled: params.toolOutputMasking?.enabled ?? false,
|
||||||
|
toolProtectionThreshold:
|
||||||
|
params.toolOutputMasking?.toolProtectionThreshold ??
|
||||||
|
DEFAULT_TOOL_PROTECTION_THRESHOLD,
|
||||||
|
minPrunableTokensThreshold:
|
||||||
|
params.toolOutputMasking?.minPrunableTokensThreshold ??
|
||||||
|
DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD,
|
||||||
|
protectLatestTurn:
|
||||||
|
params.toolOutputMasking?.protectLatestTurn ??
|
||||||
|
DEFAULT_PROTECT_LATEST_TURN,
|
||||||
|
};
|
||||||
this.maxSessionTurns = params.maxSessionTurns ?? -1;
|
this.maxSessionTurns = params.maxSessionTurns ?? -1;
|
||||||
this.experimentalZedIntegration =
|
this.experimentalZedIntegration =
|
||||||
params.experimentalZedIntegration ?? false;
|
params.experimentalZedIntegration ?? false;
|
||||||
@@ -1445,6 +1471,14 @@ export class Config {
|
|||||||
return this.experimentalJitContext;
|
return this.experimentalJitContext;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns the `enabled` flag of the resolved tool output masking settings. */
getToolOutputMaskingEnabled(): boolean {
|
||||||
|
return this.toolOutputMasking.enabled;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the resolved tool output masking settings.
 *
 * The stored object itself is returned (no copy), so callers should treat it
 * as read-only.
 */
getToolOutputMaskingConfig(): ToolOutputMaskingConfig {
|
||||||
|
return this.toolOutputMasking;
|
||||||
|
}
|
||||||
|
|
||||||
getGeminiMdFileCount(): number {
|
getGeminiMdFileCount(): number {
|
||||||
if (this.experimentalJitContext && this.contextManager) {
|
if (this.experimentalJitContext && this.contextManager) {
|
||||||
return this.contextManager.getLoadedPaths().size;
|
return this.contextManager.getLoadedPaths().size;
|
||||||
|
|||||||
@@ -213,6 +213,7 @@ describe('Gemini Client (client.ts)', () => {
|
|||||||
getGlobalMemory: vi.fn().mockReturnValue(''),
|
getGlobalMemory: vi.fn().mockReturnValue(''),
|
||||||
getEnvironmentMemory: vi.fn().mockReturnValue(''),
|
getEnvironmentMemory: vi.fn().mockReturnValue(''),
|
||||||
isJitContextEnabled: vi.fn().mockReturnValue(false),
|
isJitContextEnabled: vi.fn().mockReturnValue(false),
|
||||||
|
getToolOutputMaskingEnabled: vi.fn().mockReturnValue(false),
|
||||||
getDisableLoopDetection: vi.fn().mockReturnValue(false),
|
getDisableLoopDetection: vi.fn().mockReturnValue(false),
|
||||||
|
|
||||||
getSessionId: vi.fn().mockReturnValue('test-session-id'),
|
getSessionId: vi.fn().mockReturnValue('test-session-id'),
|
||||||
|
|||||||
@@ -54,6 +54,7 @@ import { handleFallback } from '../fallback/handler.js';
|
|||||||
import type { RoutingContext } from '../routing/routingStrategy.js';
|
import type { RoutingContext } from '../routing/routingStrategy.js';
|
||||||
import { debugLogger } from '../utils/debugLogger.js';
|
import { debugLogger } from '../utils/debugLogger.js';
|
||||||
import type { ModelConfigKey } from '../services/modelConfigService.js';
|
import type { ModelConfigKey } from '../services/modelConfigService.js';
|
||||||
|
import { ToolOutputMaskingService } from '../services/toolOutputMaskingService.js';
|
||||||
import { calculateRequestTokenCount } from '../utils/tokenCalculation.js';
|
import { calculateRequestTokenCount } from '../utils/tokenCalculation.js';
|
||||||
import {
|
import {
|
||||||
applyModelSelection,
|
applyModelSelection,
|
||||||
@@ -84,6 +85,7 @@ export class GeminiClient {
|
|||||||
|
|
||||||
private readonly loopDetector: LoopDetectionService;
|
private readonly loopDetector: LoopDetectionService;
|
||||||
private readonly compressionService: ChatCompressionService;
|
private readonly compressionService: ChatCompressionService;
|
||||||
|
private readonly toolOutputMaskingService: ToolOutputMaskingService;
|
||||||
private lastPromptId: string;
|
private lastPromptId: string;
|
||||||
private currentSequenceModel: string | null = null;
|
private currentSequenceModel: string | null = null;
|
||||||
private lastSentIdeContext: IdeContext | undefined;
|
private lastSentIdeContext: IdeContext | undefined;
|
||||||
@@ -98,6 +100,7 @@ export class GeminiClient {
|
|||||||
constructor(private readonly config: Config) {
|
constructor(private readonly config: Config) {
|
||||||
this.loopDetector = new LoopDetectionService(config);
|
this.loopDetector = new LoopDetectionService(config);
|
||||||
this.compressionService = new ChatCompressionService();
|
this.compressionService = new ChatCompressionService();
|
||||||
|
this.toolOutputMaskingService = new ToolOutputMaskingService();
|
||||||
this.lastPromptId = this.config.getSessionId();
|
this.lastPromptId = this.config.getSessionId();
|
||||||
|
|
||||||
coreEvents.on(CoreEvent.ModelChanged, this.handleModelChanged);
|
coreEvents.on(CoreEvent.ModelChanged, this.handleModelChanged);
|
||||||
@@ -562,6 +565,8 @@ export class GeminiClient {
|
|||||||
const remainingTokenCount =
|
const remainingTokenCount =
|
||||||
tokenLimit(modelForLimitCheck) - this.getChat().getLastPromptTokenCount();
|
tokenLimit(modelForLimitCheck) - this.getChat().getLastPromptTokenCount();
|
||||||
|
|
||||||
|
await this.tryMaskToolOutputs(this.getHistory());
|
||||||
|
|
||||||
// Estimate tokens. For text-only requests, we estimate based on character length.
|
// Estimate tokens. For text-only requests, we estimate based on character length.
|
||||||
// For requests with non-text parts (like images, tools), we use the countTokens API.
|
// For requests with non-text parts (like images, tools), we use the countTokens API.
|
||||||
const estimatedRequestTokenCount = await calculateRequestTokenCount(
|
const estimatedRequestTokenCount = await calculateRequestTokenCount(
|
||||||
@@ -1056,4 +1061,20 @@ export class GeminiClient {
|
|||||||
|
|
||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Masks bulky tool outputs to save context window space.
 *
 * Does nothing when tool output masking is disabled in the config. Otherwise
 * the given history is handed to the masking service, and the chat history is
 * replaced only when at least one tool output was actually masked.
 *
 * Masking is a best-effort optimization: if the masking service rejects, the
 * failure is logged and the request continues with the unmasked history
 * instead of aborting the whole turn.
 *
 * @param history Conversation history to scan for maskable tool outputs.
 */
private async tryMaskToolOutputs(history: Content[]): Promise<void> {
  if (!this.config.getToolOutputMaskingEnabled()) {
    return;
  }
  try {
    const result = await this.toolOutputMaskingService.mask(
      history,
      this.config,
    );
    // Only touch the chat when something changed.
    if (result.maskedCount > 0) {
      this.getChat().setHistory(result.newHistory);
    }
  } catch (error: unknown) {
    // NOTE(review): the masking service presumably persists full outputs to
    // disk; an I/O failure there must not fail the user's request.
    debugLogger.warn(
      'Tool output masking failed; continuing with unmasked history.',
      error,
    );
  }
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,31 @@
|
|||||||
|
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
|
||||||
|
|
||||||
|
exports[`ToolOutputMaskingService > should match the expected snapshot for a masked tool output 1`] = `
|
||||||
|
"<tool_output_masked>
|
||||||
|
Line
|
||||||
|
Line
|
||||||
|
Line
|
||||||
|
Line
|
||||||
|
Line
|
||||||
|
Line
|
||||||
|
Line
|
||||||
|
Line
|
||||||
|
Line
|
||||||
|
Line
|
||||||
|
|
||||||
|
... [6 lines omitted] ...
|
||||||
|
|
||||||
|
Line
|
||||||
|
Line
|
||||||
|
Line
|
||||||
|
Line
|
||||||
|
Line
|
||||||
|
Line
|
||||||
|
Line
|
||||||
|
Line
|
||||||
|
Line
|
||||||
|
|
||||||
|
|
||||||
|
Output too large. Full output available at: /mock/history/tool-outputs/run_shell_command_deterministic.txt
|
||||||
|
</tool_output_masked>"
|
||||||
|
`;
|
||||||
@@ -0,0 +1,500 @@
|
|||||||
|
/**
|
||||||
|
* @license
|
||||||
|
* Copyright 2026 Google LLC
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||||
|
import {
|
||||||
|
ToolOutputMaskingService,
|
||||||
|
MASKING_INDICATOR_TAG,
|
||||||
|
} from './toolOutputMaskingService.js';
|
||||||
|
import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
|
||||||
|
import { estimateTokenCountSync } from '../utils/tokenCalculation.js';
|
||||||
|
import type { Config } from '../config/config.js';
|
||||||
|
import type { Content, Part } from '@google/genai';
|
||||||
|
|
||||||
|
vi.mock('../utils/tokenCalculation.js', () => ({
|
||||||
|
estimateTokenCountSync: vi.fn(),
|
||||||
|
}));
|
||||||
|
|
||||||
|
vi.mock('node:fs/promises', () => ({
|
||||||
|
mkdir: vi.fn().mockResolvedValue(undefined),
|
||||||
|
writeFile: vi.fn().mockResolvedValue(undefined),
|
||||||
|
}));
|
||||||
|
|
||||||
|
describe('ToolOutputMaskingService', () => {
|
||||||
|
let service: ToolOutputMaskingService;
|
||||||
|
let mockConfig: Config;
|
||||||
|
|
||||||
|
const mockedEstimateTokenCountSync = vi.mocked(estimateTokenCountSync);
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
service = new ToolOutputMaskingService();
|
||||||
|
mockConfig = {
|
||||||
|
storage: {
|
||||||
|
getHistoryDir: () => '/mock/history',
|
||||||
|
},
|
||||||
|
getUsageStatisticsEnabled: () => false,
|
||||||
|
getToolOutputMaskingConfig: () => ({
|
||||||
|
enabled: true,
|
||||||
|
toolProtectionThreshold: 50000,
|
||||||
|
minPrunableTokensThreshold: 30000,
|
||||||
|
protectLatestTurn: true,
|
||||||
|
}),
|
||||||
|
} as unknown as Config;
|
||||||
|
vi.clearAllMocks();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should not mask if total tool tokens are below protection threshold', async () => {
|
||||||
|
const history: Content[] = [
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
parts: [
|
||||||
|
{
|
||||||
|
functionResponse: {
|
||||||
|
name: 'test_tool',
|
||||||
|
response: { output: 'small output' },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
mockedEstimateTokenCountSync.mockReturnValue(100);
|
||||||
|
|
||||||
|
const result = await service.mask(history, mockConfig);
|
||||||
|
|
||||||
|
expect(result.maskedCount).toBe(0);
|
||||||
|
expect(result.newHistory).toEqual(history);
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
 * Test helper: pulls the tool output text out of a function-response part.
 *
 * Returns `response.output` when present; otherwise falls back to the raw
 * `response` value (cast to string), and finally to '' when the part has no
 * function response at all.
 */
const getToolResponse = (part: Part | undefined): string => {
|
||||||
|
const resp = part?.functionResponse?.response as
|
||||||
|
| { output: string }
|
||||||
|
| undefined;
|
||||||
|
return resp?.output ?? (resp as unknown as string) ?? '';
|
||||||
|
};
|
||||||
|
|
||||||
|
it('should protect the latest turn and mask older outputs beyond 50k window if total > 30k', async () => {
|
||||||
|
// History:
|
||||||
|
// Turn 1: 60k (Oldest)
|
||||||
|
// Turn 2: 20k
|
||||||
|
// Turn 3: 10k (Latest) - Protected because PROTECT_LATEST_TURN is true
|
||||||
|
const history: Content[] = [
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
parts: [
|
||||||
|
{
|
||||||
|
functionResponse: {
|
||||||
|
name: 't1',
|
||||||
|
response: { output: 'A'.repeat(60000) },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
parts: [
|
||||||
|
{
|
||||||
|
functionResponse: {
|
||||||
|
name: 't2',
|
||||||
|
response: { output: 'B'.repeat(20000) },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
parts: [
|
||||||
|
{
|
||||||
|
functionResponse: {
|
||||||
|
name: 't3',
|
||||||
|
response: { output: 'C'.repeat(10000) },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
|
||||||
|
const toolName = parts[0].functionResponse?.name;
|
||||||
|
const resp = parts[0].functionResponse?.response as Record<
|
||||||
|
string,
|
||||||
|
unknown
|
||||||
|
>;
|
||||||
|
const content = (resp?.['output'] as string) ?? JSON.stringify(resp);
|
||||||
|
if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
|
||||||
|
|
||||||
|
if (toolName === 't1') return 60000;
|
||||||
|
if (toolName === 't2') return 20000;
|
||||||
|
if (toolName === 't3') return 10000;
|
||||||
|
return 0;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Scanned: Turn 2 (20k), Turn 1 (60k). Total = 80k.
|
||||||
|
// Turn 2: Cumulative = 20k. Protected (<= 50k).
|
||||||
|
// Turn 1: Cumulative = 80k. Crossed 50k boundary. Prunable.
|
||||||
|
// Total Prunable = 60k (> 30k trigger).
|
||||||
|
const result = await service.mask(history, mockConfig);
|
||||||
|
|
||||||
|
expect(result.maskedCount).toBe(1);
|
||||||
|
expect(getToolResponse(result.newHistory[0].parts?.[0])).toContain(
|
||||||
|
`<${MASKING_INDICATOR_TAG}`,
|
||||||
|
);
|
||||||
|
expect(getToolResponse(result.newHistory[1].parts?.[0])).toEqual(
|
||||||
|
'B'.repeat(20000),
|
||||||
|
);
|
||||||
|
expect(getToolResponse(result.newHistory[2].parts?.[0])).toEqual(
|
||||||
|
'C'.repeat(10000),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should perform global aggregation for many small parts once boundary is hit', async () => {
|
||||||
|
// history.length = 12. Skip index 11 (latest).
|
||||||
|
// Indices 0-10: 10k each.
|
||||||
|
// Index 10: 10k (Sum 10k)
|
||||||
|
// Index 9: 10k (Sum 20k)
|
||||||
|
// Index 8: 10k (Sum 30k)
|
||||||
|
// Index 7: 10k (Sum 40k)
|
||||||
|
// Index 6: 10k (Sum 50k) - Still protected: the sum has not exceeded 50k.
|
||||||
|
// Index 5: 10k (Sum 60k) - Crosses the 50k boundary.
|
||||||
|
// Indices 5, 4, 3, 2, 1, 0 are all prunable. (6 * 10k = 60k > 30k trigger).
|
||||||
|
const history: Content[] = Array.from({ length: 12 }, (_, i) => ({
|
||||||
|
role: 'user',
|
||||||
|
parts: [
|
||||||
|
{
|
||||||
|
functionResponse: {
|
||||||
|
name: `tool${i}`,
|
||||||
|
response: { output: 'A'.repeat(10000) },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}));
|
||||||
|
|
||||||
|
// Token estimate = output length, except already-masked content counts as 100.
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
|
||||||
|
const resp = parts[0].functionResponse?.response as
|
||||||
|
| { output?: string; result?: string }
|
||||||
|
| string
|
||||||
|
| undefined;
|
||||||
|
const content =
|
||||||
|
typeof resp === 'string'
|
||||||
|
? resp
|
||||||
|
: resp?.output || resp?.result || JSON.stringify(resp);
|
||||||
|
if (content?.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
|
||||||
|
return content?.length || 0;
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await service.mask(history, mockConfig);
|
||||||
|
|
||||||
|
expect(result.maskedCount).toBe(6); // boundary at 50k protects 6-10; 0-5 are masked
|
||||||
|
expect(result.tokensSaved).toBeGreaterThan(0);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should verify tool-aware previews (shell vs generic)', async () => {
|
||||||
|
const shellHistory: Content[] = [
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
parts: [
|
||||||
|
{
|
||||||
|
functionResponse: {
|
||||||
|
name: SHELL_TOOL_NAME,
|
||||||
|
response: {
|
||||||
|
output:
|
||||||
|
'Output: line1\nline2\nline3\nline4\nline5\nError: failed\nExit Code: 1',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
// Protection buffer
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
parts: [
|
||||||
|
{
|
||||||
|
functionResponse: {
|
||||||
|
name: 'p',
|
||||||
|
response: { output: 'p'.repeat(60000) },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
// Latest turn
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
parts: [{ functionResponse: { name: 'l', response: { output: 'l' } } }],
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
|
||||||
|
const name = parts[0].functionResponse?.name;
|
||||||
|
const resp = parts[0].functionResponse?.response as Record<
|
||||||
|
string,
|
||||||
|
unknown
|
||||||
|
>;
|
||||||
|
const content = (resp?.['output'] as string) ?? JSON.stringify(resp);
|
||||||
|
if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
|
||||||
|
|
||||||
|
if (name === SHELL_TOOL_NAME) return 100000;
|
||||||
|
if (name === 'p') return 60000;
|
||||||
|
return 100;
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await service.mask(shellHistory, mockConfig);
|
||||||
|
const maskedBash = getToolResponse(result.newHistory[0].parts?.[0]);
|
||||||
|
|
||||||
|
expect(maskedBash).toContain('Output: line1\nline2\nline3\nline4\nline5');
|
||||||
|
expect(maskedBash).toContain('Exit Code: 1');
|
||||||
|
expect(maskedBash).toContain('Error: failed');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should skip already masked content and not count it towards totals', async () => {
|
||||||
|
const history: Content[] = [
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
parts: [
|
||||||
|
{
|
||||||
|
functionResponse: {
|
||||||
|
name: 'tool1',
|
||||||
|
response: {
|
||||||
|
output: `<${MASKING_INDICATOR_TAG}>...</${MASKING_INDICATOR_TAG}>`,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
parts: [
|
||||||
|
{
|
||||||
|
functionResponse: {
|
||||||
|
name: 'tool2',
|
||||||
|
response: { output: 'A'.repeat(60000) },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
];
|
||||||
|
mockedEstimateTokenCountSync.mockReturnValue(60000);
|
||||||
|
|
||||||
|
const result = await service.mask(history, mockConfig);
|
||||||
|
expect(result.maskedCount).toBe(0); // tool1 skipped, tool2 is the "latest" which is protected
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle different response keys in masked update', async () => {
|
||||||
|
const history: Content[] = [
|
||||||
|
{
|
||||||
|
role: 'model',
|
||||||
|
parts: [
|
||||||
|
{
|
||||||
|
functionResponse: {
|
||||||
|
name: 't1',
|
||||||
|
response: { result: 'A'.repeat(60000) },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'model',
|
||||||
|
parts: [
|
||||||
|
{
|
||||||
|
functionResponse: {
|
||||||
|
name: 'p',
|
||||||
|
response: { output: 'P'.repeat(60000) },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{ role: 'user', parts: [{ text: 'latest' }] },
|
||||||
|
];
|
||||||
|
|
||||||
|
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
|
||||||
|
const resp = parts[0].functionResponse?.response as Record<
|
||||||
|
string,
|
||||||
|
unknown
|
||||||
|
>;
|
||||||
|
const content =
|
||||||
|
(resp?.['output'] as string) ??
|
||||||
|
(resp?.['result'] as string) ??
|
||||||
|
JSON.stringify(resp);
|
||||||
|
if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
|
||||||
|
return 60000;
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await service.mask(history, mockConfig);
|
||||||
|
expect(result.maskedCount).toBe(2); // both t1 and p are prunable (cumulative 60k and 120k)
|
||||||
|
const responseObj = result.newHistory[0].parts?.[0].functionResponse
|
||||||
|
?.response as Record<string, unknown>;
|
||||||
|
expect(Object.keys(responseObj)).toEqual(['output']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should preserve multimodal parts while masking tool responses', async () => {
|
||||||
|
const history: Content[] = [
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
parts: [
|
||||||
|
{
|
||||||
|
functionResponse: {
|
||||||
|
name: 't1',
|
||||||
|
response: { output: 'A'.repeat(60000) },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
inlineData: {
|
||||||
|
data: 'base64data',
|
||||||
|
mimeType: 'image/png',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
// Protection buffer
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
parts: [
|
||||||
|
{
|
||||||
|
functionResponse: {
|
||||||
|
name: 'p',
|
||||||
|
response: { output: 'p'.repeat(60000) },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
// Latest turn
|
||||||
|
{ role: 'user', parts: [{ text: 'latest' }] },
|
||||||
|
];
|
||||||
|
|
||||||
|
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
|
||||||
|
const resp = parts[0].functionResponse?.response as Record<
|
||||||
|
string,
|
||||||
|
unknown
|
||||||
|
>;
|
||||||
|
const content = (resp?.['output'] as string) ?? JSON.stringify(resp);
|
||||||
|
if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
|
||||||
|
|
||||||
|
if (parts[0].functionResponse?.name === 't1') return 60000;
|
||||||
|
if (parts[0].functionResponse?.name === 'p') return 60000;
|
||||||
|
return 100;
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await service.mask(history, mockConfig);
|
||||||
|
|
||||||
|
expect(result.maskedCount).toBe(2); //Both t1 and p are prunable (cumulative 60k each > 50k protection)
|
||||||
|
expect(result.newHistory[0].parts).toHaveLength(2);
|
||||||
|
expect(result.newHistory[0].parts?.[0].functionResponse).toBeDefined();
|
||||||
|
expect(
|
||||||
|
(
|
||||||
|
result.newHistory[0].parts?.[0].functionResponse?.response as Record<
|
||||||
|
string,
|
||||||
|
unknown
|
||||||
|
>
|
||||||
|
)['output'],
|
||||||
|
).toContain(`<${MASKING_INDICATOR_TAG}`);
|
||||||
|
expect(result.newHistory[0].parts?.[1].inlineData).toEqual({
|
||||||
|
data: 'base64data',
|
||||||
|
mimeType: 'image/png',
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should match the expected snapshot for a masked tool output', async () => {
|
||||||
|
const history: Content[] = [
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
parts: [
|
||||||
|
{
|
||||||
|
functionResponse: {
|
||||||
|
name: SHELL_TOOL_NAME,
|
||||||
|
response: {
|
||||||
|
output: 'Line\n'.repeat(25),
|
||||||
|
exitCode: 0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
// Buffer to push shell_tool into prunable territory
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
parts: [
|
||||||
|
{
|
||||||
|
functionResponse: {
|
||||||
|
name: 'padding',
|
||||||
|
response: { output: 'B'.repeat(60000) },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{ role: 'user', parts: [{ text: 'latest' }] },
|
||||||
|
];
|
||||||
|
|
||||||
|
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
|
||||||
|
const resp = parts[0].functionResponse?.response as Record<
|
||||||
|
string,
|
||||||
|
unknown
|
||||||
|
>;
|
||||||
|
const content = (resp?.['output'] as string) ?? JSON.stringify(resp);
|
||||||
|
if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
|
||||||
|
|
||||||
|
if (parts[0].functionResponse?.name === SHELL_TOOL_NAME) return 1000;
|
||||||
|
if (parts[0].functionResponse?.name === 'padding') return 60000;
|
||||||
|
return 10;
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await service.mask(history, mockConfig);
|
||||||
|
|
||||||
|
// Verify complete masking: only 'output' key should exist
|
||||||
|
const responseObj = result.newHistory[0].parts?.[0].functionResponse
|
||||||
|
?.response as Record<string, unknown>;
|
||||||
|
expect(Object.keys(responseObj)).toEqual(['output']);
|
||||||
|
|
||||||
|
const response = responseObj['output'] as string;
|
||||||
|
|
||||||
|
// We replace the random part of the filename for deterministic snapshots
|
||||||
|
// and normalize path separators for cross-platform compatibility
|
||||||
|
const deterministicResponse = response
|
||||||
|
.replace(
|
||||||
|
new RegExp(`${SHELL_TOOL_NAME}_[^\\s"]+\\.txt`, 'g'),
|
||||||
|
`${SHELL_TOOL_NAME}_deterministic.txt`,
|
||||||
|
)
|
||||||
|
.replace(/\\/g, '/');
|
||||||
|
|
||||||
|
expect(deterministicResponse).toMatchSnapshot();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should not mask if masking increases token count (due to overhead)', async () => {
|
||||||
|
const history: Content[] = [
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
parts: [
|
||||||
|
{
|
||||||
|
functionResponse: {
|
||||||
|
name: 'tiny_tool',
|
||||||
|
response: { output: 'tiny' },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
// Protection buffer to push tiny_tool into prunable territory
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
parts: [
|
||||||
|
{
|
||||||
|
functionResponse: {
|
||||||
|
name: 'padding',
|
||||||
|
response: { output: 'B'.repeat(60000) },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{ role: 'user', parts: [{ text: 'latest' }] },
|
||||||
|
];
|
||||||
|
|
||||||
|
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
|
||||||
|
if (parts[0].functionResponse?.name === 'tiny_tool') return 5;
|
||||||
|
if (parts[0].functionResponse?.name === 'padding') return 60000;
|
||||||
|
return 1000; // The masked version would be huge due to boilerplate
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await service.mask(history, mockConfig);
|
||||||
|
expect(result.maskedCount).toBe(0); // padding is protected, tiny_tool would increase size
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -0,0 +1,344 @@
|
|||||||
|
/**
|
||||||
|
* @license
|
||||||
|
* Copyright 2026 Google LLC
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { Content, Part } from '@google/genai';
|
||||||
|
import path from 'node:path';
|
||||||
|
import * as fsPromises from 'node:fs/promises';
|
||||||
|
import { estimateTokenCountSync } from '../utils/tokenCalculation.js';
|
||||||
|
import { debugLogger } from '../utils/debugLogger.js';
|
||||||
|
import { sanitizeFilenamePart } from '../utils/fileUtils.js';
|
||||||
|
import type { Config } from '../config/config.js';
|
||||||
|
import { logToolOutputMasking } from '../telemetry/loggers.js';
|
||||||
|
import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
|
||||||
|
import { ToolOutputMaskingEvent } from '../telemetry/types.js';
|
||||||
|
|
||||||
|
// Tool output masking defaults
|
||||||
|
// Fallback for `toolProtectionThreshold`: tokens of the most recent tool outputs protected from masking.
export const DEFAULT_TOOL_PROTECTION_THRESHOLD = 50000;
|
||||||
|
// Fallback for `minPrunableTokensThreshold`: prunable tokens required to trigger a masking pass.
export const DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD = 30000;
|
||||||
|
// Fallback for `protectLatestTurn`.
export const DEFAULT_PROTECT_LATEST_TURN = true;
|
||||||
|
// Tag name wrapped around masked output (`<tool_output_masked>...`); its presence marks content as already masked.
export const MASKING_INDICATOR_TAG = 'tool_output_masked';
|
||||||
|
|
||||||
|
// Directory name under which full tool outputs are saved — presumably inside the history dir; confirm against the service body.
export const TOOL_OUTPUTS_DIR = 'tool-outputs';
|
||||||
|
|
||||||
|
/**
 * Outcome of a single {@link ToolOutputMaskingService.mask} pass.
 */
export interface MaskingResult {
  /**
   * The resulting history. This is the input array itself when nothing was
   * masked before the batch threshold check, otherwise a shallow copy in which
   * masked entries have been replaced.
   */
  newHistory: Content[];
  /** Number of tool outputs that were replaced by a masked snippet. */
  maskedCount: number;
  /** Estimated number of tokens saved by the replacements. */
  tokensSaved: number;
}
|
||||||
|
|
||||||
|
/** A tool output selected for masking during the backward scan. */
interface PrunableToolOutput {
  /** Index of the owning entry in the history array. */
  contentIndex: number;
  /** Index of the part inside that entry's `parts` array. */
  partIndex: number;
  /** Estimated token count of the original (unmasked) part. */
  tokens: number;
  /** JSON-serialized tool response that is offloaded to disk. */
  content: string;
}

/**
 * Service to manage context window efficiency by masking bulky tool outputs (Tool Output Masking).
 *
 * It implements a "Hybrid Backward Scanned FIFO" algorithm to balance context relevance with
 * token savings:
 * 1. **Protection Window**: Protects the newest `toolProtectionThreshold` (default 50k) tool tokens
 *    from pruning. Optionally skips the entire latest conversation turn to ensure full context for
 *    the model's next response.
 * 2. **Global Aggregation**: Scans backwards past the protection window to identify all remaining
 *    tool outputs that haven't been masked yet.
 * 3. **Batch Trigger**: Triggers masking only if the total prunable tokens reach
 *    `minPrunableTokensThreshold` (default 30k).
 *
 * @remarks
 * Effectively, masking only starts once the scanned history holds roughly 80k tokens of unmasked
 * tool output (50k protected + 30k prunable buffer). Small tool outputs are preserved until they
 * collectively reach the threshold.
 */
export class ToolOutputMaskingService {
  /**
   * Replaces old, bulky tool outputs in `history` with a short preview plus a pointer to a file
   * that holds the full output.
   *
   * The input array and its entries are never mutated; changed entries are replaced in a shallow
   * copy. A part is only masked (and its output only written to disk) when the masked version is
   * estimated to be smaller than the original.
   *
   * @param history - Conversation history to scan.
   * @param config - Supplies the masking configuration and the history storage directory.
   * @returns The (possibly rewritten) history together with masking statistics.
   * @throws Rejects if the output directory cannot be created or an offloaded file cannot be
   *   written.
   */
  async mask(history: Content[], config: Config): Promise<MaskingResult> {
    if (history.length === 0) {
      return { newHistory: history, maskedCount: 0, tokensSaved: 0 };
    }

    const maskingConfig = config.getToolOutputMaskingConfig();
    const { prunableParts, totalPrunableTokens } = this.collectPrunableParts(
      history,
      maskingConfig,
    );

    // Trigger pruning only if we have accumulated enough savings to justify the
    // overhead of masking and file I/O (batch pruning threshold).
    if (totalPrunableTokens < maskingConfig.minPrunableTokensThreshold) {
      return { newHistory: history, maskedCount: 0, tokensSaved: 0 };
    }

    debugLogger.debug(
      `[ToolOutputMasking] Triggering masking. Prunable tool tokens: ${totalPrunableTokens.toLocaleString()} (> ${maskingConfig.minPrunableTokensThreshold.toLocaleString()})`,
    );

    // Perform masking and offloading
    const newHistory = [...history]; // Shallow copy of history
    let actualTokensSaved = 0;
    let maskedCount = 0;
    const toolOutputsDir = path.join(
      config.storage.getHistoryDir(),
      TOOL_OUTPUTS_DIR,
    );
    await fsPromises.mkdir(toolOutputsDir, { recursive: true });

    for (const { contentIndex, partIndex, content, tokens } of prunableParts) {
      // Read from newHistory so that several masked parts of the same entry accumulate.
      const contentRecord = newHistory[contentIndex];
      const currentParts = contentRecord.parts;
      const part = currentParts?.[partIndex];
      const functionResponse = part?.functionResponse;
      if (!currentParts || !part || !functionResponse) continue;

      const toolName = functionResponse.name || 'unknown_tool';
      const callId = functionResponse.id || Date.now().toString();
      const safeToolName = sanitizeFilenamePart(toolName).toLowerCase();
      const safeCallId = sanitizeFilenamePart(callId).toLowerCase();
      // Always produce an 8-character suffix: the base-36 rendering of Math.random() has a
      // variable length, so slicing from a late offset could yield an empty string.
      const randomSuffix = Math.random()
        .toString(36)
        .slice(2, 10)
        .padEnd(8, '0');
      const fileName = `${safeToolName}_${safeCallId}_${randomSuffix}.txt`;
      const filePath = path.join(toolOutputsDir, fileName);

      const originalResponse =
        (functionResponse.response as Record<string, unknown>) ?? {};

      const totalLines = content.split('\n').length;
      const fileSizeMB = (
        Buffer.byteLength(content, 'utf8') /
        1024 /
        1024
      ).toFixed(2);

      const maskedSnippet = this.formatMaskedSnippet({
        toolName,
        filePath,
        fileSizeMB,
        totalLines,
        tokens,
        preview: this.buildPreview(toolName, content, originalResponse),
      });

      const maskedPart: Part = {
        ...part,
        functionResponse: {
          ...functionResponse,
          response: { output: maskedSnippet },
        },
      };

      const savings = tokens - estimateTokenCountSync([maskedPart]);
      // Masking tiny outputs can cost more tokens than it saves; keep those untouched.
      if (savings <= 0) continue;

      // Offload only once masking is known to pay off, so that skipped parts do not leave
      // orphaned files behind.
      await fsPromises.writeFile(filePath, content, 'utf-8');

      const newParts = [...currentParts];
      newParts[partIndex] = maskedPart;
      newHistory[contentIndex] = { ...contentRecord, parts: newParts };
      actualTokensSaved += savings;
      maskedCount++;
    }

    debugLogger.debug(
      `[ToolOutputMasking] Masked ${maskedCount} tool outputs. Saved ~${actualTokensSaved.toLocaleString()} tokens.`,
    );

    const result: MaskingResult = {
      newHistory,
      maskedCount,
      tokensSaved: actualTokensSaved,
    };

    // Telemetry is only emitted when the pass actually saved something.
    if (actualTokensSaved <= 0) {
      return result;
    }

    logToolOutputMasking(
      config,
      new ToolOutputMaskingEvent({
        tokens_before: totalPrunableTokens,
        tokens_after: totalPrunableTokens - actualTokensSaved,
        masked_count: maskedCount,
        total_prunable_tokens: totalPrunableTokens,
      }),
    );

    return result;
  }

  /**
   * Scans the history from newest to oldest and collects every unmasked tool output that lies
   * beyond the protection window.
   *
   * The part whose tokens push the running total above `toolProtectionThreshold` is itself
   * treated as prunable, as is every older tool output.
   */
  private collectPrunableParts(
    history: Content[],
    maskingConfig: ReturnType<Config['getToolOutputMaskingConfig']>,
  ): { prunableParts: PrunableToolOutput[]; totalPrunableTokens: number } {
    const prunableParts: PrunableToolOutput[] = [];
    let cumulativeToolTokens = 0;
    let protectionBoundaryReached = false;
    let totalPrunableTokens = 0;

    // If protectLatestTurn is set, skip the most recent message (index history.length - 1).
    const scanStartIdx = maskingConfig.protectLatestTurn
      ? history.length - 2
      : history.length - 1;

    for (let i = scanStartIdx; i >= 0; i--) {
      const parts = history[i].parts || [];

      for (let j = parts.length - 1; j >= 0; j--) {
        const part = parts[j];

        // Tool outputs (functionResponse) are the primary targets for pruning because
        // they often contain voluminous data (e.g., shell logs, file content) that
        // can exceed context limits. We preserve other parts—such as user text,
        // model reasoning, and multimodal data—because they define the conversation's
        // core intent and logic, which are harder for the model to recover if lost.
        if (!part.functionResponse) continue;

        const toolOutputContent = this.getToolOutputContent(part);
        if (!toolOutputContent || this.isAlreadyMasked(toolOutputContent)) {
          continue;
        }

        const partTokens = estimateTokenCountSync([part]);

        if (!protectionBoundaryReached) {
          cumulativeToolTokens += partTokens;
          if (cumulativeToolTokens > maskingConfig.toolProtectionThreshold) {
            // The part that crossed the boundary is prunable.
            protectionBoundaryReached = true;
          } else {
            // Still inside the protection window.
            continue;
          }
        }

        totalPrunableTokens += partTokens;
        prunableParts.push({
          contentIndex: i,
          partIndex: j,
          tokens: partTokens,
          content: toolOutputContent,
        });
      }
    }

    return { prunableParts, totalPrunableTokens };
  }

  /**
   * Builds the preview that stays in context: a section-aware preview for the shell tool,
   * otherwise the first and last 250 characters of the serialized output.
   */
  private buildPreview(
    toolName: string,
    content: string,
    response: Record<string, unknown>,
  ): string {
    if (toolName === SHELL_TOOL_NAME) {
      return this.formatShellPreview(response);
    }
    // General tools: Head + Tail preview (250 chars each)
    if (content.length > 500) {
      return `${content.slice(0, 250)}\n... [TRUNCATED] ...\n${content.slice(-250)}`;
    }
    return content;
  }

  /**
   * Serializes the whole tool response of a part so it can be saved to disk.
   *
   * @returns The pretty-printed JSON of the response, or `null` when the part has no
   *   function response or no response payload.
   */
  private getToolOutputContent(part: Part): string | null {
    if (!part.functionResponse) return null;
    const response = part.functionResponse.response as Record<string, unknown>;
    if (!response) return null;

    // Stringify the entire response for saving.
    // This handles any tool output schema automatically.
    // Sibling parts (inlineData, etc.) are not affected: mask() keeps the original part
    // structure and only replaces the functionResponse content.
    return JSON.stringify(response, null, 2);
  }

  /** Reports whether the serialized output already carries the masking indicator tag. */
  private isAlreadyMasked(content: string): boolean {
    return content.includes(`<${MASKING_INDICATOR_TAG}`);
  }

  /**
   * Builds a preview for shell tool responses.
   *
   * The `Output` section is shortened to head/tail lines, while the other recognised sections
   * (Error, Exit Code, ...) are kept in full. Root-level `exitCode`/`exit_code` and `error`
   * fields are appended when they are not already present in the text.
   */
  private formatShellPreview(response: Record<string, unknown>): string {
    const raw = response['output'] || response['stdout'] || '';
    if (typeof raw !== 'string') {
      return typeof raw === 'object' ? JSON.stringify(raw) : String(raw);
    }
    const content = raw;

    // The shell tool output is structured in shell.ts with specific section prefixes:
    const sectionRegex =
      /^(Output|Error|Exit Code|Signal|Background PIDs|Process Group PGID): /m;
    // Splitting on a regex with a capture group yields
    // [leading text, name1, body1, name2, body2, ...].
    const parts = content.split(sectionRegex);

    if (parts.length < 3) {
      // Fallback to simple head/tail if not in expected shell.ts format
      return this.formatSimplePreview(content);
    }

    const previewParts: string[] = [];
    if (parts[0].trim()) {
      previewParts.push(this.formatSimplePreview(parts[0].trim()));
    }

    for (let i = 1; i < parts.length; i += 2) {
      const name = parts[i];
      const sectionContent = parts[i + 1]?.trim() || '';

      if (name === 'Output') {
        previewParts.push(
          `Output: ${this.formatSimplePreview(sectionContent)}`,
        );
      } else {
        // Keep other sections (Error, Exit Code, etc.) in full as they are usually high-signal and small
        previewParts.push(`${name}: ${sectionContent}`);
      }
    }

    let preview = previewParts.join('\n');

    // Also check root levels just in case some tool uses them or for future-proofing
    const exitCode = response['exitCode'] ?? response['exit_code'];
    const error = response['error'];
    if (
      exitCode !== undefined &&
      exitCode !== 0 &&
      exitCode !== null &&
      !content.includes(`Exit Code: ${exitCode}`)
    ) {
      preview += `\n[Exit Code: ${exitCode}]`;
    }
    if (error) {
      // Plain objects would otherwise render as "[object Object]".
      const errorText =
        typeof error === 'object' && !(error instanceof Error)
          ? JSON.stringify(error)
          : String(error);
      if (!content.includes(`Error: ${errorText}`)) {
        preview += `\n[Error: ${errorText}]`;
      }
    }

    return preview;
  }

  /**
   * Returns `content` unchanged when it has at most 20 lines; otherwise keeps the first and
   * last 10 lines with a marker stating how many lines were omitted.
   */
  private formatSimplePreview(content: string): string {
    const lines = content.split('\n');
    if (lines.length <= 20) return content;
    const head = lines.slice(0, 10);
    const tail = lines.slice(-10);
    return `${head.join('\n')}\n\n... [${
      lines.length - head.length - tail.length
    } lines omitted] ...\n\n${tail.join('\n')}`;
  }

  /**
   * Wraps the preview and the location of the full output in the masking indicator tag.
   * Only `preview` and `filePath` of the params are rendered.
   */
  private formatMaskedSnippet(params: MaskedSnippetParams): string {
    const { filePath, preview } = params;
    return [
      `<${MASKING_INDICATOR_TAG}>`,
      preview,
      '',
      `Output too large. Full output available at: ${filePath}`,
      `</${MASKING_INDICATOR_TAG}>`,
    ].join('\n');
  }
}
|
||||||
|
|
||||||
|
/**
 * Inputs available when rendering the snippet that replaces a masked tool output.
 */
interface MaskedSnippetParams {
  /** Name of the tool whose output is being masked. */
  toolName: string;
  /** Path of the file that holds the full, offloaded output. */
  filePath: string;
  /** Size of the offloaded output in megabytes, already formatted as text. */
  fileSizeMB: string;
  /** Number of lines in the offloaded output. */
  totalLines: number;
  /** Estimated token count of the original, unmasked part. */
  tokens: number;
  /** Shortened excerpt of the output that stays in the conversation. */
  preview: string;
}
|
||||||
@@ -46,6 +46,7 @@ import type {
|
|||||||
ApprovalModeSwitchEvent,
|
ApprovalModeSwitchEvent,
|
||||||
ApprovalModeDurationEvent,
|
ApprovalModeDurationEvent,
|
||||||
PlanExecutionEvent,
|
PlanExecutionEvent,
|
||||||
|
ToolOutputMaskingEvent,
|
||||||
} from '../types.js';
|
} from '../types.js';
|
||||||
import { EventMetadataKey } from './event-metadata-key.js';
|
import { EventMetadataKey } from './event-metadata-key.js';
|
||||||
import type { Config } from '../../config/config.js';
|
import type { Config } from '../../config/config.js';
|
||||||
@@ -108,6 +109,7 @@ export enum EventNames {
|
|||||||
APPROVAL_MODE_SWITCH = 'approval_mode_switch',
|
APPROVAL_MODE_SWITCH = 'approval_mode_switch',
|
||||||
APPROVAL_MODE_DURATION = 'approval_mode_duration',
|
APPROVAL_MODE_DURATION = 'approval_mode_duration',
|
||||||
PLAN_EXECUTION = 'plan_execution',
|
PLAN_EXECUTION = 'plan_execution',
|
||||||
|
TOOL_OUTPUT_MASKING = 'tool_output_masking',
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface LogResponse {
|
export interface LogResponse {
|
||||||
@@ -1217,8 +1219,40 @@ export class ClearcutLogger {
|
|||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
|
const logEvent = this.createLogEvent(
|
||||||
|
EventNames.TOOL_OUTPUT_TRUNCATED,
|
||||||
|
data,
|
||||||
|
);
|
||||||
|
this.enqueueLogEvent(logEvent);
|
||||||
|
this.flushIfNeeded();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Enqueues a `tool_output_masking` log event that carries the token counts and the number of
 * masked outputs of one masking pass, then flushes if needed.
 */
logToolOutputMaskingEvent(event: ToolOutputMaskingEvent): void {
  // Metadata key paired with the numeric value it reports, in emission order.
  const metrics: Array<[EventMetadataKey, number]> = [
    [
      EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_BEFORE,
      event.tokens_before,
    ],
    [
      EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_AFTER,
      event.tokens_after,
    ],
    [
      EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_MASKED_COUNT,
      event.masked_count,
    ],
    [
      EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_TOTAL_PRUNABLE_TOKENS,
      event.total_prunable_tokens,
    ],
  ];

  const data: EventValue[] = metrics.map(([gemini_cli_key, count]) => ({
    gemini_cli_key,
    value: count.toString(),
  }));

  const logEvent = this.createLogEvent(EventNames.TOOL_OUTPUT_MASKING, data);
  this.enqueueLogEvent(logEvent);
  this.flushIfNeeded();
}
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
// Defines valid event metadata keys for Clearcut logging.
|
// Defines valid event metadata keys for Clearcut logging.
|
||||||
export enum EventMetadataKey {
|
export enum EventMetadataKey {
|
||||||
// Deleted enums: 24
|
// Deleted enums: 24
|
||||||
// Next ID: 148
|
// Next ID: 152
|
||||||
|
|
||||||
GEMINI_CLI_KEY_UNKNOWN = 0,
|
GEMINI_CLI_KEY_UNKNOWN = 0,
|
||||||
|
|
||||||
@@ -561,4 +561,20 @@ export enum EventMetadataKey {
|
|||||||
|
|
||||||
// Logs the classifier threshold used.
|
// Logs the classifier threshold used.
|
||||||
GEMINI_CLI_ROUTING_CLASSIFIER_THRESHOLD = 147,
|
GEMINI_CLI_ROUTING_CLASSIFIER_THRESHOLD = 147,
|
||||||
|
|
||||||
|
// ==========================================================================
|
||||||
|
// Tool Output Masking Event Keys
|
||||||
|
// ==========================================================================
|
||||||
|
|
||||||
|
// Logs the total tokens in the prunable block before masking.
|
||||||
|
GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_BEFORE = 148,
|
||||||
|
|
||||||
|
// Logs the total tokens in the masked remnants after masking.
|
||||||
|
GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_AFTER = 149,
|
||||||
|
|
||||||
|
// Logs the number of tool outputs masked in this operation.
|
||||||
|
GEMINI_CLI_TOOL_OUTPUT_MASKING_MASKED_COUNT = 150,
|
||||||
|
|
||||||
|
// Logs the total prunable tokens identified at the trigger point.
|
||||||
|
GEMINI_CLI_TOOL_OUTPUT_MASKING_TOTAL_PRUNABLE_TOKENS = 151,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -56,6 +56,7 @@ import type {
|
|||||||
StartupStatsEvent,
|
StartupStatsEvent,
|
||||||
LlmLoopCheckEvent,
|
LlmLoopCheckEvent,
|
||||||
PlanExecutionEvent,
|
PlanExecutionEvent,
|
||||||
|
ToolOutputMaskingEvent,
|
||||||
} from './types.js';
|
} from './types.js';
|
||||||
import {
|
import {
|
||||||
recordApiErrorMetrics,
|
recordApiErrorMetrics,
|
||||||
@@ -163,6 +164,21 @@ export function logToolOutputTruncated(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reports a tool output masking pass: forwards the event to the Clearcut logger (when an
 * instance is available) and buffers an OpenTelemetry log record for it.
 *
 * @param config - Used to obtain the Clearcut logger and to build the record attributes.
 * @param event - The masking event to report.
 */
export function logToolOutputMasking(
  config: Config,
  event: ToolOutputMaskingEvent,
): void {
  const clearcutLogger = ClearcutLogger.getInstance(config);
  clearcutLogger?.logToolOutputMaskingEvent(event);

  bufferTelemetryEvent(() => {
    logs.getLogger(SERVICE_NAME).emit({
      body: event.toLogBody(),
      attributes: event.toOpenTelemetryAttributes(config),
    });
  });
}
|
||||||
|
|
||||||
export function logFileOperation(
|
export function logFileOperation(
|
||||||
config: Config,
|
config: Config,
|
||||||
event: FileOperationEvent,
|
event: FileOperationEvent,
|
||||||
|
|||||||
@@ -1376,6 +1376,49 @@ export class ToolOutputTruncatedEvent implements BaseTelemetryEvent {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** OpenTelemetry event name used for tool output masking records. */
export const EVENT_TOOL_OUTPUT_MASKING = 'gemini_cli.tool_output_masking';

/**
 * Telemetry event describing one tool output masking pass.
 */
export class ToolOutputMaskingEvent implements BaseTelemetryEvent {
  'event.name': 'tool_output_masking';
  /** ISO-8601 timestamp taken when the event object is constructed. */
  'event.timestamp': string;
  tokens_before: number;
  tokens_after: number;
  masked_count: number;
  total_prunable_tokens: number;

  /**
   * @param details - Token counts before/after masking, the number of masked outputs and the
   *   total prunable tokens; each value is copied onto the event unchanged.
   */
  constructor(details: {
    tokens_before: number;
    tokens_after: number;
    masked_count: number;
    total_prunable_tokens: number;
  }) {
    const { tokens_before, tokens_after, masked_count, total_prunable_tokens } =
      details;

    this['event.name'] = 'tool_output_masking';
    this['event.timestamp'] = new Date().toISOString();
    this.tokens_before = tokens_before;
    this.tokens_after = tokens_after;
    this.masked_count = masked_count;
    this.total_prunable_tokens = total_prunable_tokens;
  }

  /** Builds the attribute bag for the OpenTelemetry log record, including common attributes. */
  toOpenTelemetryAttributes(config: Config): LogAttributes {
    const attributes: LogAttributes = {
      ...getCommonAttributes(config),
      'event.name': EVENT_TOOL_OUTPUT_MASKING,
      'event.timestamp': this['event.timestamp'],
      tokens_before: this.tokens_before,
      tokens_after: this.tokens_after,
      masked_count: this.masked_count,
      total_prunable_tokens: this.total_prunable_tokens,
    };
    return attributes;
  }

  /** Human-readable summary: how many outputs were masked and how many tokens were saved. */
  toLogBody(): string {
    const tokensSaved = this.tokens_before - this.tokens_after;
    return `Tool output masking (Masked ${this.masked_count} tool outputs. Saved ${tokensSaved} tokens)`;
  }
}
|
||||||
|
|
||||||
export const EVENT_EXTENSION_UNINSTALL = 'gemini_cli.extension_uninstall';
|
export const EVENT_EXTENSION_UNINSTALL = 'gemini_cli.extension_uninstall';
|
||||||
export class ExtensionUninstallEvent implements BaseTelemetryEvent {
|
export class ExtensionUninstallEvent implements BaseTelemetryEvent {
|
||||||
'event.name': 'extension_uninstall';
|
'event.name': 'extension_uninstall';
|
||||||
@@ -1602,6 +1645,7 @@ export type TelemetryEvent =
|
|||||||
| LlmLoopCheckEvent
|
| LlmLoopCheckEvent
|
||||||
| StartupStatsEvent
|
| StartupStatsEvent
|
||||||
| WebFetchFallbackAttemptEvent
|
| WebFetchFallbackAttemptEvent
|
||||||
|
| ToolOutputMaskingEvent
|
||||||
| EditStrategyEvent
|
| EditStrategyEvent
|
||||||
| PlanExecutionEvent
|
| PlanExecutionEvent
|
||||||
| RewindEvent
|
| RewindEvent
|
||||||
|
|||||||
@@ -572,6 +572,14 @@ export async function fileExists(filePath: string): Promise<boolean> {
|
|||||||
const MAX_TRUNCATED_LINE_WIDTH = 1000;
|
const MAX_TRUNCATED_LINE_WIDTH = 1000;
|
||||||
const MAX_TRUNCATED_CHARS = 4000;
|
const MAX_TRUNCATED_CHARS = 4000;
|
||||||
|
|
||||||
|
/**
 * Makes a string safe to embed in a filename.
 *
 * Every character other than an ASCII letter, digit, underscore or hyphen is replaced by an
 * underscore, which also neutralises path separators and dots used for path traversal.
 *
 * @param part - Raw text, e.g. a tool name or call id.
 * @returns The text with each disallowed character replaced by `_`; the length is unchanged.
 */
export function sanitizeFilenamePart(part: string): string {
  // \w is exactly [A-Za-z0-9_] here (no unicode flag), so this keeps word characters and '-'.
  const disallowedChars = /[^\w-]/g;
  return part.replace(disallowedChars, '_');
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Formats a truncated message for tool output, handling multi-line and single-line (elephant) cases.
|
* Formats a truncated message for tool output, handling multi-line and single-line (elephant) cases.
|
||||||
*/
|
*/
|
||||||
@@ -623,11 +631,8 @@ export async function saveTruncatedToolOutput(
|
|||||||
id: string | number, // Accept string (callId) or number (truncationId)
|
id: string | number, // Accept string (callId) or number (truncationId)
|
||||||
projectTempDir: string,
|
projectTempDir: string,
|
||||||
): Promise<{ outputFile: string; totalLines: number }> {
|
): Promise<{ outputFile: string; totalLines: number }> {
|
||||||
const safeToolName = toolName.replace(/[^a-z0-9]/gi, '_').toLowerCase();
|
const safeToolName = sanitizeFilenamePart(toolName).toLowerCase();
|
||||||
const safeId = id
|
const safeId = sanitizeFilenamePart(id.toString()).toLowerCase();
|
||||||
.toString()
|
|
||||||
.replace(/[^a-z0-9]/gi, '_')
|
|
||||||
.toLowerCase();
|
|
||||||
const fileName = `${safeToolName}_${safeId}.txt`;
|
const fileName = `${safeToolName}_${safeId}.txt`;
|
||||||
const toolOutputDir = path.join(projectTempDir, TOOL_OUTPUT_DIR);
|
const toolOutputDir = path.join(projectTempDir, TOOL_OUTPUT_DIR);
|
||||||
const outputFile = path.join(toolOutputDir, fileName);
|
const outputFile = path.join(toolOutputDir, fileName);
|
||||||
|
|||||||
@@ -1428,6 +1428,44 @@
|
|||||||
"default": {},
|
"default": {},
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
"toolOutputMasking": {
|
||||||
|
"title": "Tool Output Masking",
|
||||||
|
"description": "Advanced settings for tool output masking to manage context window efficiency.",
|
||||||
|
"markdownDescription": "Advanced settings for tool output masking to manage context window efficiency.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `{}`",
|
||||||
|
"default": {},
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"enabled": {
|
||||||
|
"title": "Enable Tool Output Masking",
|
||||||
|
"description": "Enables tool output masking to save tokens.",
|
||||||
|
"markdownDescription": "Enables tool output masking to save tokens.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`",
|
||||||
|
"default": false,
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"toolProtectionThreshold": {
|
||||||
|
"title": "Tool Protection Threshold",
|
||||||
|
"description": "Minimum number of tokens to protect from masking (most recent tool outputs).",
|
||||||
|
"markdownDescription": "Minimum number of tokens to protect from masking (most recent tool outputs).\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `50000`",
|
||||||
|
"default": 50000,
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"minPrunableTokensThreshold": {
|
||||||
|
"title": "Min Prunable Tokens Threshold",
|
||||||
|
"description": "Minimum prunable tokens required to trigger a masking pass.",
|
||||||
|
"markdownDescription": "Minimum prunable tokens required to trigger a masking pass.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `30000`",
|
||||||
|
"default": 30000,
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"protectLatestTurn": {
|
||||||
|
"title": "Protect Latest Turn",
|
||||||
|
"description": "Ensures the absolute latest turn is never masked, regardless of token count.",
|
||||||
|
"markdownDescription": "Ensures the absolute latest turn is never masked, regardless of token count.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `true`",
|
||||||
|
"default": true,
|
||||||
|
"type": "boolean"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false
|
||||||
|
},
|
||||||
"enableAgents": {
|
"enableAgents": {
|
||||||
"title": "Enable Agents",
|
"title": "Enable Agents",
|
||||||
"description": "Enable local and remote subagents. Warning: Experimental feature, uses YOLO mode for subagents",
|
"description": "Enable local and remote subagents. Warning: Experimental feature, uses YOLO mode for subagents",
|
||||||
|
|||||||
Reference in New Issue
Block a user