mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-17 17:41:24 -07:00
feat(context): implement observation masking for tool outputs (#18389)
This commit is contained in:
7
.gemini/settings.json
Normal file
7
.gemini/settings.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"experimental": {
|
||||
"toolOutputMasking": {
|
||||
"enabled": true
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -806,6 +806,7 @@ export async function loadCliConfig(
|
||||
skillsSupport: settings.skills?.enabled ?? true,
|
||||
disabledSkills: settings.skills?.disabled,
|
||||
experimentalJitContext: settings.experimental?.jitContext,
|
||||
toolOutputMasking: settings.experimental?.toolOutputMasking,
|
||||
noBrowser: !!process.env['NO_BROWSER'],
|
||||
summarizeToolOutput: settings.model?.summarizeToolOutput,
|
||||
ideMode,
|
||||
|
||||
@@ -1462,6 +1462,58 @@ const SETTINGS_SCHEMA = {
|
||||
description: 'Setting to enable experimental features',
|
||||
showInDialog: false,
|
||||
properties: {
|
||||
toolOutputMasking: {
|
||||
type: 'object',
|
||||
label: 'Tool Output Masking',
|
||||
category: 'Experimental',
|
||||
requiresRestart: true,
|
||||
ignoreInDocs: true,
|
||||
default: {},
|
||||
description:
|
||||
'Advanced settings for tool output masking to manage context window efficiency.',
|
||||
showInDialog: false,
|
||||
properties: {
|
||||
enabled: {
|
||||
type: 'boolean',
|
||||
label: 'Enable Tool Output Masking',
|
||||
category: 'Experimental',
|
||||
requiresRestart: true,
|
||||
default: false,
|
||||
description: 'Enables tool output masking to save tokens.',
|
||||
showInDialog: false,
|
||||
},
|
||||
toolProtectionThreshold: {
|
||||
type: 'number',
|
||||
label: 'Tool Protection Threshold',
|
||||
category: 'Experimental',
|
||||
requiresRestart: true,
|
||||
default: 50000,
|
||||
description:
|
||||
'Minimum number of tokens to protect from masking (most recent tool outputs).',
|
||||
showInDialog: false,
|
||||
},
|
||||
minPrunableTokensThreshold: {
|
||||
type: 'number',
|
||||
label: 'Min Prunable Tokens Threshold',
|
||||
category: 'Experimental',
|
||||
requiresRestart: true,
|
||||
default: 30000,
|
||||
description:
|
||||
'Minimum prunable tokens required to trigger a masking pass.',
|
||||
showInDialog: false,
|
||||
},
|
||||
protectLatestTurn: {
|
||||
type: 'boolean',
|
||||
label: 'Protect Latest Turn',
|
||||
category: 'Experimental',
|
||||
requiresRestart: true,
|
||||
default: true,
|
||||
description:
|
||||
'Ensures the absolute latest turn is never masked, regardless of token count.',
|
||||
showInDialog: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
enableAgents: {
|
||||
type: 'boolean',
|
||||
label: 'Enable Agents',
|
||||
|
||||
@@ -149,6 +149,13 @@ export interface OutputSettings {
|
||||
format?: OutputFormat;
|
||||
}
|
||||
|
||||
/**
 * Fully resolved settings that control tool output masking.
 */
export interface ToolOutputMaskingConfig {
  /** Turns tool output masking on or off. */
  enabled: boolean;

  /**
   * Token budget for the most recent tool outputs. Outputs inside this budget
   * are protected from masking.
   */
  toolProtectionThreshold: number;

  /** Minimum number of prunable tokens required to trigger a masking pass. */
  minPrunableTokensThreshold: number;

  /** When true, the latest turn is never masked, regardless of token count. */
  protectLatestTurn: boolean;
}
|
||||
|
||||
export interface ExtensionSetting {
|
||||
name: string;
|
||||
description: string;
|
||||
@@ -273,6 +280,11 @@ import {
|
||||
DEFAULT_FILE_FILTERING_OPTIONS,
|
||||
DEFAULT_MEMORY_FILE_FILTERING_OPTIONS,
|
||||
} from './constants.js';
|
||||
import {
|
||||
DEFAULT_TOOL_PROTECTION_THRESHOLD,
|
||||
DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD,
|
||||
DEFAULT_PROTECT_LATEST_TURN,
|
||||
} from '../services/toolOutputMaskingService.js';
|
||||
|
||||
import {
|
||||
type ExtensionLoader,
|
||||
@@ -462,6 +474,7 @@ export interface ConfigParameters {
|
||||
disabledSkills?: string[];
|
||||
adminSkillsEnabled?: boolean;
|
||||
experimentalJitContext?: boolean;
|
||||
toolOutputMasking?: Partial<ToolOutputMaskingConfig>;
|
||||
disableLLMCorrection?: boolean;
|
||||
plan?: boolean;
|
||||
onModelChange?: (model: string) => void;
|
||||
@@ -599,6 +612,7 @@ export class Config {
|
||||
private pendingIncludeDirectories: string[];
|
||||
private readonly enableHooks: boolean;
|
||||
private readonly enableHooksUI: boolean;
|
||||
private readonly toolOutputMasking: ToolOutputMaskingConfig;
|
||||
private hooks: { [K in HookEventName]?: HookDefinition[] } | undefined;
|
||||
private projectHooks:
|
||||
| ({ [K in HookEventName]?: HookDefinition[] } & { disabled?: string[] })
|
||||
@@ -721,6 +735,18 @@ export class Config {
|
||||
this.modelAvailabilityService = new ModelAvailabilityService();
|
||||
this.previewFeatures = params.previewFeatures ?? undefined;
|
||||
this.experimentalJitContext = params.experimentalJitContext ?? false;
|
||||
this.toolOutputMasking = {
|
||||
enabled: params.toolOutputMasking?.enabled ?? false,
|
||||
toolProtectionThreshold:
|
||||
params.toolOutputMasking?.toolProtectionThreshold ??
|
||||
DEFAULT_TOOL_PROTECTION_THRESHOLD,
|
||||
minPrunableTokensThreshold:
|
||||
params.toolOutputMasking?.minPrunableTokensThreshold ??
|
||||
DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD,
|
||||
protectLatestTurn:
|
||||
params.toolOutputMasking?.protectLatestTurn ??
|
||||
DEFAULT_PROTECT_LATEST_TURN,
|
||||
};
|
||||
this.maxSessionTurns = params.maxSessionTurns ?? -1;
|
||||
this.experimentalZedIntegration =
|
||||
params.experimentalZedIntegration ?? false;
|
||||
@@ -1445,6 +1471,14 @@ export class Config {
|
||||
return this.experimentalJitContext;
|
||||
}
|
||||
|
||||
/** Reports whether tool output masking is switched on for this configuration. */
getToolOutputMaskingEnabled(): boolean {
  const { enabled } = this.toolOutputMasking;
  return enabled;
}
|
||||
|
||||
/**
 * Returns the tool output masking settings held by this instance.
 * The stored object itself is returned, not a copy.
 */
getToolOutputMaskingConfig(): ToolOutputMaskingConfig {
  const maskingSettings = this.toolOutputMasking;
  return maskingSettings;
}
|
||||
|
||||
getGeminiMdFileCount(): number {
|
||||
if (this.experimentalJitContext && this.contextManager) {
|
||||
return this.contextManager.getLoadedPaths().size;
|
||||
|
||||
@@ -213,6 +213,7 @@ describe('Gemini Client (client.ts)', () => {
|
||||
getGlobalMemory: vi.fn().mockReturnValue(''),
|
||||
getEnvironmentMemory: vi.fn().mockReturnValue(''),
|
||||
isJitContextEnabled: vi.fn().mockReturnValue(false),
|
||||
getToolOutputMaskingEnabled: vi.fn().mockReturnValue(false),
|
||||
getDisableLoopDetection: vi.fn().mockReturnValue(false),
|
||||
|
||||
getSessionId: vi.fn().mockReturnValue('test-session-id'),
|
||||
|
||||
@@ -54,6 +54,7 @@ import { handleFallback } from '../fallback/handler.js';
|
||||
import type { RoutingContext } from '../routing/routingStrategy.js';
|
||||
import { debugLogger } from '../utils/debugLogger.js';
|
||||
import type { ModelConfigKey } from '../services/modelConfigService.js';
|
||||
import { ToolOutputMaskingService } from '../services/toolOutputMaskingService.js';
|
||||
import { calculateRequestTokenCount } from '../utils/tokenCalculation.js';
|
||||
import {
|
||||
applyModelSelection,
|
||||
@@ -84,6 +85,7 @@ export class GeminiClient {
|
||||
|
||||
private readonly loopDetector: LoopDetectionService;
|
||||
private readonly compressionService: ChatCompressionService;
|
||||
private readonly toolOutputMaskingService: ToolOutputMaskingService;
|
||||
private lastPromptId: string;
|
||||
private currentSequenceModel: string | null = null;
|
||||
private lastSentIdeContext: IdeContext | undefined;
|
||||
@@ -98,6 +100,7 @@ export class GeminiClient {
|
||||
constructor(private readonly config: Config) {
|
||||
this.loopDetector = new LoopDetectionService(config);
|
||||
this.compressionService = new ChatCompressionService();
|
||||
this.toolOutputMaskingService = new ToolOutputMaskingService();
|
||||
this.lastPromptId = this.config.getSessionId();
|
||||
|
||||
coreEvents.on(CoreEvent.ModelChanged, this.handleModelChanged);
|
||||
@@ -562,6 +565,8 @@ export class GeminiClient {
|
||||
const remainingTokenCount =
|
||||
tokenLimit(modelForLimitCheck) - this.getChat().getLastPromptTokenCount();
|
||||
|
||||
await this.tryMaskToolOutputs(this.getHistory());
|
||||
|
||||
// Estimate tokens. For text-only requests, we estimate based on character length.
|
||||
// For requests with non-text parts (like images, tools), we use the countTokens API.
|
||||
const estimatedRequestTokenCount = await calculateRequestTokenCount(
|
||||
@@ -1056,4 +1061,20 @@ export class GeminiClient {
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
/**
 * Masks bulky tool outputs to save context window space.
 *
 * Does nothing unless tool output masking is enabled in the config. Masking is
 * only an optimization, so a failure inside the masking service (it creates a
 * directory and writes files) is logged and the chat history is left as it
 * was, rather than aborting the request that triggered the pass.
 *
 * @param history Conversation history to scan for maskable tool outputs.
 */
private async tryMaskToolOutputs(history: Content[]): Promise<void> {
  if (!this.config.getToolOutputMaskingEnabled()) {
    return;
  }

  let result: Awaited<ReturnType<ToolOutputMaskingService['mask']>>;
  try {
    result = await this.toolOutputMaskingService.mask(history, this.config);
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    debugLogger.warn(
      `[ToolOutputMasking] Masking failed; continuing with unmasked history: ${reason}`,
    );
    return;
  }

  // Only replace the chat history when something was actually masked.
  if (result.maskedCount > 0) {
    this.getChat().setHistory(result.newHistory);
  }
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
|
||||
|
||||
exports[`ToolOutputMaskingService > should match the expected snapshot for a masked tool output 1`] = `
|
||||
"<tool_output_masked>
|
||||
Line
|
||||
Line
|
||||
Line
|
||||
Line
|
||||
Line
|
||||
Line
|
||||
Line
|
||||
Line
|
||||
Line
|
||||
Line
|
||||
|
||||
... [6 lines omitted] ...
|
||||
|
||||
Line
|
||||
Line
|
||||
Line
|
||||
Line
|
||||
Line
|
||||
Line
|
||||
Line
|
||||
Line
|
||||
Line
|
||||
|
||||
|
||||
Output too large. Full output available at: /mock/history/tool-outputs/run_shell_command_deterministic.txt
|
||||
</tool_output_masked>"
|
||||
`;
|
||||
500
packages/core/src/services/toolOutputMaskingService.test.ts
Normal file
500
packages/core/src/services/toolOutputMaskingService.test.ts
Normal file
@@ -0,0 +1,500 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import {
|
||||
ToolOutputMaskingService,
|
||||
MASKING_INDICATOR_TAG,
|
||||
} from './toolOutputMaskingService.js';
|
||||
import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
|
||||
import { estimateTokenCountSync } from '../utils/tokenCalculation.js';
|
||||
import type { Config } from '../config/config.js';
|
||||
import type { Content, Part } from '@google/genai';
|
||||
|
||||
vi.mock('../utils/tokenCalculation.js', () => ({
|
||||
estimateTokenCountSync: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock('node:fs/promises', () => ({
|
||||
mkdir: vi.fn().mockResolvedValue(undefined),
|
||||
writeFile: vi.fn().mockResolvedValue(undefined),
|
||||
}));
|
||||
|
||||
describe('ToolOutputMaskingService', () => {
|
||||
let service: ToolOutputMaskingService;
|
||||
let mockConfig: Config;
|
||||
|
||||
const mockedEstimateTokenCountSync = vi.mocked(estimateTokenCountSync);
|
||||
|
||||
beforeEach(() => {
|
||||
service = new ToolOutputMaskingService();
|
||||
mockConfig = {
|
||||
storage: {
|
||||
getHistoryDir: () => '/mock/history',
|
||||
},
|
||||
getUsageStatisticsEnabled: () => false,
|
||||
getToolOutputMaskingConfig: () => ({
|
||||
enabled: true,
|
||||
toolProtectionThreshold: 50000,
|
||||
minPrunableTokensThreshold: 30000,
|
||||
protectLatestTurn: true,
|
||||
}),
|
||||
} as unknown as Config;
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
it('should not mask if total tool tokens are below protection threshold', async () => {
|
||||
const history: Content[] = [
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'test_tool',
|
||||
response: { output: 'small output' },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
mockedEstimateTokenCountSync.mockReturnValue(100);
|
||||
|
||||
const result = await service.mask(history, mockConfig);
|
||||
|
||||
expect(result.maskedCount).toBe(0);
|
||||
expect(result.newHistory).toEqual(history);
|
||||
});
|
||||
|
||||
const getToolResponse = (part: Part | undefined): string => {
|
||||
const resp = part?.functionResponse?.response as
|
||||
| { output: string }
|
||||
| undefined;
|
||||
return resp?.output ?? (resp as unknown as string) ?? '';
|
||||
};
|
||||
|
||||
it('should protect the latest turn and mask older outputs beyond 50k window if total > 30k', async () => {
|
||||
// History:
|
||||
// Turn 1: 60k (Oldest)
|
||||
// Turn 2: 20k
|
||||
// Turn 3: 10k (Latest) - Protected because PROTECT_LATEST_TURN is true
|
||||
const history: Content[] = [
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 't1',
|
||||
response: { output: 'A'.repeat(60000) },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 't2',
|
||||
response: { output: 'B'.repeat(20000) },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 't3',
|
||||
response: { output: 'C'.repeat(10000) },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
|
||||
const toolName = parts[0].functionResponse?.name;
|
||||
const resp = parts[0].functionResponse?.response as Record<
|
||||
string,
|
||||
unknown
|
||||
>;
|
||||
const content = (resp?.['output'] as string) ?? JSON.stringify(resp);
|
||||
if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
|
||||
|
||||
if (toolName === 't1') return 60000;
|
||||
if (toolName === 't2') return 20000;
|
||||
if (toolName === 't3') return 10000;
|
||||
return 0;
|
||||
});
|
||||
|
||||
// Scanned: Turn 2 (20k), Turn 1 (60k). Total = 80k.
|
||||
// Turn 2: Cumulative = 20k. Protected (<= 50k).
|
||||
// Turn 1: Cumulative = 80k. Crossed 50k boundary. Prunable.
|
||||
// Total Prunable = 60k (> 30k trigger).
|
||||
const result = await service.mask(history, mockConfig);
|
||||
|
||||
expect(result.maskedCount).toBe(1);
|
||||
expect(getToolResponse(result.newHistory[0].parts?.[0])).toContain(
|
||||
`<${MASKING_INDICATOR_TAG}`,
|
||||
);
|
||||
expect(getToolResponse(result.newHistory[1].parts?.[0])).toEqual(
|
||||
'B'.repeat(20000),
|
||||
);
|
||||
expect(getToolResponse(result.newHistory[2].parts?.[0])).toEqual(
|
||||
'C'.repeat(10000),
|
||||
);
|
||||
});
|
||||
|
||||
it('should perform global aggregation for many small parts once boundary is hit', async () => {
|
||||
// history.length = 12. Skip index 11 (latest).
|
||||
// Indices 0-10: 10k each.
|
||||
// Index 10: 10k (Sum 10k)
|
||||
// Index 9: 10k (Sum 20k)
|
||||
// Index 8: 10k (Sum 30k)
|
||||
// Index 7: 10k (Sum 40k)
|
||||
// Index 6: 10k (Sum 50k) - still protected, because 50k does not exceed the 50k threshold.
|
||||
// Index 5: 10k (Sum 60k) - crosses the 50k boundary, so it is the first prunable part.
|
||||
// Indices 5, 4, 3, 2, 1, 0 are all prunable (6 * 10k = 60k).
|
||||
const history: Content[] = Array.from({ length: 12 }, (_, i) => ({
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: `tool${i}`,
|
||||
response: { output: 'A'.repeat(10000) },
|
||||
},
|
||||
},
|
||||
],
|
||||
}));
|
||||
|
||||
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
|
||||
const resp = parts[0].functionResponse?.response as
|
||||
| { output?: string; result?: string }
|
||||
| string
|
||||
| undefined;
|
||||
const content =
|
||||
typeof resp === 'string'
|
||||
? resp
|
||||
: resp?.output || resp?.result || JSON.stringify(resp);
|
||||
if (content?.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
|
||||
return content?.length || 0;
|
||||
});
|
||||
|
||||
const result = await service.mask(history, mockConfig);
|
||||
|
||||
expect(result.maskedCount).toBe(6); // indices 6-10 fill the 50k protection window; indices 0-5 are masked
|
||||
expect(result.tokensSaved).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('should verify tool-aware previews (shell vs generic)', async () => {
|
||||
const shellHistory: Content[] = [
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: SHELL_TOOL_NAME,
|
||||
response: {
|
||||
output:
|
||||
'Output: line1\nline2\nline3\nline4\nline5\nError: failed\nExit Code: 1',
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
// Protection buffer
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'p',
|
||||
response: { output: 'p'.repeat(60000) },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
// Latest turn
|
||||
{
|
||||
role: 'user',
|
||||
parts: [{ functionResponse: { name: 'l', response: { output: 'l' } } }],
|
||||
},
|
||||
];
|
||||
|
||||
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
|
||||
const name = parts[0].functionResponse?.name;
|
||||
const resp = parts[0].functionResponse?.response as Record<
|
||||
string,
|
||||
unknown
|
||||
>;
|
||||
const content = (resp?.['output'] as string) ?? JSON.stringify(resp);
|
||||
if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
|
||||
|
||||
if (name === SHELL_TOOL_NAME) return 100000;
|
||||
if (name === 'p') return 60000;
|
||||
return 100;
|
||||
});
|
||||
|
||||
const result = await service.mask(shellHistory, mockConfig);
|
||||
const maskedBash = getToolResponse(result.newHistory[0].parts?.[0]);
|
||||
|
||||
expect(maskedBash).toContain('Output: line1\nline2\nline3\nline4\nline5');
|
||||
expect(maskedBash).toContain('Exit Code: 1');
|
||||
expect(maskedBash).toContain('Error: failed');
|
||||
});
|
||||
|
||||
it('should skip already masked content and not count it towards totals', async () => {
|
||||
const history: Content[] = [
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'tool1',
|
||||
response: {
|
||||
output: `<${MASKING_INDICATOR_TAG}>...</${MASKING_INDICATOR_TAG}>`,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'tool2',
|
||||
response: { output: 'A'.repeat(60000) },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
];
|
||||
mockedEstimateTokenCountSync.mockReturnValue(60000);
|
||||
|
||||
const result = await service.mask(history, mockConfig);
|
||||
expect(result.maskedCount).toBe(0); // tool1 skipped, tool2 is the "latest" which is protected
|
||||
});
|
||||
|
||||
it('should handle different response keys in masked update', async () => {
|
||||
const history: Content[] = [
|
||||
{
|
||||
role: 'model',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 't1',
|
||||
response: { result: 'A'.repeat(60000) },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
role: 'model',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'p',
|
||||
response: { output: 'P'.repeat(60000) },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{ role: 'user', parts: [{ text: 'latest' }] },
|
||||
];
|
||||
|
||||
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
|
||||
const resp = parts[0].functionResponse?.response as Record<
|
||||
string,
|
||||
unknown
|
||||
>;
|
||||
const content =
|
||||
(resp?.['output'] as string) ??
|
||||
(resp?.['result'] as string) ??
|
||||
JSON.stringify(resp);
|
||||
if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
|
||||
return 60000;
|
||||
});
|
||||
|
||||
const result = await service.mask(history, mockConfig);
|
||||
expect(result.maskedCount).toBe(2); // both t1 and p are prunable (cumulative 60k and 120k)
|
||||
const responseObj = result.newHistory[0].parts?.[0].functionResponse
|
||||
?.response as Record<string, unknown>;
|
||||
expect(Object.keys(responseObj)).toEqual(['output']);
|
||||
});
|
||||
|
||||
it('should preserve multimodal parts while masking tool responses', async () => {
|
||||
const history: Content[] = [
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 't1',
|
||||
response: { output: 'A'.repeat(60000) },
|
||||
},
|
||||
},
|
||||
{
|
||||
inlineData: {
|
||||
data: 'base64data',
|
||||
mimeType: 'image/png',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
// Protection buffer
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'p',
|
||||
response: { output: 'p'.repeat(60000) },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
// Latest turn
|
||||
{ role: 'user', parts: [{ text: 'latest' }] },
|
||||
];
|
||||
|
||||
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
|
||||
const resp = parts[0].functionResponse?.response as Record<
|
||||
string,
|
||||
unknown
|
||||
>;
|
||||
const content = (resp?.['output'] as string) ?? JSON.stringify(resp);
|
||||
if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
|
||||
|
||||
if (parts[0].functionResponse?.name === 't1') return 60000;
|
||||
if (parts[0].functionResponse?.name === 'p') return 60000;
|
||||
return 100;
|
||||
});
|
||||
|
||||
const result = await service.mask(history, mockConfig);
|
||||
|
||||
expect(result.maskedCount).toBe(2); //Both t1 and p are prunable (cumulative 60k each > 50k protection)
|
||||
expect(result.newHistory[0].parts).toHaveLength(2);
|
||||
expect(result.newHistory[0].parts?.[0].functionResponse).toBeDefined();
|
||||
expect(
|
||||
(
|
||||
result.newHistory[0].parts?.[0].functionResponse?.response as Record<
|
||||
string,
|
||||
unknown
|
||||
>
|
||||
)['output'],
|
||||
).toContain(`<${MASKING_INDICATOR_TAG}`);
|
||||
expect(result.newHistory[0].parts?.[1].inlineData).toEqual({
|
||||
data: 'base64data',
|
||||
mimeType: 'image/png',
|
||||
});
|
||||
});
|
||||
|
||||
it('should match the expected snapshot for a masked tool output', async () => {
|
||||
const history: Content[] = [
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: SHELL_TOOL_NAME,
|
||||
response: {
|
||||
output: 'Line\n'.repeat(25),
|
||||
exitCode: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
// Buffer to push shell_tool into prunable territory
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'padding',
|
||||
response: { output: 'B'.repeat(60000) },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{ role: 'user', parts: [{ text: 'latest' }] },
|
||||
];
|
||||
|
||||
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
|
||||
const resp = parts[0].functionResponse?.response as Record<
|
||||
string,
|
||||
unknown
|
||||
>;
|
||||
const content = (resp?.['output'] as string) ?? JSON.stringify(resp);
|
||||
if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
|
||||
|
||||
if (parts[0].functionResponse?.name === SHELL_TOOL_NAME) return 1000;
|
||||
if (parts[0].functionResponse?.name === 'padding') return 60000;
|
||||
return 10;
|
||||
});
|
||||
|
||||
const result = await service.mask(history, mockConfig);
|
||||
|
||||
// Verify complete masking: only 'output' key should exist
|
||||
const responseObj = result.newHistory[0].parts?.[0].functionResponse
|
||||
?.response as Record<string, unknown>;
|
||||
expect(Object.keys(responseObj)).toEqual(['output']);
|
||||
|
||||
const response = responseObj['output'] as string;
|
||||
|
||||
// We replace the random part of the filename for deterministic snapshots
|
||||
// and normalize path separators for cross-platform compatibility
|
||||
const deterministicResponse = response
|
||||
.replace(
|
||||
new RegExp(`${SHELL_TOOL_NAME}_[^\\s"]+\\.txt`, 'g'),
|
||||
`${SHELL_TOOL_NAME}_deterministic.txt`,
|
||||
)
|
||||
.replace(/\\/g, '/');
|
||||
|
||||
expect(deterministicResponse).toMatchSnapshot();
|
||||
});
|
||||
|
||||
it('should not mask if masking increases token count (due to overhead)', async () => {
|
||||
const history: Content[] = [
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'tiny_tool',
|
||||
response: { output: 'tiny' },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
// Protection buffer to push tiny_tool into prunable territory
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'padding',
|
||||
response: { output: 'B'.repeat(60000) },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{ role: 'user', parts: [{ text: 'latest' }] },
|
||||
];
|
||||
|
||||
mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
|
||||
if (parts[0].functionResponse?.name === 'tiny_tool') return 5;
|
||||
if (parts[0].functionResponse?.name === 'padding') return 60000;
|
||||
return 1000; // The masked version would be huge due to boilerplate
|
||||
});
|
||||
|
||||
const result = await service.mask(history, mockConfig);
|
||||
expect(result.maskedCount).toBe(0); // both parts are prunable, but the mocked estimates give no savings for either, so nothing is masked
|
||||
});
|
||||
});
|
||||
344
packages/core/src/services/toolOutputMaskingService.ts
Normal file
344
packages/core/src/services/toolOutputMaskingService.ts
Normal file
@@ -0,0 +1,344 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import type { Content, Part } from '@google/genai';
|
||||
import path from 'node:path';
|
||||
import * as fsPromises from 'node:fs/promises';
|
||||
import { estimateTokenCountSync } from '../utils/tokenCalculation.js';
|
||||
import { debugLogger } from '../utils/debugLogger.js';
|
||||
import { sanitizeFilenamePart } from '../utils/fileUtils.js';
|
||||
import type { Config } from '../config/config.js';
|
||||
import { logToolOutputMasking } from '../telemetry/loggers.js';
|
||||
import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
|
||||
import { ToolOutputMaskingEvent } from '../telemetry/types.js';
|
||||
|
||||
// Tool output masking defaults
|
||||
export const DEFAULT_TOOL_PROTECTION_THRESHOLD = 50000;
|
||||
export const DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD = 30000;
|
||||
export const DEFAULT_PROTECT_LATEST_TURN = true;
|
||||
export const MASKING_INDICATOR_TAG = 'tool_output_masked';
|
||||
|
||||
export const TOOL_OUTPUTS_DIR = 'tool-outputs';
|
||||
|
||||
/**
 * Outcome of a single tool output masking pass.
 */
export interface MaskingResult {
  /** Conversation history to use after the pass. */
  newHistory: Content[];

  /** Number of tool output parts that were replaced by a masked snippet. */
  maskedCount: number;

  /** Sum of the estimated token savings over all masked parts. */
  tokensSaved: number;
}
|
||||
|
||||
/**
|
||||
* Service to manage context window efficiency by masking bulky tool outputs (Tool Output Masking).
|
||||
*
|
||||
* It implements a "Hybrid Backward Scanned FIFO" algorithm to balance context relevance with
|
||||
* token savings:
|
||||
* 1. **Protection Window**: Protects the newest `toolProtectionThreshold` (default 50k) tool tokens
|
||||
* from pruning. Optionally skips the entire latest conversation turn to ensure full context for
|
||||
* the model's next response.
|
||||
* 2. **Global Aggregation**: Scans backwards past the protection window to identify all remaining
|
||||
* tool outputs that haven't been masked yet.
|
||||
* 3. **Batch Trigger**: Trigger masking only if the total prunable tokens exceed
|
||||
* `minPrunableTokensThreshold` (default 30k).
|
||||
*
|
||||
* @remarks
|
||||
* Effectively, this means masking only starts once the conversation contains approximately 80k
|
||||
* tokens of unmasked tool outputs (50k protected + 30k prunable buffer). Small tool outputs
|
||||
* are preserved until they collectively reach the threshold.
|
||||
*/
|
||||
export class ToolOutputMaskingService {
|
||||
/**
 * Masks older, bulky tool outputs in `history` and offloads their full text
 * to files under the history directory.
 *
 * The history is scanned from newest to oldest, skipping the last entry when
 * `protectLatestTurn` is set. Unmasked `functionResponse` parts are summed
 * until the running token estimate exceeds `toolProtectionThreshold`; the part
 * that crosses the threshold and every older part are prunable. No masking is
 * attempted unless the prunable total reaches `minPrunableTokensThreshold`.
 * A prunable part is only masked (and only then written to disk) when the
 * masked version has a smaller token estimate than the original.
 *
 * @param history Conversation history. The array and its entries are not
 *   mutated; changed entries are replaced by copies in the returned history.
 * @param config Provides the masking thresholds, the history directory and
 *   the telemetry target.
 * @returns The history to use, the number of masked parts and the estimated
 *   tokens saved. When no masking pass runs, `newHistory` is the input array.
 */
async mask(history: Content[], config: Config): Promise<MaskingResult> {
  const unchanged: MaskingResult = {
    newHistory: history,
    maskedCount: 0,
    tokensSaved: 0,
  };
  if (history.length === 0) {
    return unchanged;
  }

  const maskingConfig = config.getToolOutputMaskingConfig();

  const prunableParts: Array<{
    contentIndex: number;
    partIndex: number;
    tokens: number;
    content: string;
  }> = [];
  let cumulativeToolTokens = 0;
  let protectionBoundaryReached = false;
  let totalPrunableTokens = 0;

  // If protectLatestTurn is true, skip the most recent message
  // (index history.length - 1) entirely.
  const scanStartIdx = maskingConfig.protectLatestTurn
    ? history.length - 2
    : history.length - 1;

  // Backward scan to identify prunable tool outputs.
  for (let i = scanStartIdx; i >= 0; i--) {
    const parts = history[i].parts ?? [];

    for (let j = parts.length - 1; j >= 0; j--) {
      const part = parts[j];

      // Only tool outputs (functionResponse) are candidates. User text, model
      // output and multimodal parts are always preserved.
      if (!part.functionResponse) continue;

      const toolOutputContent = this.getToolOutputContent(part);
      if (!toolOutputContent || this.isAlreadyMasked(toolOutputContent)) {
        continue;
      }

      const partTokens = estimateTokenCountSync([part]);

      if (!protectionBoundaryReached) {
        cumulativeToolTokens += partTokens;
        if (cumulativeToolTokens <= maskingConfig.toolProtectionThreshold) {
          // Still inside the protection window.
          continue;
        }
        // The part that crosses the boundary is itself prunable.
        protectionBoundaryReached = true;
      }

      totalPrunableTokens += partTokens;
      prunableParts.push({
        contentIndex: i,
        partIndex: j,
        tokens: partTokens,
        content: toolOutputContent,
      });
    }
  }

  // Trigger pruning only if we have accumulated enough savings to justify the
  // overhead of masking and file I/O (batch pruning threshold).
  if (
    prunableParts.length === 0 ||
    totalPrunableTokens < maskingConfig.minPrunableTokensThreshold
  ) {
    return unchanged;
  }

  debugLogger.debug(
    `[ToolOutputMasking] Triggering masking. Prunable tool tokens: ${totalPrunableTokens.toLocaleString()} (> ${maskingConfig.minPrunableTokensThreshold.toLocaleString()})`,
  );

  // Perform masking and offloading on a shallow copy of the history.
  const newHistory = [...history];
  let maskedCount = 0;
  let actualTokensSaved = 0;
  const toolOutputsDir = path.join(
    config.storage.getHistoryDir(),
    TOOL_OUTPUTS_DIR,
  );
  // The output directory is created lazily, right before the first write.
  let outputDirReady = false;

  for (const { contentIndex, partIndex, content, tokens } of prunableParts) {
    const contentRecord = newHistory[contentIndex];
    const currentParts = contentRecord.parts ?? [];
    const part = currentParts[partIndex];

    if (!part?.functionResponse) continue;

    const toolName = part.functionResponse.name || 'unknown_tool';
    const callId = part.functionResponse.id || Date.now().toString();
    const safeToolName = sanitizeFilenamePart(toolName).toLowerCase();
    const safeCallId = sanitizeFilenamePart(callId).toLowerCase();
    // slice(2, 10) drops the leading "0." and keeps up to eight base-36
    // digits; the previous substring(7) could yield an empty suffix.
    const randomSuffix = Math.random().toString(36).slice(2, 10);
    const fileName = `${safeToolName}_${safeCallId}_${randomSuffix}.txt`;
    const filePath = path.join(toolOutputsDir, fileName);

    const originalResponse =
      (part.functionResponse.response as Record<string, unknown>) || {};

    const totalLines = content.split('\n').length;
    const fileSizeMB = (
      Buffer.byteLength(content, 'utf8') /
      1024 /
      1024
    ).toFixed(2);

    let preview: string;
    if (toolName === SHELL_TOOL_NAME) {
      preview = this.formatShellPreview(originalResponse);
    } else if (content.length > 500) {
      // General tools: Head + Tail preview (250 chars each)
      preview = `${content.slice(0, 250)}\n... [TRUNCATED] ...\n${content.slice(-250)}`;
    } else {
      preview = content;
    }

    const maskedSnippet = this.formatMaskedSnippet({
      toolName,
      filePath,
      fileSizeMB,
      totalLines,
      tokens,
      preview,
    });

    const maskedPart: Part = {
      ...part,
      functionResponse: {
        ...part.functionResponse,
        response: { output: maskedSnippet },
      },
    };

    const maskedTokens = estimateTokenCountSync([maskedPart]);
    const savings = tokens - maskedTokens;

    // Masking that does not shrink the part is skipped. Checking this before
    // touching the disk avoids leaving an orphan file on every pass for
    // outputs that are never masked.
    if (savings <= 0) continue;

    if (!outputDirReady) {
      await fsPromises.mkdir(toolOutputsDir, { recursive: true });
      outputDirReady = true;
    }
    await fsPromises.writeFile(filePath, content, 'utf-8');

    const newParts = [...currentParts];
    newParts[partIndex] = maskedPart;
    newHistory[contentIndex] = { ...contentRecord, parts: newParts };
    actualTokensSaved += savings;
    maskedCount++;
  }

  debugLogger.debug(
    `[ToolOutputMasking] Masked ${maskedCount} tool outputs. Saved ~${actualTokensSaved.toLocaleString()} tokens.`,
  );

  const result: MaskingResult = {
    newHistory,
    maskedCount,
    tokensSaved: actualTokensSaved,
  };

  // Telemetry is only emitted when the pass saved something.
  if (actualTokensSaved <= 0) {
    return result;
  }

  logToolOutputMasking(
    config,
    new ToolOutputMaskingEvent({
      tokens_before: totalPrunableTokens,
      tokens_after: totalPrunableTokens - actualTokensSaved,
      masked_count: maskedCount,
      total_prunable_tokens: totalPrunableTokens,
    }),
  );

  return result;
}
|
||||
|
||||
/**
 * Serializes the response payload of a tool call so it can be saved to disk.
 *
 * The whole `functionResponse.response` object is pretty-printed as JSON
 * (2-space indent), so any tool output schema is handled without per-tool
 * knowledge.
 *
 * @param part A history part that may carry a `functionResponse`.
 * @returns The JSON text, or `null` when the part has no function response or
 *   the response payload is missing.
 */
private getToolOutputContent(part: Part): string | null {
  const payload = part.functionResponse?.response as
    | Record<string, unknown>
    | undefined;
  if (!payload) {
    return null;
  }

  // Multimodal safety: sibling fields of the part (inlineData, etc.) are not
  // serialized here. mask() keeps the original part structure and swaps only
  // the functionResponse content.
  return JSON.stringify(payload, null, 2);
}
|
||||
|
||||
/**
 * Reports whether serialized tool output already carries the masking marker.
 *
 * @param content Serialized tool output to inspect.
 * @returns `true` when the opening of the masking indicator tag occurs
 *   anywhere in `content`.
 */
private isAlreadyMasked(content: string): boolean {
  const openingMarker = `<${MASKING_INDICATOR_TAG}`;
  return content.indexOf(openingMarker) !== -1;
}
|
||||
|
||||
/**
 * Builds a compact preview of a shell tool response.
 *
 * The text is read from `response.output` (falling back to `response.stdout`)
 * and split on the section prefixes listed in the regex below. The `Output`
 * section is shortened with `formatSimplePreview`; every other section is kept
 * in full. Text without at least one recognized section falls back to a plain
 * head/tail preview.
 *
 * @param response The raw `functionResponse.response` object of a shell call.
 * @returns The preview text. A non-string payload is returned stringified
 *   (JSON for objects, `String()` otherwise).
 */
private formatShellPreview(response: Record<string, unknown>): string {
  const raw: unknown = response['output'] || response['stdout'] || '';
  if (typeof raw !== 'string') {
    if (typeof raw === 'object') {
      return JSON.stringify(raw);
    }
    return String(raw);
  }

  // The shell tool output is structured in shell.ts with specific section prefixes.
  const sectionHeader =
    /^(Output|Error|Exit Code|Signal|Background PIDs|Process Group PGID): /m;
  // Splitting on a regex with one capture group yields
  // [preamble, name1, body1, name2, body2, ...].
  const [preamble, ...sections] = raw.split(sectionHeader);

  if (sections.length < 2) {
    // Not in the expected shell.ts format: plain head/tail preview.
    return this.formatSimplePreview(raw);
  }

  const rendered: string[] = [];
  const leadingText = preamble.trim();
  if (leadingText) {
    rendered.push(this.formatSimplePreview(leadingText));
  }

  for (let idx = 0; idx < sections.length; idx += 2) {
    const label = sections[idx];
    const body = sections[idx + 1]?.trim() || '';
    // Only the Output section is shortened; the others (Error, Exit Code,
    // etc.) are usually small and high-signal, so they stay in full.
    rendered.push(
      label === 'Output'
        ? `Output: ${this.formatSimplePreview(body)}`
        : `${label}: ${body}`,
    );
  }

  let preview = rendered.join('\n');

  // Also surface root-level exit code / error fields when the text does not
  // already mention them.
  const exitCode = response['exitCode'] ?? response['exit_code'];
  const error = response['error'];
  const exitCodeIsFailure =
    exitCode !== undefined && exitCode !== null && exitCode !== 0;
  if (exitCodeIsFailure && !raw.includes(`Exit Code: ${exitCode}`)) {
    preview += `\n[Exit Code: ${exitCode}]`;
  }
  if (error && !raw.includes(`Error: ${error}`)) {
    preview += `\n[Error: ${error}]`;
  }

  return preview;
}
|
||||
|
||||
/**
 * Shortens multi-line text to its first and last ten lines.
 *
 * @param content Text to preview.
 * @returns `content` unchanged when it has 20 lines or fewer; otherwise the
 *   first 10 lines, a marker stating how many lines were omitted, and the
 *   last 10 lines.
 */
private formatSimplePreview(content: string): string {
  const allLines = content.split('\n');
  if (allLines.length <= 20) {
    return content;
  }

  const leading = allLines.slice(0, 10);
  const trailing = allLines.slice(-10);
  const omittedCount = allLines.length - leading.length - trailing.length;

  return `${leading.join('\n')}\n\n... [${omittedCount} lines omitted] ...\n\n${trailing.join('\n')}`;
}
|
||||
|
||||
/**
 * Renders the placeholder text that replaces a masked tool output.
 *
 * The snippet is wrapped in the masking indicator tag and contains the
 * preview, a blank line, and a pointer to the file holding the full output.
 * Only `filePath` and `preview` are read from `params`.
 *
 * @param params Details of the masked output.
 * @returns The snippet text.
 */
private formatMaskedSnippet(params: MaskedSnippetParams): string {
  const openTag = `<${MASKING_INDICATOR_TAG}>`;
  const closeTag = `</${MASKING_INDICATOR_TAG}>`;

  return [
    openTag,
    params.preview,
    '',
    `Output too large. Full output available at: ${params.filePath}`,
    closeTag,
  ].join('\n');
}
|
||||
}
|
||||
|
||||
/** Inputs handed to `formatMaskedSnippet` when a tool output is masked. */
interface MaskedSnippetParams {
  /** Name of the tool whose output was masked. */
  toolName: string;

  /** Path of the file the full output was written to. */
  filePath: string;

  /** Size of the saved output in MiB, formatted with two decimals. */
  fileSizeMB: string;

  /** Number of lines in the saved output. */
  totalLines: number;

  /** Token count of the original, unmasked part. */
  tokens: number;

  /** Shortened preview embedded in the masked snippet. */
  preview: string;
}
|
||||
@@ -46,6 +46,7 @@ import type {
|
||||
ApprovalModeSwitchEvent,
|
||||
ApprovalModeDurationEvent,
|
||||
PlanExecutionEvent,
|
||||
ToolOutputMaskingEvent,
|
||||
} from '../types.js';
|
||||
import { EventMetadataKey } from './event-metadata-key.js';
|
||||
import type { Config } from '../../config/config.js';
|
||||
@@ -108,6 +109,7 @@ export enum EventNames {
|
||||
APPROVAL_MODE_SWITCH = 'approval_mode_switch',
|
||||
APPROVAL_MODE_DURATION = 'approval_mode_duration',
|
||||
PLAN_EXECUTION = 'plan_execution',
|
||||
TOOL_OUTPUT_MASKING = 'tool_output_masking',
|
||||
}
|
||||
|
||||
export interface LogResponse {
|
||||
@@ -1217,8 +1219,40 @@ export class ClearcutLogger {
|
||||
},
|
||||
];
|
||||
|
||||
const logEvent = this.createLogEvent(
|
||||
EventNames.TOOL_OUTPUT_TRUNCATED,
|
||||
data,
|
||||
);
|
||||
this.enqueueLogEvent(logEvent);
|
||||
this.flushIfNeeded();
|
||||
}
|
||||
|
||||
/**
 * Queues a Clearcut event describing one tool-output-masking pass.
 *
 * The four numeric fields of the event are serialized as strings under their
 * dedicated metadata keys, and a single log event named
 * `EventNames.TOOL_OUTPUT_MASKING` is enqueued before `flushIfNeeded()` runs.
 *
 * @param event Metrics of the masking pass (tokens before/after, number of
 *   masked outputs, total prunable tokens).
 */
logToolOutputMaskingEvent(event: ToolOutputMaskingEvent): void {
  const data: EventValue[] = [
    {
      gemini_cli_key:
        EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_BEFORE,
      value: event.tokens_before.toString(),
    },
    {
      gemini_cli_key:
        EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_AFTER,
      value: event.tokens_after.toString(),
    },
    {
      gemini_cli_key:
        EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_MASKED_COUNT,
      value: event.masked_count.toString(),
    },
    {
      gemini_cli_key:
        EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_TOTAL_PRUNABLE_TOKENS,
      value: event.total_prunable_tokens.toString(),
    },
  ];

  // Enqueue exactly one event, tagged as a masking event. Creating it under
  // TOOL_OUTPUT_TRUNCATED would misattribute masking metrics to the
  // truncation event.
  this.enqueueLogEvent(
    this.createLogEvent(EventNames.TOOL_OUTPUT_MASKING, data),
  );
  this.flushIfNeeded();
}
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
// Defines valid event metadata keys for Clearcut logging.
|
||||
export enum EventMetadataKey {
|
||||
// Deleted enums: 24
|
||||
// Next ID: 152
|
||||
|
||||
GEMINI_CLI_KEY_UNKNOWN = 0,
|
||||
|
||||
@@ -561,4 +561,20 @@ export enum EventMetadataKey {
|
||||
|
||||
// Logs the classifier threshold used.
|
||||
GEMINI_CLI_ROUTING_CLASSIFIER_THRESHOLD = 147,
|
||||
|
||||
// ==========================================================================
|
||||
// Tool Output Masking Event Keys
|
||||
// ==========================================================================
|
||||
|
||||
// Logs the total tokens in the prunable block before masking.
|
||||
GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_BEFORE = 148,
|
||||
|
||||
// Logs the total tokens in the masked remnants after masking.
|
||||
GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_AFTER = 149,
|
||||
|
||||
// Logs the number of tool outputs masked in this operation.
|
||||
GEMINI_CLI_TOOL_OUTPUT_MASKING_MASKED_COUNT = 150,
|
||||
|
||||
// Logs the total prunable tokens identified at the trigger point.
|
||||
GEMINI_CLI_TOOL_OUTPUT_MASKING_TOTAL_PRUNABLE_TOKENS = 151,
|
||||
}
|
||||
|
||||
@@ -56,6 +56,7 @@ import type {
|
||||
StartupStatsEvent,
|
||||
LlmLoopCheckEvent,
|
||||
PlanExecutionEvent,
|
||||
ToolOutputMaskingEvent,
|
||||
} from './types.js';
|
||||
import {
|
||||
recordApiErrorMetrics,
|
||||
@@ -163,6 +164,21 @@ export function logToolOutputTruncated(
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Reports a tool-output-masking pass to both telemetry sinks.
 *
 * The event is handed to the Clearcut logger when an instance is available,
 * and an OpenTelemetry log record built from the event is emitted through
 * `bufferTelemetryEvent`.
 *
 * @param config Active configuration, used to resolve the Clearcut logger and
 *   the OpenTelemetry attributes.
 * @param event Metrics of the masking pass.
 */
export function logToolOutputMasking(
  config: Config,
  event: ToolOutputMaskingEvent,
): void {
  ClearcutLogger.getInstance(config)?.logToolOutputMaskingEvent(event);

  bufferTelemetryEvent(() => {
    logs.getLogger(SERVICE_NAME).emit({
      body: event.toLogBody(),
      attributes: event.toOpenTelemetryAttributes(config),
    });
  });
}
|
||||
|
||||
export function logFileOperation(
|
||||
config: Config,
|
||||
event: FileOperationEvent,
|
||||
|
||||
@@ -1376,6 +1376,49 @@ export class ToolOutputTruncatedEvent implements BaseTelemetryEvent {
|
||||
}
|
||||
}
|
||||
|
||||
/** OpenTelemetry event name used for tool-output-masking log records. */
export const EVENT_TOOL_OUTPUT_MASKING = 'gemini_cli.tool_output_masking';

/**
 * Telemetry event describing one tool-output-masking pass.
 *
 * The timestamp is taken at construction time; all other values are copied
 * from the `details` argument.
 */
export class ToolOutputMaskingEvent implements BaseTelemetryEvent {
  'event.name': 'tool_output_masking';
  'event.timestamp': string;
  /** Tokens in the prunable block before masking. */
  tokens_before: number;
  /** Tokens remaining after masking. */
  tokens_after: number;
  /** Number of tool outputs masked in this pass. */
  masked_count: number;
  /** Total prunable tokens identified when the pass was triggered. */
  total_prunable_tokens: number;

  /**
   * @param details Metrics of the masking pass.
   */
  constructor(details: {
    tokens_before: number;
    tokens_after: number;
    masked_count: number;
    total_prunable_tokens: number;
  }) {
    const { tokens_before, tokens_after, masked_count, total_prunable_tokens } =
      details;

    this['event.name'] = 'tool_output_masking';
    this['event.timestamp'] = new Date().toISOString();
    this.tokens_before = tokens_before;
    this.tokens_after = tokens_after;
    this.masked_count = masked_count;
    this.total_prunable_tokens = total_prunable_tokens;
  }

  /**
   * Builds the attribute bag for the OpenTelemetry log record: the common
   * attributes for `config`, the event name and timestamp, and the four
   * metrics.
   */
  toOpenTelemetryAttributes(config: Config): LogAttributes {
    const attributes: LogAttributes = {
      ...getCommonAttributes(config),
      'event.name': EVENT_TOOL_OUTPUT_MASKING,
      'event.timestamp': this['event.timestamp'],
      tokens_before: this.tokens_before,
      tokens_after: this.tokens_after,
      masked_count: this.masked_count,
      total_prunable_tokens: this.total_prunable_tokens,
    };
    return attributes;
  }

  /** Human-readable summary: masked count and tokens saved (before - after). */
  toLogBody(): string {
    const tokensSaved = this.tokens_before - this.tokens_after;
    return `Tool output masking (Masked ${this.masked_count} tool outputs. Saved ${tokensSaved} tokens)`;
  }
}
|
||||
|
||||
export const EVENT_EXTENSION_UNINSTALL = 'gemini_cli.extension_uninstall';
|
||||
export class ExtensionUninstallEvent implements BaseTelemetryEvent {
|
||||
'event.name': 'extension_uninstall';
|
||||
@@ -1602,6 +1645,7 @@ export type TelemetryEvent =
|
||||
| LlmLoopCheckEvent
|
||||
| StartupStatsEvent
|
||||
| WebFetchFallbackAttemptEvent
|
||||
| ToolOutputMaskingEvent
|
||||
| EditStrategyEvent
|
||||
| PlanExecutionEvent
|
||||
| RewindEvent
|
||||
|
||||
@@ -572,6 +572,14 @@ export async function fileExists(filePath: string): Promise<boolean> {
|
||||
const MAX_TRUNCATED_LINE_WIDTH = 1000;
|
||||
const MAX_TRUNCATED_CHARS = 4000;
|
||||
|
||||
/**
 * Makes a string safe to embed in a filename.
 *
 * Every character other than ASCII letters, digits, `_` and `-` (including
 * path separators and dots) is replaced with `_`. The length of the string is
 * preserved and its case is left untouched.
 *
 * @param part The raw text, e.g. a tool name or call id.
 * @returns The sanitized text.
 */
export function sanitizeFilenamePart(part: string): string {
  const unsafeCharacters = /[^a-zA-Z0-9_-]/g;
  const sanitized = part.replace(unsafeCharacters, '_');
  return sanitized;
}
|
||||
|
||||
/**
|
||||
* Formats a truncated message for tool output, handling multi-line and single-line (elephant) cases.
|
||||
*/
|
||||
@@ -623,11 +631,8 @@ export async function saveTruncatedToolOutput(
|
||||
id: string | number, // Accept string (callId) or number (truncationId)
|
||||
projectTempDir: string,
|
||||
): Promise<{ outputFile: string; totalLines: number }> {
|
||||
const safeToolName = toolName.replace(/[^a-z0-9]/gi, '_').toLowerCase();
|
||||
const safeId = id
|
||||
.toString()
|
||||
.replace(/[^a-z0-9]/gi, '_')
|
||||
.toLowerCase();
|
||||
const safeToolName = sanitizeFilenamePart(toolName).toLowerCase();
|
||||
const safeId = sanitizeFilenamePart(id.toString()).toLowerCase();
|
||||
const fileName = `${safeToolName}_${safeId}.txt`;
|
||||
const toolOutputDir = path.join(projectTempDir, TOOL_OUTPUT_DIR);
|
||||
const outputFile = path.join(toolOutputDir, fileName);
|
||||
|
||||
@@ -1428,6 +1428,44 @@
|
||||
"default": {},
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"toolOutputMasking": {
|
||||
"title": "Tool Output Masking",
|
||||
"description": "Advanced settings for tool output masking to manage context window efficiency.",
|
||||
"markdownDescription": "Advanced settings for tool output masking to manage context window efficiency.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `{}`",
|
||||
"default": {},
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"title": "Enable Tool Output Masking",
|
||||
"description": "Enables tool output masking to save tokens.",
|
||||
"markdownDescription": "Enables tool output masking to save tokens.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`",
|
||||
"default": false,
|
||||
"type": "boolean"
|
||||
},
|
||||
"toolProtectionThreshold": {
|
||||
"title": "Tool Protection Threshold",
|
||||
"description": "Minimum number of tokens to protect from masking (most recent tool outputs).",
|
||||
"markdownDescription": "Minimum number of tokens to protect from masking (most recent tool outputs).\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `50000`",
|
||||
"default": 50000,
|
||||
"type": "number"
|
||||
},
|
||||
"minPrunableTokensThreshold": {
|
||||
"title": "Min Prunable Tokens Threshold",
|
||||
"description": "Minimum prunable tokens required to trigger a masking pass.",
|
||||
"markdownDescription": "Minimum prunable tokens required to trigger a masking pass.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `30000`",
|
||||
"default": 30000,
|
||||
"type": "number"
|
||||
},
|
||||
"protectLatestTurn": {
|
||||
"title": "Protect Latest Turn",
|
||||
"description": "Ensures the absolute latest turn is never masked, regardless of token count.",
|
||||
"markdownDescription": "Ensures the absolute latest turn is never masked, regardless of token count.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `true`",
|
||||
"default": true,
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
},
|
||||
"enableAgents": {
|
||||
"title": "Enable Agents",
|
||||
"description": "Enable local and remote subagents. Warning: Experimental feature, uses YOLO mode for subagents",
|
||||
|
||||
Reference in New Issue
Block a user