From a1f2aacd1e4506ee610b0b095fc33588c9d0ea5a Mon Sep 17 00:00:00 2001 From: Abhi <43648792+abhipatel12@users.noreply.github.com> Date: Sat, 7 Feb 2026 18:56:45 -0500 Subject: [PATCH] feat(core): exempt high-signal tools from output masking (#18545) --- .../services/toolOutputMaskingService.test.ts | 115 +++++++++++++++++- .../src/services/toolOutputMaskingService.ts | 26 +++- 2 files changed, 139 insertions(+), 2 deletions(-) diff --git a/packages/core/src/services/toolOutputMaskingService.test.ts b/packages/core/src/services/toolOutputMaskingService.test.ts index 26e44c4d17..08d8187ff3 100644 --- a/packages/core/src/services/toolOutputMaskingService.test.ts +++ b/packages/core/src/services/toolOutputMaskingService.test.ts @@ -12,7 +12,11 @@ import { ToolOutputMaskingService, MASKING_INDICATOR_TAG, } from './toolOutputMaskingService.js'; -import { SHELL_TOOL_NAME } from '../tools/tool-names.js'; +import { + SHELL_TOOL_NAME, + ACTIVATE_SKILL_TOOL_NAME, + MEMORY_TOOL_NAME, +} from '../tools/tool-names.js'; import { estimateTokenCountSync } from '../utils/tokenCalculation.js'; import type { Config } from '../config/config.js'; import type { Content, Part } from '@google/genai'; @@ -511,4 +515,113 @@ describe('ToolOutputMaskingService', () => { const result = await service.mask(history, mockConfig); expect(result.maskedCount).toBe(0); // padding is protected, tiny_tool would increase size }); + + it('should never mask exempt tools (like activate_skill) even if they are deep in history', async () => { + const history: Content[] = [ + { + role: 'user', + parts: [ + { + functionResponse: { + name: ACTIVATE_SKILL_TOOL_NAME, + response: { output: 'High value instructions for skill' }, + }, + }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: MEMORY_TOOL_NAME, + response: { output: 'Important user preference' }, + }, + }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'bulky_tool', + response: { output: 'A'.repeat(60000) }, + }, + }, + ], + }, + // Protection buffer + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'padding', + response: { output: 'B'.repeat(60000) }, + }, + }, + ], + }, + { role: 'user', parts: [{ text: 'latest' }] }, + ]; + + mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => { + const resp = parts[0].functionResponse?.response as Record< + string, + unknown + >; + const content = (resp?.['output'] as string) ?? JSON.stringify(resp); + if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100; + + const name = parts[0].functionResponse?.name; + if (name === ACTIVATE_SKILL_TOOL_NAME) return 1000; + if (name === MEMORY_TOOL_NAME) return 500; + if (name === 'bulky_tool') return 60000; + if (name === 'padding') return 60000; + return 10; + }); + + const result = await service.mask(history, mockConfig); + + // Both 'bulky_tool' and 'padding' should be masked. + // 'padding' (Index 3) crosses the 50k protection boundary immediately. + // ACTIVATE_SKILL and MEMORY are exempt. + expect(result.maskedCount).toBe(2); + expect(result.newHistory[0].parts?.[0].functionResponse?.name).toBe( + ACTIVATE_SKILL_TOOL_NAME, + ); + expect( + ( + result.newHistory[0].parts?.[0].functionResponse?.response as Record< + string, + unknown + > + )['output'], + ).toBe('High value instructions for skill'); + + expect(result.newHistory[1].parts?.[0].functionResponse?.name).toBe( + MEMORY_TOOL_NAME, + ); + expect( + ( + result.newHistory[1].parts?.[0].functionResponse?.response as Record< + string, + unknown + > + )['output'], + ).toBe('Important user preference'); + + expect(result.newHistory[2].parts?.[0].functionResponse?.name).toBe( + 'bulky_tool', + ); + expect( + ( + result.newHistory[2].parts?.[0].functionResponse?.response as Record< + string, + unknown + > + )['output'], + ).toContain(MASKING_INDICATOR_TAG); + }); }); diff --git a/packages/core/src/services/toolOutputMaskingService.ts b/packages/core/src/services/toolOutputMaskingService.ts index d62e1761e1..53804a1909 100644 --- a/packages/core/src/services/toolOutputMaskingService.ts +++ b/packages/core/src/services/toolOutputMaskingService.ts @@ -12,7 +12,14 @@ import { debugLogger } from '../utils/debugLogger.js'; import { sanitizeFilenamePart } from '../utils/fileUtils.js'; import type { Config } from '../config/config.js'; import { logToolOutputMasking } from '../telemetry/loggers.js'; -import { SHELL_TOOL_NAME } from '../tools/tool-names.js'; +import { + SHELL_TOOL_NAME, + ACTIVATE_SKILL_TOOL_NAME, + MEMORY_TOOL_NAME, + ASK_USER_TOOL_NAME, + ENTER_PLAN_MODE_TOOL_NAME, + EXIT_PLAN_MODE_TOOL_NAME, +} from '../tools/tool-names.js'; import { ToolOutputMaskingEvent } from '../telemetry/types.js'; // Tool output masking defaults @@ -23,6 +30,18 @@ export const MASKING_INDICATOR_TAG = 'tool_output_masked'; export const TOOL_OUTPUTS_DIR = 'tool-outputs'; +/** + * Tools whose outputs are always high-signal and should never be masked, + * regardless of their position in the conversation history. + */ +const EXEMPT_TOOLS = new Set([ + ACTIVATE_SKILL_TOOL_NAME, + MEMORY_TOOL_NAME, + ASK_USER_TOOL_NAME, + ENTER_PLAN_MODE_TOOL_NAME, + EXIT_PLAN_MODE_TOOL_NAME, +]); + export interface MaskingResult { newHistory: Content[]; maskedCount: number; @@ -89,6 +108,11 @@ export class ToolOutputMaskingService { // core intent and logic, which are harder for the model to recover if lost. if (!part.functionResponse) continue; + const toolName = part.functionResponse.name; + if (toolName && EXEMPT_TOOLS.has(toolName)) { + continue; + } + const toolOutputContent = this.getToolOutputContent(part); if (!toolOutputContent || this.isAlreadyMasked(toolOutputContent)) { continue;