feat(core): exempt high-signal tools from output masking (#18545)

This commit is contained in:
Abhi
2026-02-07 18:56:45 -05:00
committed by GitHub
parent 979bbee485
commit a1f2aacd1e
2 changed files with 139 additions and 2 deletions

View File

@@ -12,7 +12,11 @@ import {
ToolOutputMaskingService,
MASKING_INDICATOR_TAG,
} from './toolOutputMaskingService.js';
import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
import {
SHELL_TOOL_NAME,
ACTIVATE_SKILL_TOOL_NAME,
MEMORY_TOOL_NAME,
} from '../tools/tool-names.js';
import { estimateTokenCountSync } from '../utils/tokenCalculation.js';
import type { Config } from '../config/config.js';
import type { Content, Part } from '@google/genai';
@@ -511,4 +515,113 @@ describe('ToolOutputMaskingService', () => {
const result = await service.mask(history, mockConfig);
expect(result.maskedCount).toBe(0); // padding is protected, tiny_tool would increase size
});
it('should never mask exempt tools (like activate_skill) even if they are deep in history', async () => {
  // Builds a single-part user turn carrying one tool result.
  const toolTurn = (name: string, output: string): Content => ({
    role: 'user',
    parts: [{ functionResponse: { name, response: { output } } }],
  });

  // Oldest entries first: the two exempt tools sit deepest in history,
  // followed by two bulky non-exempt tool outputs and the latest user text.
  const history: Content[] = [
    toolTurn(ACTIVATE_SKILL_TOOL_NAME, 'High value instructions for skill'),
    toolTurn(MEMORY_TOOL_NAME, 'Important user preference'),
    toolTurn('bulky_tool', 'A'.repeat(60000)),
    // Protection buffer
    toolTurn('padding', 'B'.repeat(60000)),
    { role: 'user', parts: [{ text: 'latest' }] },
  ];

  // Fixed token estimates per tool name; anything not listed counts as 10.
  const tokensByTool = new Map<string, number>([
    [ACTIVATE_SKILL_TOOL_NAME, 1000],
    [MEMORY_TOOL_NAME, 500],
    ['bulky_tool', 60000],
    ['padding', 60000],
  ]);

  mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
    const functionResponse = parts[0]?.functionResponse;
    const resp = functionResponse?.response as
      | Record<string, unknown>
      | undefined;
    const output = resp?.['output'];
    // JSON.stringify(undefined) returns undefined, so stringify an empty
    // object instead when the part carries no function response. This keeps
    // `content` a real string and lets such parts reach the default of 10.
    const content =
      typeof output === 'string' ? output : JSON.stringify(resp ?? {});
    // Already-masked outputs are reported as small.
    if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
    return tokensByTool.get(functionResponse?.name ?? '') ?? 10;
  });

  const result = await service.mask(history, mockConfig);

  // Accessors for the first function response of the turn at `index`.
  const responseAt = (index: number) =>
    result.newHistory[index]?.parts?.[0]?.functionResponse;
  const outputAt = (index: number): unknown =>
    (responseAt(index)?.response as Record<string, unknown> | undefined)?.[
      'output'
    ];

  // Both 'bulky_tool' and 'padding' should be masked.
  // 'padding' (Index 3) crosses the 50k protection boundary immediately.
  // ACTIVATE_SKILL and MEMORY are exempt.
  expect(result.maskedCount).toBe(2);

  // Exempt tools keep their original output verbatim.
  expect(responseAt(0)?.name).toBe(ACTIVATE_SKILL_TOOL_NAME);
  expect(outputAt(0)).toBe('High value instructions for skill');
  expect(responseAt(1)?.name).toBe(MEMORY_TOOL_NAME);
  expect(outputAt(1)).toBe('Important user preference');

  // Non-exempt bulky outputs are replaced by the masking indicator.
  expect(responseAt(2)?.name).toBe('bulky_tool');
  expect(outputAt(2)).toContain(MASKING_INDICATOR_TAG);
  expect(responseAt(3)?.name).toBe('padding');
  expect(outputAt(3)).toContain(MASKING_INDICATOR_TAG);
});
});

View File

@@ -12,7 +12,14 @@ import { debugLogger } from '../utils/debugLogger.js';
import { sanitizeFilenamePart } from '../utils/fileUtils.js';
import type { Config } from '../config/config.js';
import { logToolOutputMasking } from '../telemetry/loggers.js';
import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
import {
SHELL_TOOL_NAME,
ACTIVATE_SKILL_TOOL_NAME,
MEMORY_TOOL_NAME,
ASK_USER_TOOL_NAME,
ENTER_PLAN_MODE_TOOL_NAME,
EXIT_PLAN_MODE_TOOL_NAME,
} from '../tools/tool-names.js';
import { ToolOutputMaskingEvent } from '../telemetry/types.js';
// Tool output masking defaults
@@ -23,6 +30,18 @@ export const MASKING_INDICATOR_TAG = 'tool_output_masked';
export const TOOL_OUTPUTS_DIR = 'tool-outputs';
/**
 * Tools whose outputs are always high-signal and should never be masked,
 * regardless of their position in the conversation history.
 *
 * Declared as `ReadonlySet<string>` so that membership checks accept any
 * tool name and the compiler rejects accidental `add`/`delete`/`clear` calls
 * on this shared module-level set.
 */
const EXEMPT_TOOLS: ReadonlySet<string> = new Set([
  ACTIVATE_SKILL_TOOL_NAME,
  MEMORY_TOOL_NAME,
  ASK_USER_TOOL_NAME,
  ENTER_PLAN_MODE_TOOL_NAME,
  EXIT_PLAN_MODE_TOOL_NAME,
]);
export interface MaskingResult {
newHistory: Content[];
maskedCount: number;
@@ -89,6 +108,11 @@ export class ToolOutputMaskingService {
// core intent and logic, which are harder for the model to recover if lost.
if (!part.functionResponse) continue;
const toolName = part.functionResponse.name;
if (toolName && EXEMPT_TOOLS.has(toolName)) {
continue;
}
const toolOutputContent = this.getToolOutputContent(part);
if (!toolOutputContent || this.isAlreadyMasked(toolOutputContent)) {
continue;