feat(core): exempt high-signal tools from output masking (#18545)

2026-05-12 12:54:07 -07:00 · 2026-02-07 18:56:45 -05:00
parent 979bbee485
commit a1f2aacd1e
2 changed files with 139 additions and 2 deletions
@@ -12,7 +12,11 @@ import {
  ToolOutputMaskingService,
  MASKING_INDICATOR_TAG,
 } from './toolOutputMaskingService.js';
-import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
+import {
  SHELL_TOOL_NAME,
  ACTIVATE_SKILL_TOOL_NAME,
  MEMORY_TOOL_NAME,
 } from '../tools/tool-names.js';
 import { estimateTokenCountSync } from '../utils/tokenCalculation.js';
 import type { Config } from '../config/config.js';
 import type { Content, Part } from '@google/genai';
@@ -511,4 +515,113 @@ describe('ToolOutputMaskingService', () => {
    const result = await service.mask(history, mockConfig);
    expect(result.maskedCount).toBe(0); // padding is protected, tiny_tool would increase size
  });
  it('should never mask exempt tools (like activate_skill) even if they are deep in history', async () => {
    // Wraps one tool response in its own single-part user turn.
    const toolTurn = (name: string, output: string): Content => ({
      role: 'user',
      parts: [{ functionResponse: { name, response: { output } } }],
    });

    // Oldest first: two small exempt-tool outputs, then two 60k-character
    // outputs from ordinary tools, then the latest plain-text user message.
    const history: Content[] = [
      toolTurn(ACTIVATE_SKILL_TOOL_NAME, 'High value instructions for skill'),
      toolTurn(MEMORY_TOOL_NAME, 'Important user preference'),
      toolTurn('bulky_tool', 'A'.repeat(60000)),
      // Protection buffer
      toolTurn('padding', 'B'.repeat(60000)),
      { role: 'user', parts: [{ text: 'latest' }] },
    ];

    // Deterministic token estimator: anything that already carries the
    // masking tag costs 100 tokens; otherwise the cost is fixed per tool name.
    mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
      const call = parts[0].functionResponse;
      const payload = call?.response as Record<string, unknown>;
      const rendered =
        (payload?.['output'] as string) ?? JSON.stringify(payload);
      if (rendered.includes(`<${MASKING_INDICATOR_TAG}`)) {
        return 100;
      }
      switch (call?.name) {
        case ACTIVATE_SKILL_TOOL_NAME:
          return 1000;
        case MEMORY_TOOL_NAME:
          return 500;
        case 'bulky_tool':
          return 60000;
        case 'padding':
          return 60000;
        default:
          return 10;
      }
    });

    const result = await service.mask(history, mockConfig);

    // Accessors for the first part of the history entry at `index`.
    const nameAt = (index: number) =>
      result.newHistory[index].parts?.[0].functionResponse?.name;
    const outputAt = (index: number): unknown =>
      (
        result.newHistory[index].parts?.[0].functionResponse
          ?.response as Record<string, unknown>
      )['output'];

    // Expect exactly the two non-exempt outputs ('bulky_tool' and 'padding')
    // to be masked: 'padding' (index 3) crosses the 50k protection boundary
    // immediately, while ACTIVATE_SKILL and MEMORY are exempt.
    expect(result.maskedCount).toBe(2);

    // Exempt entries keep both their tool name and their original output.
    expect(nameAt(0)).toBe(ACTIVATE_SKILL_TOOL_NAME);
    expect(outputAt(0)).toBe('High value instructions for skill');
    expect(nameAt(1)).toBe(MEMORY_TOOL_NAME);
    expect(outputAt(1)).toBe('Important user preference');

    // The bulky entry keeps its name but its output now carries the mask tag.
    expect(nameAt(2)).toBe('bulky_tool');
    expect(outputAt(2)).toContain(MASKING_INDICATOR_TAG);
  });
 });
@@ -12,7 +12,14 @@ import { debugLogger } from '../utils/debugLogger.js';
 import { sanitizeFilenamePart } from '../utils/fileUtils.js';
 import type { Config } from '../config/config.js';
 import { logToolOutputMasking } from '../telemetry/loggers.js';
-import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
+import {
  SHELL_TOOL_NAME,
  ACTIVATE_SKILL_TOOL_NAME,
  MEMORY_TOOL_NAME,
  ASK_USER_TOOL_NAME,
  ENTER_PLAN_MODE_TOOL_NAME,
  EXIT_PLAN_MODE_TOOL_NAME,
 } from '../tools/tool-names.js';
 import { ToolOutputMaskingEvent } from '../telemetry/types.js';
 // Tool output masking defaults
@@ -23,6 +30,18 @@ export const MASKING_INDICATOR_TAG = 'tool_output_masked';
 export const TOOL_OUTPUTS_DIR = 'tool-outputs';
 /**
 * Names of tools whose responses are skipped by output masking.
 *
 * These outputs are treated as always high-signal, so they are left intact
 * no matter how far back in the conversation history they appear.
 */
 const EXEMPT_TOOLS = new Set<string>([
  ACTIVATE_SKILL_TOOL_NAME,
  MEMORY_TOOL_NAME,
  ASK_USER_TOOL_NAME,
  ENTER_PLAN_MODE_TOOL_NAME,
  EXIT_PLAN_MODE_TOOL_NAME,
 ]);
 export interface MaskingResult {
  newHistory: Content[];
  maskedCount: number;
@@ -89,6 +108,11 @@ export class ToolOutputMaskingService {
        // core intent and logic, which are harder for the model to recover if lost.
        if (!part.functionResponse) continue;
        const toolName = part.functionResponse.name;
        if (toolName && EXEMPT_TOOLS.has(toolName)) {
          continue;
        }
        const toolOutputContent = this.getToolOutputContent(part);
        if (!toolOutputContent || this.isAlreadyMasked(toolOutputContent)) {
          continue;