feat(core): exempt high-signal tools from output masking (#18545)

2026-05-04 00:44:05 -07:00 · 2026-02-07 18:56:45 -05:00
parent 979bbee485
commit a1f2aacd1e
2 changed files with 139 additions and 2 deletions
@@ -12,7 +12,11 @@ import {
  ToolOutputMaskingService,
  MASKING_INDICATOR_TAG,
 } from './toolOutputMaskingService.js';
-import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
+import {
+  SHELL_TOOL_NAME,
+  ACTIVATE_SKILL_TOOL_NAME,
+  MEMORY_TOOL_NAME,
+} from '../tools/tool-names.js';
 import { estimateTokenCountSync } from '../utils/tokenCalculation.js';
 import type { Config } from '../config/config.js';
 import type { Content, Part } from '@google/genai';
@@ -511,4 +515,113 @@ describe('ToolOutputMaskingService', () => {
    const result = await service.mask(history, mockConfig);
    expect(result.maskedCount).toBe(0); // padding is protected, tiny_tool would increase size
  });
+
+  it('should never mask exempt tools (like activate_skill) even if they are deep in history', async () => {
+    const history: Content[] = [
+      {
+        role: 'user',
+        parts: [
+          {
+            functionResponse: {
+              name: ACTIVATE_SKILL_TOOL_NAME,
+              response: { output: 'High value instructions for skill' },
+            },
+          },
+        ],
+      },
+      {
+        role: 'user',
+        parts: [
+          {
+            functionResponse: {
+              name: MEMORY_TOOL_NAME,
+              response: { output: 'Important user preference' },
+            },
+          },
+        ],
+      },
+      {
+        role: 'user',
+        parts: [
+          {
+            functionResponse: {
+              name: 'bulky_tool',
+              response: { output: 'A'.repeat(60000) },
+            },
+          },
+        ],
+      },
+      // Most recent tool output (60k mocked tokens); expected to be masked as well
+      {
+        role: 'user',
+        parts: [
+          {
+            functionResponse: {
+              name: 'padding',
+              response: { output: 'B'.repeat(60000) },
+            },
+          },
+        ],
+      },
+      { role: 'user', parts: [{ text: 'latest' }] },
+    ];
+
+    mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
+      const resp = parts[0].functionResponse?.response as Record<
+        string,
+        unknown
+      >;
+      const content = (resp?.['output'] as string) ?? JSON.stringify(resp);
+      if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
+
+      const name = parts[0].functionResponse?.name;
+      if (name === ACTIVATE_SKILL_TOOL_NAME) return 1000;
+      if (name === MEMORY_TOOL_NAME) return 500;
+      if (name === 'bulky_tool') return 60000;
+      if (name === 'padding') return 60000;
+      return 10; // any other tool name
+    });
+
+    const result = await service.mask(history, mockConfig);
+
+    // Only the two non-exempt responses, 'bulky_tool' and 'padding', are masked.
+    // 'padding' (index 3) alone exceeds the 50k protection boundary at 60k tokens.
+    // ACTIVATE_SKILL and MEMORY outputs are exempt and must come back unchanged.
+    expect(result.maskedCount).toBe(2);
+    expect(result.newHistory[0].parts?.[0].functionResponse?.name).toBe(
+      ACTIVATE_SKILL_TOOL_NAME,
+    );
+    expect(
+      (
+        result.newHistory[0].parts?.[0].functionResponse?.response as Record<
+          string,
+          unknown
+        >
+      )['output'],
+    ).toBe('High value instructions for skill');
+
+    expect(result.newHistory[1].parts?.[0].functionResponse?.name).toBe(
+      MEMORY_TOOL_NAME,
+    );
+    expect(
+      (
+        result.newHistory[1].parts?.[0].functionResponse?.response as Record<
+          string,
+          unknown
+        >
+      )['output'],
+    ).toBe('Important user preference');
+
+    expect(result.newHistory[2].parts?.[0].functionResponse?.name).toBe(
+      'bulky_tool',
+    );
+    expect(
+      (
+        result.newHistory[2].parts?.[0].functionResponse?.response as Record<
+          string,
+          unknown
+        >
+      )['output'],
+    ).toContain(MASKING_INDICATOR_TAG);
+  });
 });
@@ -12,7 +12,14 @@ import { debugLogger } from '../utils/debugLogger.js';
 import { sanitizeFilenamePart } from '../utils/fileUtils.js';
 import type { Config } from '../config/config.js';
 import { logToolOutputMasking } from '../telemetry/loggers.js';
-import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
+import {
+  SHELL_TOOL_NAME,
+  ACTIVATE_SKILL_TOOL_NAME,
+  MEMORY_TOOL_NAME,
+  ASK_USER_TOOL_NAME,
+  ENTER_PLAN_MODE_TOOL_NAME,
+  EXIT_PLAN_MODE_TOOL_NAME,
+} from '../tools/tool-names.js';
 import { ToolOutputMaskingEvent } from '../telemetry/types.js';

 // Tool output masking defaults
@@ -23,6 +30,18 @@ export const MASKING_INDICATOR_TAG = 'tool_output_masked';

 export const TOOL_OUTPUTS_DIR = 'tool-outputs';

+/**
+ * Tool names whose outputs are always high-signal: a functionResponse from one
+ * of these is never masked, regardless of its position in conversation history.
+ */
+const EXEMPT_TOOLS = new Set([
+  ACTIVATE_SKILL_TOOL_NAME,
+  MEMORY_TOOL_NAME,
+  ASK_USER_TOOL_NAME,
+  ENTER_PLAN_MODE_TOOL_NAME,
+  EXIT_PLAN_MODE_TOOL_NAME,
+]);
+
 export interface MaskingResult {
  newHistory: Content[];
  maskedCount: number;
@@ -89,6 +108,11 @@ export class ToolOutputMaskingService {
        // core intent and logic, which are harder for the model to recover if lost.
        if (!part.functionResponse) continue;

+        const toolName = part.functionResponse.name;
+        if (toolName && EXEMPT_TOOLS.has(toolName)) {
+          continue;
+        }
+
        const toolOutputContent = this.getToolOutputContent(part);
        if (!toolOutputContent || this.isAlreadyMasked(toolOutputContent)) {
          continue;