feat(context): implement observation masking for tool outputs (#18389)

2026-05-13 05:12:55 -07:00 · 2026-02-05 20:53:11 -05:00
parent 289769f544
commit 8ec176e005
15 changed files with 1151 additions and 7 deletions
@@ -0,0 +1,7 @@
 {
  "experimental": {
    "toolOutputMasking": {
      "enabled": true
    }
  }
 }
@@ -806,6 +806,7 @@ export async function loadCliConfig(
    skillsSupport: settings.skills?.enabled ?? true,
    disabledSkills: settings.skills?.disabled,
    experimentalJitContext: settings.experimental?.jitContext,
    toolOutputMasking: settings.experimental?.toolOutputMasking,
    noBrowser: !!process.env['NO_BROWSER'],
    summarizeToolOutput: settings.model?.summarizeToolOutput,
    ideMode,
@@ -1462,6 +1462,58 @@ const SETTINGS_SCHEMA = {
    description: 'Setting to enable experimental features',
    showInDialog: false,
    properties: {
      toolOutputMasking: {
        type: 'object',
        label: 'Tool Output Masking',
        category: 'Experimental',
        requiresRestart: true,
        ignoreInDocs: true,
        default: {},
        description:
          'Advanced settings for tool output masking to manage context window efficiency.',
        showInDialog: false,
        properties: {
          enabled: {
            type: 'boolean',
            label: 'Enable Tool Output Masking',
            category: 'Experimental',
            requiresRestart: true,
            default: false,
            description: 'Enables tool output masking to save tokens.',
            showInDialog: false,
          },
          toolProtectionThreshold: {
            type: 'number',
            label: 'Tool Protection Threshold',
            category: 'Experimental',
            requiresRestart: true,
            default: 50000,
            description:
              'Minimum number of tokens to protect from masking (most recent tool outputs).',
            showInDialog: false,
          },
          minPrunableTokensThreshold: {
            type: 'number',
            label: 'Min Prunable Tokens Threshold',
            category: 'Experimental',
            requiresRestart: true,
            default: 30000,
            description:
              'Minimum prunable tokens required to trigger a masking pass.',
            showInDialog: false,
          },
          protectLatestTurn: {
            type: 'boolean',
            label: 'Protect Latest Turn',
            category: 'Experimental',
            requiresRestart: true,
            default: true,
            description:
              'Ensures the absolute latest turn is never masked, regardless of token count.',
            showInDialog: false,
          },
        },
      },
      enableAgents: {
        type: 'boolean',
        label: 'Enable Agents',
@@ -149,6 +149,13 @@ export interface OutputSettings {
  format?: OutputFormat;
 }
 /**
 * Fully-resolved settings for tool output masking (every field has a value).
 */
 export interface ToolOutputMaskingConfig {
  /** Enables tool output masking to save tokens. */
  enabled: boolean;
  /** Number of tokens of the most recent tool outputs that are protected from masking. */
  toolProtectionThreshold: number;
  /** Minimum number of prunable tokens required to trigger a masking pass. */
  minPrunableTokensThreshold: number;
  /** When true, the latest turn is never masked, regardless of token count. */
  protectLatestTurn: boolean;
 }
 export interface ExtensionSetting {
  name: string;
  description: string;
@@ -273,6 +280,11 @@ import {
  DEFAULT_FILE_FILTERING_OPTIONS,
  DEFAULT_MEMORY_FILE_FILTERING_OPTIONS,
 } from './constants.js';
 import {
  DEFAULT_TOOL_PROTECTION_THRESHOLD,
  DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD,
  DEFAULT_PROTECT_LATEST_TURN,
 } from '../services/toolOutputMaskingService.js';
 import {
  type ExtensionLoader,
@@ -462,6 +474,7 @@ export interface ConfigParameters {
  disabledSkills?: string[];
  adminSkillsEnabled?: boolean;
  experimentalJitContext?: boolean;
  toolOutputMasking?: Partial<ToolOutputMaskingConfig>;
  disableLLMCorrection?: boolean;
  plan?: boolean;
  onModelChange?: (model: string) => void;
@@ -599,6 +612,7 @@ export class Config {
  private pendingIncludeDirectories: string[];
  private readonly enableHooks: boolean;
  private readonly enableHooksUI: boolean;
  private readonly toolOutputMasking: ToolOutputMaskingConfig;
  private hooks: { [K in HookEventName]?: HookDefinition[] } | undefined;
  private projectHooks:
    | ({ [K in HookEventName]?: HookDefinition[] } & { disabled?: string[] })
@@ -721,6 +735,18 @@ export class Config {
    this.modelAvailabilityService = new ModelAvailabilityService();
    this.previewFeatures = params.previewFeatures ?? undefined;
    this.experimentalJitContext = params.experimentalJitContext ?? false;
    this.toolOutputMasking = {
      enabled: params.toolOutputMasking?.enabled ?? false,
      toolProtectionThreshold:
        params.toolOutputMasking?.toolProtectionThreshold ??
        DEFAULT_TOOL_PROTECTION_THRESHOLD,
      minPrunableTokensThreshold:
        params.toolOutputMasking?.minPrunableTokensThreshold ??
        DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD,
      protectLatestTurn:
        params.toolOutputMasking?.protectLatestTurn ??
        DEFAULT_PROTECT_LATEST_TURN,
    };
    this.maxSessionTurns = params.maxSessionTurns ?? -1;
    this.experimentalZedIntegration =
      params.experimentalZedIntegration ?? false;
@@ -1445,6 +1471,14 @@ export class Config {
    return this.experimentalJitContext;
  }
  /** Returns the `enabled` flag of the stored tool output masking settings. */
  getToolOutputMaskingEnabled(): boolean {
    return this.toolOutputMasking.enabled;
  }
  /**
   * Returns the stored tool output masking settings.
   * The internal object is returned by reference, not copied.
   */
  getToolOutputMaskingConfig(): ToolOutputMaskingConfig {
    return this.toolOutputMasking;
  }
  getGeminiMdFileCount(): number {
    if (this.experimentalJitContext && this.contextManager) {
      return this.contextManager.getLoadedPaths().size;
@@ -213,6 +213,7 @@ describe('Gemini Client (client.ts)', () => {
      getGlobalMemory: vi.fn().mockReturnValue(''),
      getEnvironmentMemory: vi.fn().mockReturnValue(''),
      isJitContextEnabled: vi.fn().mockReturnValue(false),
      getToolOutputMaskingEnabled: vi.fn().mockReturnValue(false),
      getDisableLoopDetection: vi.fn().mockReturnValue(false),
      getSessionId: vi.fn().mockReturnValue('test-session-id'),
@@ -54,6 +54,7 @@ import { handleFallback } from '../fallback/handler.js';
 import type { RoutingContext } from '../routing/routingStrategy.js';
 import { debugLogger } from '../utils/debugLogger.js';
 import type { ModelConfigKey } from '../services/modelConfigService.js';
 import { ToolOutputMaskingService } from '../services/toolOutputMaskingService.js';
 import { calculateRequestTokenCount } from '../utils/tokenCalculation.js';
 import {
  applyModelSelection,
@@ -84,6 +85,7 @@ export class GeminiClient {
  private readonly loopDetector: LoopDetectionService;
  private readonly compressionService: ChatCompressionService;
  private readonly toolOutputMaskingService: ToolOutputMaskingService;
  private lastPromptId: string;
  private currentSequenceModel: string | null = null;
  private lastSentIdeContext: IdeContext | undefined;
@@ -98,6 +100,7 @@ export class GeminiClient {
  constructor(private readonly config: Config) {
    this.loopDetector = new LoopDetectionService(config);
    this.compressionService = new ChatCompressionService();
    this.toolOutputMaskingService = new ToolOutputMaskingService();
    this.lastPromptId = this.config.getSessionId();
    coreEvents.on(CoreEvent.ModelChanged, this.handleModelChanged);
@@ -562,6 +565,8 @@ export class GeminiClient {
    const remainingTokenCount =
      tokenLimit(modelForLimitCheck) - this.getChat().getLastPromptTokenCount();
    await this.tryMaskToolOutputs(this.getHistory());
    // Estimate tokens. For text-only requests, we estimate based on character length.
    // For requests with non-text parts (like images, tools), we use the countTokens API.
    const estimatedRequestTokenCount = await calculateRequestTokenCount(
@@ -1056,4 +1061,20 @@ export class GeminiClient {
    return info;
  }
  /**
   * Runs a tool-output masking pass over `history` when the feature is enabled.
   *
   * The chat history is only replaced when the pass actually masked at least
   * one tool output; otherwise the chat is left untouched.
   *
   * @param history Conversation history to scan for maskable tool outputs.
   */
  private async tryMaskToolOutputs(history: Content[]): Promise<void> {
    const maskingEnabled = this.config.getToolOutputMaskingEnabled();
    if (maskingEnabled) {
      const { maskedCount, newHistory } =
        await this.toolOutputMaskingService.mask(history, this.config);
      if (maskedCount > 0) {
        this.getChat().setHistory(newHistory);
      }
    }
  }
 }
@@ -0,0 +1,31 @@
 // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
 exports[`ToolOutputMaskingService > should match the expected snapshot for a masked tool output 1`] = `
 "<tool_output_masked>
 Line
 Line
 Line
 Line
 Line
 Line
 Line
 Line
 Line
 Line
 ... [6 lines omitted] ...
 Line
 Line
 Line
 Line
 Line
 Line
 Line
 Line
 Line
 Output too large. Full output available at: /mock/history/tool-outputs/run_shell_command_deterministic.txt
 </tool_output_masked>"
 `;
@@ -0,0 +1,500 @@
 /**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
 import { describe, it, expect, vi, beforeEach } from 'vitest';
 import {
  ToolOutputMaskingService,
  MASKING_INDICATOR_TAG,
 } from './toolOutputMaskingService.js';
 import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
 import { estimateTokenCountSync } from '../utils/tokenCalculation.js';
 import type { Config } from '../config/config.js';
 import type { Content, Part } from '@google/genai';
 vi.mock('../utils/tokenCalculation.js', () => ({
  estimateTokenCountSync: vi.fn(),
 }));
 vi.mock('node:fs/promises', () => ({
  mkdir: vi.fn().mockResolvedValue(undefined),
  writeFile: vi.fn().mockResolvedValue(undefined),
 }));
 describe('ToolOutputMaskingService', () => {
  let service: ToolOutputMaskingService;
  let mockConfig: Config;
  const mockedEstimateTokenCountSync = vi.mocked(estimateTokenCountSync);
  beforeEach(() => {
    service = new ToolOutputMaskingService();
    mockConfig = {
      storage: {
        getHistoryDir: () => '/mock/history',
      },
      getUsageStatisticsEnabled: () => false,
      getToolOutputMaskingConfig: () => ({
        enabled: true,
        toolProtectionThreshold: 50000,
        minPrunableTokensThreshold: 30000,
        protectLatestTurn: true,
      }),
    } as unknown as Config;
    vi.clearAllMocks();
  });
  it('should not mask if total tool tokens are below protection threshold', async () => {
    const history: Content[] = [
      {
        role: 'user',
        parts: [
          {
            functionResponse: {
              name: 'test_tool',
              response: { output: 'small output' },
            },
          },
        ],
      },
    ];
    mockedEstimateTokenCountSync.mockReturnValue(100);
    const result = await service.mask(history, mockConfig);
    expect(result.maskedCount).toBe(0);
    expect(result.newHistory).toEqual(history);
  });
  const getToolResponse = (part: Part | undefined): string => {
    const resp = part?.functionResponse?.response as
      | { output: string }
      | undefined;
    return resp?.output ?? (resp as unknown as string) ?? '';
  };
  it('should protect the latest turn and mask older outputs beyond 50k window if total > 30k', async () => {
    // History:
    // Turn 1: 60k (Oldest)
    // Turn 2: 20k
    // Turn 3: 10k (Latest) - Protected because PROTECT_LATEST_TURN is true
    const history: Content[] = [
      {
        role: 'user',
        parts: [
          {
            functionResponse: {
              name: 't1',
              response: { output: 'A'.repeat(60000) },
            },
          },
        ],
      },
      {
        role: 'user',
        parts: [
          {
            functionResponse: {
              name: 't2',
              response: { output: 'B'.repeat(20000) },
            },
          },
        ],
      },
      {
        role: 'user',
        parts: [
          {
            functionResponse: {
              name: 't3',
              response: { output: 'C'.repeat(10000) },
            },
          },
        ],
      },
    ];
    mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
      const toolName = parts[0].functionResponse?.name;
      const resp = parts[0].functionResponse?.response as Record<
        string,
        unknown
      >;
      const content = (resp?.['output'] as string) ?? JSON.stringify(resp);
      if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
      if (toolName === 't1') return 60000;
      if (toolName === 't2') return 20000;
      if (toolName === 't3') return 10000;
      return 0;
    });
    // Scanned: Turn 2 (20k), Turn 1 (60k). Total = 80k.
    // Turn 2: Cumulative = 20k. Protected (<= 50k).
    // Turn 1: Cumulative = 80k. Crossed 50k boundary. Prunable.
    // Total Prunable = 60k (> 30k trigger).
    const result = await service.mask(history, mockConfig);
    expect(result.maskedCount).toBe(1);
    expect(getToolResponse(result.newHistory[0].parts?.[0])).toContain(
      `<${MASKING_INDICATOR_TAG}`,
    );
    expect(getToolResponse(result.newHistory[1].parts?.[0])).toEqual(
      'B'.repeat(20000),
    );
    expect(getToolResponse(result.newHistory[2].parts?.[0])).toEqual(
      'C'.repeat(10000),
    );
  });
  it('should perform global aggregation for many small parts once boundary is hit', async () => {
    // history.length = 12. Index 11 (latest turn) is skipped by the scan.
    // Indices 0-10: 10k each, scanned newest to oldest.
    // Index 10: 10k (Sum 10k)
    // Index 9: 10k (Sum 20k)
    // Index 8: 10k (Sum 30k)
    // Index 7: 10k (Sum 40k)
    // Index 6: 10k (Sum 50k) - still protected: the boundary check is strictly "> 50k".
    // Index 5: 10k (Sum 60k) - crosses the boundary, so it is the first prunable part.
    // Indices 5, 4, 3, 2, 1, 0 are all prunable (6 * 10k = 60k, above the 30k trigger).
    const history: Content[] = Array.from({ length: 12 }, (_, i) => ({
      role: 'user',
      parts: [
        {
          functionResponse: {
            name: `tool${i}`,
            response: { output: 'A'.repeat(10000) },
          },
        },
      ],
    }));
    // Token estimate = content length, except already-masked content which counts as 100.
    mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
      const resp = parts[0].functionResponse?.response as
        | { output?: string; result?: string }
        | string
        | undefined;
      const content =
        typeof resp === 'string'
          ? resp
          : resp?.output || resp?.result || JSON.stringify(resp);
      if (content?.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
      return content?.length || 0;
    });
    const result = await service.mask(history, mockConfig);
    // Indices 0-5 are masked; indices 6-10 stay inside the 50k protection window.
    expect(result.maskedCount).toBe(6);
    expect(result.tokensSaved).toBeGreaterThan(0);
  });
  it('should verify tool-aware previews (shell vs generic)', async () => {
    const shellHistory: Content[] = [
      {
        role: 'user',
        parts: [
          {
            functionResponse: {
              name: SHELL_TOOL_NAME,
              response: {
                output:
                  'Output: line1\nline2\nline3\nline4\nline5\nError: failed\nExit Code: 1',
              },
            },
          },
        ],
      },
      // Protection buffer
      {
        role: 'user',
        parts: [
          {
            functionResponse: {
              name: 'p',
              response: { output: 'p'.repeat(60000) },
            },
          },
        ],
      },
      // Latest turn
      {
        role: 'user',
        parts: [{ functionResponse: { name: 'l', response: { output: 'l' } } }],
      },
    ];
    mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
      const name = parts[0].functionResponse?.name;
      const resp = parts[0].functionResponse?.response as Record<
        string,
        unknown
      >;
      const content = (resp?.['output'] as string) ?? JSON.stringify(resp);
      if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
      if (name === SHELL_TOOL_NAME) return 100000;
      if (name === 'p') return 60000;
      return 100;
    });
    const result = await service.mask(shellHistory, mockConfig);
    const maskedBash = getToolResponse(result.newHistory[0].parts?.[0]);
    expect(maskedBash).toContain('Output: line1\nline2\nline3\nline4\nline5');
    expect(maskedBash).toContain('Exit Code: 1');
    expect(maskedBash).toContain('Error: failed');
  });
  it('should skip already masked content and not count it towards totals', async () => {
    const history: Content[] = [
      {
        role: 'user',
        parts: [
          {
            functionResponse: {
              name: 'tool1',
              response: {
                output: `<${MASKING_INDICATOR_TAG}>...</${MASKING_INDICATOR_TAG}>`,
              },
            },
          },
        ],
      },
      {
        role: 'user',
        parts: [
          {
            functionResponse: {
              name: 'tool2',
              response: { output: 'A'.repeat(60000) },
            },
          },
        ],
      },
    ];
    mockedEstimateTokenCountSync.mockReturnValue(60000);
    const result = await service.mask(history, mockConfig);
    expect(result.maskedCount).toBe(0); // tool1 skipped, tool2 is the "latest" which is protected
  });
  it('should handle different response keys in masked update', async () => {
    const history: Content[] = [
      {
        role: 'model',
        parts: [
          {
            functionResponse: {
              name: 't1',
              response: { result: 'A'.repeat(60000) },
            },
          },
        ],
      },
      {
        role: 'model',
        parts: [
          {
            functionResponse: {
              name: 'p',
              response: { output: 'P'.repeat(60000) },
            },
          },
        ],
      },
      { role: 'user', parts: [{ text: 'latest' }] },
    ];
    mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
      const resp = parts[0].functionResponse?.response as Record<
        string,
        unknown
      >;
      const content =
        (resp?.['output'] as string) ??
        (resp?.['result'] as string) ??
        JSON.stringify(resp);
      if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
      return 60000;
    });
    const result = await service.mask(history, mockConfig);
    expect(result.maskedCount).toBe(2); // both t1 and p are prunable (cumulative 60k and 120k)
    const responseObj = result.newHistory[0].parts?.[0].functionResponse
      ?.response as Record<string, unknown>;
    expect(Object.keys(responseObj)).toEqual(['output']);
  });
  it('should preserve multimodal parts while masking tool responses', async () => {
    const history: Content[] = [
      {
        role: 'user',
        parts: [
          {
            functionResponse: {
              name: 't1',
              response: { output: 'A'.repeat(60000) },
            },
          },
          {
            inlineData: {
              data: 'base64data',
              mimeType: 'image/png',
            },
          },
        ],
      },
      // Protection buffer
      {
        role: 'user',
        parts: [
          {
            functionResponse: {
              name: 'p',
              response: { output: 'p'.repeat(60000) },
            },
          },
        ],
      },
      // Latest turn
      { role: 'user', parts: [{ text: 'latest' }] },
    ];
    mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
      const resp = parts[0].functionResponse?.response as Record<
        string,
        unknown
      >;
      const content = (resp?.['output'] as string) ?? JSON.stringify(resp);
      if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
      if (parts[0].functionResponse?.name === 't1') return 60000;
      if (parts[0].functionResponse?.name === 'p') return 60000;
      return 100;
    });
    const result = await service.mask(history, mockConfig);
    expect(result.maskedCount).toBe(2); //Both t1 and p are prunable (cumulative 60k each > 50k protection)
    expect(result.newHistory[0].parts).toHaveLength(2);
    expect(result.newHistory[0].parts?.[0].functionResponse).toBeDefined();
    expect(
      (
        result.newHistory[0].parts?.[0].functionResponse?.response as Record<
          string,
          unknown
        >
      )['output'],
    ).toContain(`<${MASKING_INDICATOR_TAG}`);
    expect(result.newHistory[0].parts?.[1].inlineData).toEqual({
      data: 'base64data',
      mimeType: 'image/png',
    });
  });
  it('should match the expected snapshot for a masked tool output', async () => {
    const history: Content[] = [
      {
        role: 'user',
        parts: [
          {
            functionResponse: {
              name: SHELL_TOOL_NAME,
              response: {
                output: 'Line\n'.repeat(25),
                exitCode: 0,
              },
            },
          },
        ],
      },
      // Buffer to push shell_tool into prunable territory
      {
        role: 'user',
        parts: [
          {
            functionResponse: {
              name: 'padding',
              response: { output: 'B'.repeat(60000) },
            },
          },
        ],
      },
      { role: 'user', parts: [{ text: 'latest' }] },
    ];
    mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
      const resp = parts[0].functionResponse?.response as Record<
        string,
        unknown
      >;
      const content = (resp?.['output'] as string) ?? JSON.stringify(resp);
      if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;
      if (parts[0].functionResponse?.name === SHELL_TOOL_NAME) return 1000;
      if (parts[0].functionResponse?.name === 'padding') return 60000;
      return 10;
    });
    const result = await service.mask(history, mockConfig);
    // Verify complete masking: only 'output' key should exist
    const responseObj = result.newHistory[0].parts?.[0].functionResponse
      ?.response as Record<string, unknown>;
    expect(Object.keys(responseObj)).toEqual(['output']);
    const response = responseObj['output'] as string;
    // We replace the random part of the filename for deterministic snapshots
    // and normalize path separators for cross-platform compatibility
    const deterministicResponse = response
      .replace(
        new RegExp(`${SHELL_TOOL_NAME}_[^\\s"]+\\.txt`, 'g'),
        `${SHELL_TOOL_NAME}_deterministic.txt`,
      )
      .replace(/\\/g, '/');
    expect(deterministicResponse).toMatchSnapshot();
  });
  it('should not mask if masking increases token count (due to overhead)', async () => {
    const history: Content[] = [
      {
        role: 'user',
        parts: [
          {
            functionResponse: {
              name: 'tiny_tool',
              response: { output: 'tiny' },
            },
          },
        ],
      },
      // Protection buffer to push tiny_tool into prunable territory
      {
        role: 'user',
        parts: [
          {
            functionResponse: {
              name: 'padding',
              response: { output: 'B'.repeat(60000) },
            },
          },
        ],
      },
      { role: 'user', parts: [{ text: 'latest' }] },
    ];
    // The mock keys on the tool name, which a masked part retains, so the
    // estimate is identical before and after masking (savings = 0) for both tools.
    mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
      if (parts[0].functionResponse?.name === 'tiny_tool') return 5;
      if (parts[0].functionResponse?.name === 'padding') return 60000;
      return 1000; // Fallback for any other part; not reached with this history
    });
    const result = await service.mask(history, mockConfig);
    // padding (60k) crosses the 50k boundary, so both parts are prunable (60,005 total),
    // but neither is masked because masking yields no positive token savings.
    expect(result.maskedCount).toBe(0);
  });
 });
@@ -0,0 +1,344 @@
 /**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
 import type { Content, Part } from '@google/genai';
 import path from 'node:path';
 import * as fsPromises from 'node:fs/promises';
 import { estimateTokenCountSync } from '../utils/tokenCalculation.js';
 import { debugLogger } from '../utils/debugLogger.js';
 import { sanitizeFilenamePart } from '../utils/fileUtils.js';
 import type { Config } from '../config/config.js';
 import { logToolOutputMasking } from '../telemetry/loggers.js';
 import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
 import { ToolOutputMaskingEvent } from '../telemetry/types.js';
 // Tool output masking defaults
 /** Default token budget of the most recent tool outputs that is protected from masking. */
 export const DEFAULT_TOOL_PROTECTION_THRESHOLD = 50000;
 /** Default minimum number of prunable tokens needed before a masking pass runs. */
 export const DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD = 30000;
 /** By default the latest history entry is excluded from the masking scan. */
 export const DEFAULT_PROTECT_LATEST_TURN = true;
 /** Tag name wrapped around masked snippets; also used to detect already-masked outputs. */
 export const MASKING_INDICATOR_TAG = 'tool_output_masked';
 /** Sub-directory of the history directory where full tool outputs are offloaded. */
 export const TOOL_OUTPUTS_DIR = 'tool-outputs';
 /** Outcome of a single `ToolOutputMaskingService.mask` pass. */
 export interface MaskingResult {
  /**
   * History after masking. This is the input array itself when the history is
   * empty or the prunable total is below the trigger threshold; otherwise a
   * shallow copy in which masked entries have been replaced.
   */
  newHistory: Content[];
  /** Number of tool outputs that were replaced by a masked snippet. */
  maskedCount: number;
  /** Sum of the estimated token savings over all masked tool outputs. */
  tokensSaved: number;
 }
 /**
 * Service to manage context window efficiency by masking bulky tool outputs (Tool Output Masking).
 *
 * It implements a "Hybrid Backward Scanned FIFO" algorithm to balance context relevance with
 * token savings:
 * 1. **Protection Window**: Protects the newest `toolProtectionThreshold` (default 50k) tool tokens
 *    from pruning. Optionally skips the entire latest conversation turn to ensure full context for
 *    the model's next response.
 * 2. **Global Aggregation**: Scans backwards past the protection window to identify all remaining
 *    tool outputs that haven't been masked yet.
 * 3. **Batch Trigger**: Trigger masking only if the total prunable tokens reach
 *    `minPrunableTokensThreshold` (default 30k).
 *
 * @remarks
 * Effectively, this means masking only starts once the scanned history contains approximately
 * 80k tokens of unmasked tool outputs (50k protected + 30k prunable buffer). Small tool outputs
 * are preserved until they collectively reach the threshold.
 */
 export class ToolOutputMaskingService {
  /**
   * Identifies tool outputs outside the protection window and, when enough
   * prunable tokens have accumulated, replaces each with a short masked
   * snippet while offloading the full output to a file under the history
   * directory.
   *
   * @param history Conversation history, oldest entry first. Not mutated.
   * @param config Supplies the masking settings, the history directory and
   *   the telemetry target.
   * @returns The resulting history plus the number of masked outputs and the
   *   estimated tokens saved. When nothing is scanned or the trigger threshold
   *   is not met, `newHistory` is the input array itself.
   * @throws Rejects if creating the output directory or writing an offload
   *   file fails; these errors are not caught here.
   */
  async mask(history: Content[], config: Config): Promise<MaskingResult> {
    if (history.length === 0) {
      return { newHistory: history, maskedCount: 0, tokensSaved: 0 };
    }

    let cumulativeToolTokens = 0;
    let protectionBoundaryReached = false;
    let totalPrunableTokens = 0;
    let maskedCount = 0;
    const prunableParts: Array<{
      contentIndex: number;
      partIndex: number;
      tokens: number;
      content: string;
    }> = [];

    const maskingConfig = config.getToolOutputMaskingConfig();

    // Decide where to start scanning.
    // If protectLatestTurn is true, we skip the most recent message (index history.length - 1).
    const scanStartIdx = maskingConfig.protectLatestTurn
      ? history.length - 2
      : history.length - 1;

    // Backward scan to identify prunable tool outputs
    for (let i = scanStartIdx; i >= 0; i--) {
      const parts = history[i].parts || [];
      for (let j = parts.length - 1; j >= 0; j--) {
        const part = parts[j];

        // Tool outputs (functionResponse) are the primary targets for pruning because
        // they often contain voluminous data (e.g., shell logs, file content) that
        // can exceed context limits. We preserve other parts—such as user text,
        // model reasoning, and multimodal data—because they define the conversation's
        // core intent and logic, which are harder for the model to recover if lost.
        if (!part.functionResponse) continue;

        const toolOutputContent = this.getToolOutputContent(part);
        if (!toolOutputContent || this.isAlreadyMasked(toolOutputContent)) {
          continue;
        }

        const partTokens = estimateTokenCountSync([part]);

        if (!protectionBoundaryReached) {
          cumulativeToolTokens += partTokens;
          if (cumulativeToolTokens <= maskingConfig.toolProtectionThreshold) {
            // Still inside the protection window.
            continue;
          }
          // The part that crosses the boundary is itself prunable.
          protectionBoundaryReached = true;
        }

        totalPrunableTokens += partTokens;
        prunableParts.push({
          contentIndex: i,
          partIndex: j,
          tokens: partTokens,
          content: toolOutputContent,
        });
      }
    }

    // Trigger pruning only if we have accumulated enough savings to justify the
    // overhead of masking and file I/O (batch pruning threshold).
    if (totalPrunableTokens < maskingConfig.minPrunableTokensThreshold) {
      return { newHistory: history, maskedCount: 0, tokensSaved: 0 };
    }

    debugLogger.debug(
      `[ToolOutputMasking] Triggering masking. Prunable tool tokens: ${totalPrunableTokens.toLocaleString()} (> ${maskingConfig.minPrunableTokensThreshold.toLocaleString()})`,
    );

    // Perform masking and offloading
    const newHistory = [...history]; // Shallow copy of history
    let actualTokensSaved = 0;
    const toolOutputsDir = path.join(
      config.storage.getHistoryDir(),
      TOOL_OUTPUTS_DIR,
    );
    await fsPromises.mkdir(toolOutputsDir, { recursive: true });

    for (const { contentIndex, partIndex, content, tokens } of prunableParts) {
      // Read from newHistory (not history) so that earlier replacements made
      // to other parts of the same entry are kept.
      const contentRecord = newHistory[contentIndex];
      const currentParts = contentRecord.parts ?? [];
      const part = currentParts[partIndex];
      if (!part?.functionResponse) continue;

      const toolName = part.functionResponse.name || 'unknown_tool';
      const callId = part.functionResponse.id || Date.now().toString();
      const safeToolName = sanitizeFilenamePart(toolName).toLowerCase();
      const safeCallId = sanitizeFilenamePart(callId).toLowerCase();
      // Drop the leading "0." and keep up to 8 base-36 digits for uniqueness.
      const randomSuffix = Math.random().toString(36).slice(2, 10);
      const fileName = `${safeToolName}_${safeCallId}_${randomSuffix}.txt`;
      const filePath = path.join(toolOutputsDir, fileName);

      const originalResponse =
        (part.functionResponse.response as Record<string, unknown>) || {};

      const totalLines = content.split('\n').length;
      const fileSizeMB = (
        Buffer.byteLength(content, 'utf8') /
        1024 /
        1024
      ).toFixed(2);

      let preview = '';
      if (toolName === SHELL_TOOL_NAME) {
        preview = this.formatShellPreview(originalResponse);
      } else if (content.length > 500) {
        // General tools: Head + Tail preview (250 chars each)
        preview = `${content.slice(0, 250)}\n... [TRUNCATED] ...\n${content.slice(-250)}`;
      } else {
        preview = content;
      }

      const maskedSnippet = this.formatMaskedSnippet({
        toolName,
        filePath,
        fileSizeMB,
        totalLines,
        tokens,
        preview,
      });

      const maskedPart = {
        ...part,
        functionResponse: {
          ...part.functionResponse,
          response: { output: maskedSnippet },
        },
      };

      const newTaskTokens = estimateTokenCountSync([maskedPart]);
      const savings = tokens - newTaskTokens;
      if (savings <= 0) {
        // Masking would not shrink this part: keep it as is and do not leave
        // an unreferenced offload file on disk.
        continue;
      }

      // Persist the full output before the history starts pointing at it.
      await fsPromises.writeFile(filePath, content, 'utf-8');

      const newParts = [...currentParts];
      newParts[partIndex] = maskedPart;
      newHistory[contentIndex] = { ...contentRecord, parts: newParts };
      actualTokensSaved += savings;
      maskedCount++;
    }

    debugLogger.debug(
      `[ToolOutputMasking] Masked ${maskedCount} tool outputs. Saved ~${actualTokensSaved.toLocaleString()} tokens.`,
    );

    const result = {
      newHistory,
      maskedCount,
      tokensSaved: actualTokensSaved,
    };

    // No telemetry when the pass saved nothing.
    if (actualTokensSaved <= 0) {
      return result;
    }

    logToolOutputMasking(
      config,
      new ToolOutputMaskingEvent({
        tokens_before: totalPrunableTokens,
        tokens_after: totalPrunableTokens - actualTokensSaved,
        masked_count: maskedCount,
        total_prunable_tokens: totalPrunableTokens,
      }),
    );

    return result;
  }

  /**
   * Serializes the whole `functionResponse.response` object as pretty-printed
   * JSON so any tool output schema can be offloaded unchanged.
   *
   * @returns The JSON text, or `null` when the part has no function response
   *   or the response is empty.
   */
  private getToolOutputContent(part: Part): string | null {
    if (!part.functionResponse) return null;
    const response = part.functionResponse.response as Record<string, unknown>;
    if (!response) return null;

    // Stringify the entire response for saving.
    // This handles any tool output schema automatically.
    // Multimodal safety check: Sibling parts (inlineData, etc.) are handled by mask()
    // by keeping the original part structure and only replacing the functionResponse content.
    return JSON.stringify(response, null, 2);
  }

  /** Returns true when the serialized output already contains the masking tag. */
  private isAlreadyMasked(content: string): boolean {
    return content.includes(`<${MASKING_INDICATOR_TAG}`);
  }

  /**
   * Builds a preview for a shell tool response: the `Output` section is
   * shortened, every other recognized section is kept in full, and root-level
   * `exitCode`/`exit_code`/`error` fields are appended when the text does not
   * already mention them.
   */
  private formatShellPreview(response: Record<string, unknown>): string {
    const content = (response['output'] || response['stdout'] || '') as string;
    if (typeof content !== 'string') {
      return typeof content === 'object'
        ? JSON.stringify(content)
        : String(content);
    }

    // The shell tool output is structured in shell.ts with specific section prefixes:
    const sectionRegex =
      /^(Output|Error|Exit Code|Signal|Background PIDs|Process Group PGID): /m;
    // With a capture group, split() yields [prefix, name1, body1, name2, body2, ...].
    const parts = content.split(sectionRegex);

    let preview: string;
    if (parts.length < 3) {
      // Fallback to simple head/tail if not in expected shell.ts format
      preview = this.formatSimplePreview(content);
    } else {
      const previewParts: string[] = [];
      if (parts[0].trim()) {
        previewParts.push(this.formatSimplePreview(parts[0].trim()));
      }
      for (let i = 1; i < parts.length; i += 2) {
        const name = parts[i];
        const sectionContent = parts[i + 1]?.trim() || '';
        if (name === 'Output') {
          previewParts.push(
            `Output: ${this.formatSimplePreview(sectionContent)}`,
          );
        } else {
          // Keep other sections (Error, Exit Code, etc.) in full as they are usually high-signal and small
          previewParts.push(`${name}: ${sectionContent}`);
        }
      }
      preview = previewParts.join('\n');
    }

    // Also check root levels just in case some tool uses them or for future-proofing.
    // This runs for the fallback format too: the masked response keeps only
    // `output`, so these fields would otherwise be dropped.
    const exitCode = response['exitCode'] ?? response['exit_code'];
    const error = response['error'];
    if (
      exitCode !== undefined &&
      exitCode !== 0 &&
      exitCode !== null &&
      !content.includes(`Exit Code: ${exitCode}`)
    ) {
      preview += `\n[Exit Code: ${exitCode}]`;
    }
    if (error && !content.includes(`Error: ${error}`)) {
      preview += `\n[Error: ${error}]`;
    }
    return preview;
  }

  /**
   * Returns `content` unchanged when it has at most 20 lines; otherwise keeps
   * the first and last 10 lines around an "N lines omitted" marker.
   */
  private formatSimplePreview(content: string): string {
    const lines = content.split('\n');
    if (lines.length <= 20) return content;
    const head = lines.slice(0, 10);
    const tail = lines.slice(-10);
    return `${head.join('\n')}\n\n... [${
      lines.length - head.length - tail.length
    } lines omitted] ...\n\n${tail.join('\n')}`;
  }

  /**
   * Wraps the preview and the offload file path in the masking indicator tag.
   * Only `filePath` and `preview` are rendered; the other params are unused here.
   */
  private formatMaskedSnippet(params: MaskedSnippetParams): string {
    const { filePath, preview } = params;
    return `<${MASKING_INDICATOR_TAG}>
 ${preview}
 Output too large. Full output available at: ${filePath}
 </${MASKING_INDICATOR_TAG}>`;
  }
 }
 /**
 * Inputs collected by `mask()` for rendering a masked snippet.
 * `formatMaskedSnippet` currently reads only `filePath` and `preview`.
 */
 interface MaskedSnippetParams {
  /** Name of the tool whose output is being masked. */
  toolName: string;
  /** Path of the file that receives the full output. */
  filePath: string;
  /** UTF-8 size of the full output in MiB, formatted with two decimals. */
  fileSizeMB: string;
  /** Number of newline-separated lines in the full output. */
  totalLines: number;
  /** Estimated token count of the original part. */
  tokens: number;
  /** Shortened rendition of the output embedded in the snippet. */
  preview: string;
 }
@@ -46,6 +46,7 @@ import type {
  ApprovalModeSwitchEvent,
  ApprovalModeDurationEvent,
  PlanExecutionEvent,
  ToolOutputMaskingEvent,
 } from '../types.js';
 import { EventMetadataKey } from './event-metadata-key.js';
 import type { Config } from '../../config/config.js';
@@ -108,6 +109,7 @@ export enum EventNames {
  APPROVAL_MODE_SWITCH = 'approval_mode_switch',
  APPROVAL_MODE_DURATION = 'approval_mode_duration',
  PLAN_EXECUTION = 'plan_execution',
  TOOL_OUTPUT_MASKING = 'tool_output_masking',
 }
 export interface LogResponse {
@@ -1217,8 +1219,40 @@ export class ClearcutLogger {
      },
    ];
    const logEvent = this.createLogEvent(
      EventNames.TOOL_OUTPUT_TRUNCATED,
      data,
    );
    this.enqueueLogEvent(logEvent);
    this.flushIfNeeded();
  }
  /**
   * Records a tool output masking event.
   *
   * The four numeric fields of the event (tokens before masking, tokens after
   * masking, number of masked outputs, total prunable tokens) are converted to
   * strings and paired with their EventMetadataKey entries. The resulting log
   * event is enqueued and flushIfNeeded() is called afterwards.
   *
   * NOTE(review): the `-`/`+` pair inside the enqueue call is diff markup; the
   * added (`+`) line creates the event under EventNames.TOOL_OUTPUT_MASKING.
   *
   * @param event - The masking event whose counters are logged.
   */
  logToolOutputMaskingEvent(event: ToolOutputMaskingEvent): void {
    // Metadata values are sent as strings, hence the toString() calls.
    const data: EventValue[] = [
      {
        gemini_cli_key:
          EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_BEFORE,
        value: event.tokens_before.toString(),
      },
      {
        gemini_cli_key:
          EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_AFTER,
        value: event.tokens_after.toString(),
      },
      {
        gemini_cli_key:
          EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_MASKED_COUNT,
        value: event.masked_count.toString(),
      },
      {
        gemini_cli_key:
          EventMetadataKey.GEMINI_CLI_TOOL_OUTPUT_MASKING_TOTAL_PRUNABLE_TOKENS,
        value: event.total_prunable_tokens.toString(),
      },
    ];
    this.enqueueLogEvent(
-      this.createLogEvent(EventNames.TOOL_OUTPUT_TRUNCATED, data),
+      this.createLogEvent(EventNames.TOOL_OUTPUT_MASKING, data),
    );
    this.flushIfNeeded();
  }
@@ -7,7 +7,7 @@
 // Defines valid event metadata keys for Clearcut logging.
 export enum EventMetadataKey {
  // Deleted enums: 24
-  // Next ID: 148
+  // Next ID: 152
  GEMINI_CLI_KEY_UNKNOWN = 0,
@@ -561,4 +561,20 @@ export enum EventMetadataKey {
  // Logs the classifier threshold used.
  GEMINI_CLI_ROUTING_CLASSIFIER_THRESHOLD = 147,
  // ==========================================================================
  // Tool Output Masking Event Keys
  // ==========================================================================
  // Logs the total tokens in the prunable block before masking.
  GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_BEFORE = 148,
  // Logs the total tokens in the masked remnants after masking.
  GEMINI_CLI_TOOL_OUTPUT_MASKING_TOKENS_AFTER = 149,
  // Logs the number of tool outputs masked in this operation.
  GEMINI_CLI_TOOL_OUTPUT_MASKING_MASKED_COUNT = 150,
  // Logs the total prunable tokens identified at the trigger point.
  GEMINI_CLI_TOOL_OUTPUT_MASKING_TOTAL_PRUNABLE_TOKENS = 151,
 }
@@ -56,6 +56,7 @@ import type {
  StartupStatsEvent,
  LlmLoopCheckEvent,
  PlanExecutionEvent,
  ToolOutputMaskingEvent,
 } from './types.js';
 import {
  recordApiErrorMetrics,
@@ -163,6 +164,21 @@ export function logToolOutputTruncated(
  });
 }
 /**
  * Reports a tool output masking event to both telemetry sinks.
  *
  * The event is first handed to the Clearcut logger (when an instance is
  * available for the given config), then a callback is passed to
  * bufferTelemetryEvent that emits the event as an OpenTelemetry log record.
  *
  * @param config - Configuration used to look up the Clearcut logger and to
  *   build the OpenTelemetry attributes.
  * @param event - The masking event to report.
  */
 export function logToolOutputMasking(
  config: Config,
  event: ToolOutputMaskingEvent,
 ): void {
  const clearcut = ClearcutLogger.getInstance(config);
  clearcut?.logToolOutputMaskingEvent(event);

  const emitLogRecord = (): void => {
    const otelLogger = logs.getLogger(SERVICE_NAME);
    const record: LogRecord = {
      body: event.toLogBody(),
      attributes: event.toOpenTelemetryAttributes(config),
    };
    otelLogger.emit(record);
  };
  bufferTelemetryEvent(emitLogRecord);
 }
 export function logFileOperation(
  config: Config,
  event: FileOperationEvent,
@@ -1376,6 +1376,49 @@ export class ToolOutputTruncatedEvent implements BaseTelemetryEvent {
  }
 }
 // Value reported as the `event.name` OpenTelemetry attribute by ToolOutputMaskingEvent.
 export const EVENT_TOOL_OUTPUT_MASKING = 'gemini_cli.tool_output_masking';
 /**
  * Telemetry event describing one tool output masking pass.
  *
  * Carries four counters supplied by the caller (tokens before and after
  * masking, number of masked outputs, total prunable tokens) plus an ISO
  * timestamp taken at construction time.
  */
 export class ToolOutputMaskingEvent implements BaseTelemetryEvent {
  'event.name': 'tool_output_masking';
  'event.timestamp': string;
  tokens_before: number;
  tokens_after: number;
  masked_count: number;
  total_prunable_tokens: number;

  /**
   * @param details - Counter values copied verbatim onto the event.
   */
  constructor(details: {
    tokens_before: number;
    tokens_after: number;
    masked_count: number;
    total_prunable_tokens: number;
  }) {
    this['event.name'] = 'tool_output_masking';
    this['event.timestamp'] = new Date().toISOString();
    const { tokens_before, tokens_after, masked_count, total_prunable_tokens } =
      details;
    this.tokens_before = tokens_before;
    this.tokens_after = tokens_after;
    this.masked_count = masked_count;
    this.total_prunable_tokens = total_prunable_tokens;
  }

  /**
   * Builds the OpenTelemetry attribute bag for this event: the common
   * attributes for the config, then the event name, timestamp and counters.
   */
  toOpenTelemetryAttributes(config: Config): LogAttributes {
    const attributes: LogAttributes = {
      ...getCommonAttributes(config),
      'event.name': EVENT_TOOL_OUTPUT_MASKING,
      'event.timestamp': this['event.timestamp'],
    };
    // Counters are appended in a fixed order after the identifying keys.
    attributes['tokens_before'] = this.tokens_before;
    attributes['tokens_after'] = this.tokens_after;
    attributes['masked_count'] = this.masked_count;
    attributes['total_prunable_tokens'] = this.total_prunable_tokens;
    return attributes;
  }

  /**
   * Human-readable summary: how many outputs were masked and how many tokens
   * were saved (tokens before minus tokens after).
   */
  toLogBody(): string {
    const savedTokens = this.tokens_before - this.tokens_after;
    return `Tool output masking (Masked ${this.masked_count} tool outputs. Saved ${savedTokens} tokens)`;
  }
 }
 export const EVENT_EXTENSION_UNINSTALL = 'gemini_cli.extension_uninstall';
 export class ExtensionUninstallEvent implements BaseTelemetryEvent {
  'event.name': 'extension_uninstall';
@@ -1602,6 +1645,7 @@ export type TelemetryEvent =
  | LlmLoopCheckEvent
  | StartupStatsEvent
  | WebFetchFallbackAttemptEvent
  | ToolOutputMaskingEvent
  | EditStrategyEvent
  | PlanExecutionEvent
  | RewindEvent
@@ -572,6 +572,14 @@ export async function fileExists(filePath: string): Promise<boolean> {
 const MAX_TRUNCATED_LINE_WIDTH = 1000;
 const MAX_TRUNCATED_CHARS = 4000;
 /**
  * Makes a string safe to embed in a filename.
  *
  * Every character other than an ASCII letter, an ASCII digit, `_` or `-` is
  * replaced with `_`. This neutralises path separators and dots, so the result
  * cannot express path traversal. The length of the string is preserved.
  *
  * @param part - Raw text intended to become part of a filename.
  * @returns The text with each disallowed character replaced by `_`.
  */
 export function sanitizeFilenamePart(part: string): string {
  // Without the `u`/`i` flags, `\w` is exactly [A-Za-z0-9_].
  const disallowedChars = /[^\w-]/g;
  const sanitized = part.replace(disallowedChars, '_');
  return sanitized;
 }
 /**
 * Formats a truncated message for tool output, handling multi-line and single-line (elephant) cases.
 */
@@ -623,11 +631,8 @@ export async function saveTruncatedToolOutput(
  id: string | number, // Accept string (callId) or number (truncationId)
  projectTempDir: string,
 ): Promise<{ outputFile: string; totalLines: number }> {
-  const safeToolName = toolName.replace(/[^a-z0-9]/gi, '_').toLowerCase();
+  const safeToolName = sanitizeFilenamePart(toolName).toLowerCase();
-  const safeId = id
+  const safeId = sanitizeFilenamePart(id.toString()).toLowerCase();
    .toString()
    .replace(/[^a-z0-9]/gi, '_')
    .toLowerCase();
  const fileName = `${safeToolName}_${safeId}.txt`;
  const toolOutputDir = path.join(projectTempDir, TOOL_OUTPUT_DIR);
  const outputFile = path.join(toolOutputDir, fileName);
@@ -1428,6 +1428,44 @@
      "default": {},
      "type": "object",
      "properties": {
        "toolOutputMasking": {
          "title": "Tool Output Masking",
          "description": "Advanced settings for tool output masking to manage context window efficiency.",
          "markdownDescription": "Advanced settings for tool output masking to manage context window efficiency.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `{}`",
          "default": {},
          "type": "object",
          "properties": {
            "enabled": {
              "title": "Enable Tool Output Masking",
              "description": "Enables tool output masking to save tokens.",
              "markdownDescription": "Enables tool output masking to save tokens.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`",
              "default": false,
              "type": "boolean"
            },
            "toolProtectionThreshold": {
              "title": "Tool Protection Threshold",
              "description": "Minimum number of tokens to protect from masking (most recent tool outputs).",
              "markdownDescription": "Minimum number of tokens to protect from masking (most recent tool outputs).\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `50000`",
              "default": 50000,
              "type": "number"
            },
            "minPrunableTokensThreshold": {
              "title": "Min Prunable Tokens Threshold",
              "description": "Minimum prunable tokens required to trigger a masking pass.",
              "markdownDescription": "Minimum prunable tokens required to trigger a masking pass.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `30000`",
              "default": 30000,
              "type": "number"
            },
            "protectLatestTurn": {
              "title": "Protect Latest Turn",
              "description": "Ensures the absolute latest turn is never masked, regardless of token count.",
              "markdownDescription": "Ensures the absolute latest turn is never masked, regardless of token count.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `true`",
              "default": true,
              "type": "boolean"
            }
          },
          "additionalProperties": false
        },
        "enableAgents": {
          "title": "Enable Agents",
          "description": "Enable local and remote subagents. Warning: Experimental feature, uses YOLO mode for subagents",