feat(core): Unified Context Management and Tool Distillation. (#24157)

2026-06-12 20:37:08 -07:00 · 2026-03-30 15:29:59 -07:00
parent 117a2d3844
commit dfba0e91e2
22 changed files with 1717 additions and 314 deletions
@@ -11,6 +11,7 @@ import { inspect } from 'node:util';
 import process from 'node:process';
 import { z } from 'zod';
 import type { ConversationRecord } from '../services/chatRecordingService.js';
+import type { AgentHistoryProviderConfig } from '../services/types.js';
 export type { ConversationRecord };
 import {
  AuthType,
@@ -204,6 +205,23 @@ export interface OutputSettings {
  format?: OutputFormat;
 }

+export interface ContextManagementConfig {
+  enabled: boolean; // Master switch (Config defaults it to false): gates history truncation, summarization and tool-output distillation.
+  historyWindow: {
+    maxTokens: number; // Config default 150000. History is truncated only once its estimated total exceeds this.
+    retainedTokens: number; // Config default 40000. Presumably the token budget kept after truncation — confirm in AgentHistoryProvider.
+  };
+  messageLimits: {
+    normalMaxTokens: number; // Config default 2500. Forwarded as AgentHistoryProviderConfig.normalMessageTokens.
+    retainedMaxTokens: number; // Config default 12000. Forwarded as AgentHistoryProviderConfig.maximumMessageTokens.
+    normalizationHeadRatio: number; // Config default 0.25. Presumably the head share kept when a message is truncated — confirm.
+  };
+  toolDistillation: {
+    maxOutputTokens: number; // Config default 10000. Exposed via getToolMaxOutputTokens().
+    summarizationThresholdTokens: number; // Config default 20000. Exposed via getToolSummarizationThresholdTokens().
+  };
+}
+
 export interface ToolOutputMaskingConfig {
  enabled: boolean;
  toolProtectionThreshold: number;
@@ -674,6 +692,7 @@ export interface ConfigParameters {
  enableHooks?: boolean;
  enableHooksUI?: boolean;
  experiments?: Experiments;
+  // Each nested group may be supplied partially: the Config constructor
+  // defaults every nested field on its own, which a shallow Partial<> does
+  // not let callers rely on (it would require whole nested objects).
+  contextManagement?: {
+    [K in keyof ContextManagementConfig]?: ContextManagementConfig[K] extends object
+      ? Partial<ContextManagementConfig[K]>
+      : ContextManagementConfig[K];
+  };
  hooks?: { [K in HookEventName]?: HookDefinition[] };
  disabledHooks?: string[];
  projectHooks?: { [K in HookEventName]?: HookDefinition[] };
@@ -683,6 +702,7 @@ export interface ConfigParameters {
  disabledSkills?: string[];
  adminSkillsEnabled?: boolean;
  experimentalJitContext?: boolean;
+  autoDistillation?: boolean; // NOTE(review): not read in any hunk shown here; isAutoDistillationEnabled() returns contextManagement.enabled — wire up or remove.
  experimentalMemoryManager?: boolean;
  experimentalAgentHistoryTruncation?: boolean;
  experimentalAgentHistoryTruncationThreshold?: number;
@@ -919,13 +939,8 @@ export class Config implements McpContext, AgentLoopContext {
  private readonly skillsSupport: boolean;
  private disabledSkills: string[];
  private readonly adminSkillsEnabled: boolean;
-
  private readonly experimentalJitContext: boolean;
  private readonly experimentalMemoryManager: boolean;
-  private readonly experimentalAgentHistoryTruncation: boolean;
-  private readonly experimentalAgentHistoryTruncationThreshold: number;
-  private readonly experimentalAgentHistoryRetainedMessages: number;
-  private readonly experimentalAgentHistorySummarization: boolean;
  private readonly memoryBoundaryMarkers: readonly string[];
  private readonly topicUpdateNarration: boolean;
  private readonly disableLLMCorrection: boolean;
@@ -934,6 +949,7 @@ export class Config implements McpContext, AgentLoopContext {
  private readonly planModeRoutingEnabled: boolean;
  private readonly modelSteering: boolean;
  private contextManager?: ContextManager;
+  private readonly contextManagement: ContextManagementConfig;
  private terminalBackground: string | undefined = undefined;
  private remoteAdminSettings: AdminControlsSettings | undefined;
  private latestApiRequest: GenerateContentParameters | undefined;
@@ -1136,15 +1152,31 @@ export class Config implements McpContext, AgentLoopContext {

    this.experimentalJitContext = params.experimentalJitContext ?? true;
    this.experimentalMemoryManager = params.experimentalMemoryManager ?? false;
-    this.experimentalAgentHistoryTruncation =
-      params.experimentalAgentHistoryTruncation ?? false;
-    this.experimentalAgentHistoryTruncationThreshold =
-      params.experimentalAgentHistoryTruncationThreshold ?? 30;
-    this.experimentalAgentHistoryRetainedMessages =
-      params.experimentalAgentHistoryRetainedMessages ?? 15;
-    this.experimentalAgentHistorySummarization =
-      params.experimentalAgentHistorySummarization ?? false;
    this.memoryBoundaryMarkers = params.memoryBoundaryMarkers ?? ['.git'];
+    // Resolve each optional settings group once, then fill in defaults per field.
+    const contextParams = params.contextManagement;
+    const historyParams = contextParams?.historyWindow;
+    const limitParams = contextParams?.messageLimits;
+    const distillParams = contextParams?.toolDistillation;
+    this.contextManagement = {
+      enabled: contextParams?.enabled ?? false,
+      historyWindow: {
+        maxTokens: historyParams?.maxTokens ?? 150000,
+        retainedTokens: historyParams?.retainedTokens ?? 40000,
+      },
+      messageLimits: {
+        normalMaxTokens: limitParams?.normalMaxTokens ?? 2500,
+        retainedMaxTokens: limitParams?.retainedMaxTokens ?? 12000,
+        normalizationHeadRatio: limitParams?.normalizationHeadRatio ?? 0.25,
+      },
+      toolDistillation: {
+        maxOutputTokens: distillParams?.maxOutputTokens ?? 10000,
+        summarizationThresholdTokens:
+          distillParams?.summarizationThresholdTokens ?? 20000,
+      },
+    };
    this.topicUpdateNarration = params.topicUpdateNarration ?? false;
    this.modelSteering = params.modelSteering ?? false;
    this.injectionService = new InjectionService(() =>
@@ -2330,6 +2362,10 @@ export class Config implements McpContext, AgentLoopContext {
    return this.experimentalJitContext;
  }

+  isAutoDistillationEnabled(): boolean {
+    return this.contextManagement.enabled; // no dedicated flag: follows the context-management master switch
+  }
+
  getMemoryBoundaryMarkers(): readonly string[] {
    return this.memoryBoundaryMarkers;
  }
@@ -2338,20 +2374,22 @@ export class Config implements McpContext, AgentLoopContext {
    return this.experimentalMemoryManager;
  }

-  isExperimentalAgentHistoryTruncationEnabled(): boolean {
-    return this.experimentalAgentHistoryTruncation;
+  getContextManagementConfig(): ContextManagementConfig {
+    return this.contextManagement; // the stored object itself, not a copy
  }

-  getExperimentalAgentHistoryTruncationThreshold(): number {
-    return this.experimentalAgentHistoryTruncationThreshold;
-  }
-
-  getExperimentalAgentHistoryRetainedMessages(): number {
-    return this.experimentalAgentHistoryRetainedMessages;
-  }
-
-  isExperimentalAgentHistorySummarizationEnabled(): boolean {
-    return this.experimentalAgentHistorySummarization;
+  get agentHistoryProviderConfig(): AgentHistoryProviderConfig {
+    // Flatten the nested context-management settings into the provider's shape;
+    // one `enabled` flag drives both truncation and summarization.
+    const { enabled, historyWindow, messageLimits } = this.contextManagement;
+    return {
+      isTruncationEnabled: enabled,
+      isSummarizationEnabled: enabled,
+      maxTokens: historyWindow.maxTokens,
+      retainedTokens: historyWindow.retainedTokens,
+      normalMessageTokens: messageLimits.normalMaxTokens,
+      maximumMessageTokens: messageLimits.retainedMaxTokens,
+      normalizationHeadRatio: messageLimits.normalizationHeadRatio,
+    };
  }

  isTopicUpdateNarrationEnabled(): boolean {
@@ -3241,6 +3279,14 @@ export class Config implements McpContext, AgentLoopContext {
    );
  }

+  getToolMaxOutputTokens(): number {
+    return this.contextManagement.toolDistillation.maxOutputTokens; // 10000 unless overridden via ConfigParameters.contextManagement
+  }
+
+  getToolSummarizationThresholdTokens(): number {
+    return this.contextManagement.toolDistillation.summarizationThresholdTokens; // 20000 unless overridden via ConfigParameters.contextManagement
+  }
+
  getNextCompressionTruncationId(): number {
    return ++this.compressionTruncationCounter;
  }
@@ -279,16 +279,9 @@ describe('Gemini Client (client.ts)', () => {
      getActiveModel: vi.fn().mockReturnValue('test-model'),
      setActiveModel: vi.fn(),
      resetTurn: vi.fn(),
-      isExperimentalAgentHistoryTruncationEnabled: vi
-        .fn()
-        .mockReturnValue(false),
-      getExperimentalAgentHistoryTruncationThreshold: vi
-        .fn()
-        .mockReturnValue(30),
-      getExperimentalAgentHistoryRetainedMessages: vi.fn().mockReturnValue(15),
-      isExperimentalAgentHistorySummarizationEnabled: vi
-        .fn()
-        .mockReturnValue(false),
+
+      isAutoDistillationEnabled: vi.fn().mockReturnValue(false),
+      getContextManagementConfig: vi.fn().mockReturnValue({ enabled: false }),
      getModelAvailabilityService: vi
        .fn()
        .mockReturnValue(createAvailabilityServiceMock()),
@@ -716,9 +709,9 @@ describe('Gemini Client (client.ts)', () => {
  describe('sendMessageStream', () => {
    it('calls AgentHistoryProvider.manageHistory when history truncation is enabled', async () => {
      // Arrange
-      mockConfig.isExperimentalAgentHistoryTruncationEnabled = vi
+      mockConfig.getContextManagementConfig = vi
        .fn()
-        .mockReturnValue(true);
+        .mockReturnValue({ enabled: true });
      const manageHistorySpy = vi
        .spyOn(
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -115,12 +115,10 @@ export class GeminiClient {
  constructor(private readonly context: AgentLoopContext) {
    this.loopDetector = new LoopDetectionService(this.config);
    this.compressionService = new ChatCompressionService();
-    this.agentHistoryProvider = new AgentHistoryProvider(this.config, {
-      truncationThreshold:
-        this.config.getExperimentalAgentHistoryTruncationThreshold(),
-      retainedMessages:
-        this.config.getExperimentalAgentHistoryRetainedMessages(),
-    });
+    this.agentHistoryProvider = new AgentHistoryProvider(
+      this.config.agentHistoryProviderConfig,
+      this.config,
+    );
    this.toolOutputMaskingService = new ToolOutputMaskingService();
    this.lastPromptId = this.config.getSessionId();

@@ -621,7 +619,7 @@ export class GeminiClient {
    // Check for context window overflow
    const modelForLimitCheck = this._getActiveModelForCurrentTurn();

-    if (this.config.isExperimentalAgentHistoryTruncationEnabled()) {
+    if (this.config.getContextManagementConfig().enabled) {
      const newHistory = await this.agentHistoryProvider.manageHistory(
        this.getHistory(),
        signal,
@@ -75,6 +75,7 @@ function createMockConfig(overrides: Partial<Config> = {}): Config {
      ({
        check: async () => ({ decision: 'allow' }),
      }) as unknown as PolicyEngine,
+    isAutoDistillationEnabled: () => false,
  } as unknown as Config;

  const mockConfig = Object.assign({}, baseConfig, overrides) as Config;
@@ -75,6 +75,7 @@ describe('ToolExecutor', () => {
    vi.mocked(fileUtils.formatTruncatedToolOutput).mockReturnValue(
      'TruncatedContent...',
    );
+    vi.spyOn(config, 'isAutoDistillationEnabled').mockReturnValue(false);
  });

  afterEach(() => {
@@ -19,6 +19,7 @@ import {
 import { isAbortError } from '../utils/errors.js';
 import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
 import { DiscoveredMCPTool } from '../tools/mcp-tool.js';
+import { ToolOutputDistillationService } from '../services/toolDistillationService.js';
 import { executeToolWithHooks } from '../core/coreToolHookTriggers.js';
 import {
  saveTruncatedToolOutput,
@@ -196,6 +197,15 @@ export class ToolExecutor {
    call: ToolCall,
    content: PartListUnion,
  ): Promise<{ truncatedContent: PartListUnion; outputFile?: string }> {
+    if (this.config.isAutoDistillationEnabled()) {
+      const distiller = new ToolOutputDistillationService(
+        this.config,
+        this.context.geminiClient,
+        this.context.promptId,
+      );
+      return distiller.distill(call.request.name, call.request.callId, content);
+    }
+
    const toolName = call.request.name;
    const callId = call.request.callId;
    let outputFile: string | undefined;
@@ -1,17 +0,0 @@
-// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
-
-exports[`AgentHistoryProvider > should handle summarizer failures gracefully 1`] = `
-{
-  "parts": [
-    {
-      "text": "[System Note: Prior conversation history was truncated. The most recent user message before truncation was:]
-
-Message 18",
-    },
-    {
-      "text": "Message 20",
-    },
-  ],
-  "role": "user",
-}
-`;
@@ -6,13 +6,28 @@

 import { describe, it, expect, vi, beforeEach } from 'vitest';
 import { AgentHistoryProvider } from './agentHistoryProvider.js';
-import type { Content, GenerateContentResponse } from '@google/genai';
-import type { Config } from '../config/config.js';
+import { estimateTokenCountSync } from '../utils/tokenCalculation.js';
+
+vi.mock('../utils/tokenCalculation.js', () => ({
+  estimateTokenCountSync: vi.fn(),
+  ASCII_TOKENS_PER_CHAR: 0.25,
+  NON_ASCII_TOKENS_PER_CHAR: 1.3,
+}));
+
+import type { Content, GenerateContentResponse, Part } from '@google/genai';
+import type { Config, ContextManagementConfig } from '../config/config.js';
 import type { BaseLlmClient } from '../core/baseLlmClient.js';
+import type { AgentHistoryProviderConfig } from './types.js';
+// Relative specifier, consistent with the other imports in this test file.
+import {
+  TEXT_TRUNCATION_PREFIX,
+  TOOL_TRUNCATION_PREFIX,
+  truncateProportionally,
+} from '../utils/truncation.js';

 describe('AgentHistoryProvider', () => {
  let config: Config;
  let provider: AgentHistoryProvider;
+  let providerConfig: AgentHistoryProviderConfig;
  let generateContentMock: ReturnType<typeof vi.fn>;

  beforeEach(() => {
@@ -20,12 +35,14 @@ describe('AgentHistoryProvider', () => {
      isExperimentalAgentHistoryTruncationEnabled: vi
        .fn()
        .mockReturnValue(false),
-      isExperimentalAgentHistorySummarizationEnabled: vi
-        .fn()
-        .mockReturnValue(false),
+      // Return a config-shaped object (not a bare boolean) so `.enabled` reads
+      // behave like the real Config.getContextManagementConfig().
+      getContextManagementConfig: vi.fn().mockReturnValue({ enabled: false }),
      getBaseLlmClient: vi.fn(),
    } as unknown as Config;

+    // By default, messages are small
+    vi.mocked(estimateTokenCountSync).mockImplementation(
+      (parts: Part[]) => parts.length * 100,
+    );
    generateContentMock = vi.fn().mockResolvedValue({
      candidates: [{ content: { parts: [{ text: 'Mock intent summary' }] } }],
    } as unknown as GenerateContentResponse);
@@ -33,11 +50,16 @@ describe('AgentHistoryProvider', () => {
    config.getBaseLlmClient = vi.fn().mockReturnValue({
      generateContent: generateContentMock,
    } as unknown as BaseLlmClient);
-
-    provider = new AgentHistoryProvider(config, {
-      truncationThreshold: 30,
-      retainedMessages: 15,
-    });
+    providerConfig = {
+      maxTokens: 60000,
+      retainedTokens: 40000,
+      normalMessageTokens: 2500,
+      maximumMessageTokens: 10000,
+      normalizationHeadRatio: 0.2,
+      isSummarizationEnabled: false,
+      isTruncationEnabled: false,
+    };
+    provider = new AgentHistoryProvider(providerConfig, config);
  });

  const createMockHistory = (count: number): Content[] =>
@@ -47,10 +69,7 @@ describe('AgentHistoryProvider', () => {
    }));

  it('should return history unchanged if truncation is disabled', async () => {
-    vi.spyOn(
-      config,
-      'isExperimentalAgentHistoryTruncationEnabled',
-    ).mockReturnValue(false);
+    providerConfig.isTruncationEnabled = false;

    const history = createMockHistory(40);
    const result = await provider.manageHistory(history);
@@ -60,10 +79,7 @@ describe('AgentHistoryProvider', () => {
  });

  it('should return history unchanged if length is under threshold', async () => {
-    vi.spyOn(
-      config,
-      'isExperimentalAgentHistoryTruncationEnabled',
-    ).mockReturnValue(true);
+    providerConfig.isTruncationEnabled = true;

    const history = createMockHistory(20); // 20 messages at the default mock cost stay far below maxTokens (60000)
    const result = await provider.manageHistory(history);
@@ -72,60 +88,72 @@ describe('AgentHistoryProvider', () => {
    expect(result.length).toBe(20);
  });

-  it('should truncate mechanically to RETAINED_MESSAGES without summarization when sum flag is off', async () => {
-    vi.spyOn(
-      config,
-      'isExperimentalAgentHistoryTruncationEnabled',
-    ).mockReturnValue(true);
-    vi.spyOn(
-      config,
-      'isExperimentalAgentHistorySummarizationEnabled',
-    ).mockReturnValue(false);
+  it('should truncate when total tokens exceed budget, preserving structural integrity', async () => {
+    providerConfig.isTruncationEnabled = true;
+    providerConfig.maxTokens = 60000;
+    providerConfig.retainedTokens = 60000;
+    vi.spyOn(config, 'getContextManagementConfig').mockReturnValue({
+      enabled: false,
+    } as unknown as ContextManagementConfig);

-    const history = createMockHistory(35); // Above 30 threshold, should truncate to 15
+    // Make each message cost 4000 tokens
+    vi.mocked(estimateTokenCountSync).mockImplementation(
+      (parts: Part[]) => parts.length * 4000,
+    );
+    const history = createMockHistory(35); // 35 * 4000 = 140,000 total tokens > maxTokens
    const result = await provider.manageHistory(history);
+    // Budget: retainedTokens = 60000 and each message costs 4000 tokens, so
+    // 60000 / 4000 = 15 messages fit.
+    // Some older messages may get normalized, but the 15 newest (the grace
+    // period) are only held to MAXIMUM_MESSAGE_TOKENS (10000), so they keep
+    // their 4000-token cost: 15 * 4000 = 60000 tokens, exactly the budget.
+    // The next older message would push the total over budget, so exactly 15
+    // messages are retained.
+    // A retained user message with a functionResponse might pull in its model
+    // call too, but createMockHistory does not produce functionResponses.

    expect(result.length).toBe(15);
    expect(generateContentMock).not.toHaveBeenCalled();

-    // Check fallback message logic
-    // Messages 20 to 34 are retained. Message 20 is 'user'.
    expect(result[0].role).toBe('user');
    expect(result[0].parts![0].text).toContain(
-      'System Note: Prior conversation history was truncated',
+      '### [System Note: Conversation History Truncated]',
    );
  });

  it('should call summarizer and prepend summary when summarization is enabled', async () => {
-    vi.spyOn(
-      config,
-      'isExperimentalAgentHistoryTruncationEnabled',
-    ).mockReturnValue(true);
-    vi.spyOn(
-      config,
-      'isExperimentalAgentHistorySummarizationEnabled',
-    ).mockReturnValue(true);
+    providerConfig.isTruncationEnabled = true;
+    providerConfig.isSummarizationEnabled = true;
+    providerConfig.maxTokens = 60000;
+    providerConfig.retainedTokens = 60000;
+    vi.spyOn(config, 'getContextManagementConfig').mockReturnValue({
+      enabled: true,
+    } as unknown as ContextManagementConfig);

+    vi.mocked(estimateTokenCountSync).mockImplementation(
+      (parts: Part[]) => parts.length * 4000,
+    );
    const history = createMockHistory(35);
    const result = await provider.manageHistory(history);

    expect(generateContentMock).toHaveBeenCalled();
-    expect(result.length).toBe(15); // retained messages
+    expect(result.length).toBe(15);
    expect(result[0].role).toBe('user');
    expect(result[0].parts![0].text).toContain('<intent_summary>');
    expect(result[0].parts![0].text).toContain('Mock intent summary');
  });

  it('should handle summarizer failures gracefully', async () => {
-    vi.spyOn(
-      config,
-      'isExperimentalAgentHistoryTruncationEnabled',
-    ).mockReturnValue(true);
-    vi.spyOn(
-      config,
-      'isExperimentalAgentHistorySummarizationEnabled',
-    ).mockReturnValue(true);
-
+    providerConfig.isTruncationEnabled = true;
+    providerConfig.isSummarizationEnabled = true;
+    providerConfig.maxTokens = 60000;
+    providerConfig.retainedTokens = 60000;
+    vi.spyOn(config, 'getContextManagementConfig').mockReturnValue({
+      enabled: true,
+    } as unknown as ContextManagementConfig);
+    vi.mocked(estimateTokenCountSync).mockImplementation(
+      (parts: Part[]) => parts.length * 4000,
+    );
    generateContentMock.mockRejectedValue(new Error('API Error'));

    const history = createMockHistory(35);
@@ -133,6 +161,346 @@ describe('AgentHistoryProvider', () => {

    expect(generateContentMock).toHaveBeenCalled();
    expect(result.length).toBe(15);
-    expect(result[0]).toMatchSnapshot();
+    // Should fallback to fallback text
+    expect(result[0].parts![0].text).toContain(
+      '[System Note: Conversation History Truncated]',
+    );
+  });
+
+  it('should pass the contextual bridge to the summarizer', async () => {
+    providerConfig.isTruncationEnabled = true;
+    providerConfig.isSummarizationEnabled = true;
+    vi.spyOn(config, 'getContextManagementConfig').mockReturnValue({
+      enabled: true,
+    } as unknown as ContextManagementConfig);
+
+    // Max tokens 30 means if total tokens > 30, it WILL truncate.
+    providerConfig.maxTokens = 30;
+    // budget 20 tokens means it will keep 2 messages if they are 10 each.
+    providerConfig.retainedTokens = 20;
+
+    vi.mocked(estimateTokenCountSync).mockImplementation(
+      (parts: Part[]) => parts.length * 10,
+    );
+    const history: Content[] = [
+      { role: 'user', parts: [{ text: 'Old Message' }] },
+      { role: 'model', parts: [{ text: 'Old Response' }] },
+      { role: 'user', parts: [{ text: 'Keep 1' }] },
+      { role: 'user', parts: [{ text: 'Keep 2' }] },
+    ];
+
+    await provider.manageHistory(history);
+
+    expect(generateContentMock).toHaveBeenCalled();
+    const callArgs = generateContentMock.mock.calls[0][0];
+    const prompt = callArgs.contents[0].parts[0].text;
+
+    expect(prompt).toContain('ACTIVE BRIDGE (LOOKAHEAD):');
+    expect(prompt).toContain('Keep 1');
+    expect(prompt).toContain('Keep 2');
+  });
+
+  it('should detect a previous summary in the truncated head', async () => {
+    providerConfig.isTruncationEnabled = true;
+    providerConfig.isSummarizationEnabled = true;
+    vi.spyOn(config, 'getContextManagementConfig').mockReturnValue({
+      enabled: true,
+    } as unknown as ContextManagementConfig);
+
+    providerConfig.maxTokens = 20;
+    providerConfig.retainedTokens = 10;
+
+    vi.mocked(estimateTokenCountSync).mockImplementation(
+      (parts: Part[]) => parts.length * 10,
+    );
+    const history: Content[] = [
+      {
+        role: 'user',
+        parts: [{ text: '<intent_summary>Previous Mandate</intent_summary>' }],
+      },
+      { role: 'model', parts: [{ text: 'Work' }] },
+      { role: 'user', parts: [{ text: 'New Work' }] },
+    ];
+
+    await provider.manageHistory(history);
+
+    expect(generateContentMock).toHaveBeenCalled();
+    const callArgs = generateContentMock.mock.calls[0][0];
+    const prompt = callArgs.contents[0].parts[0].text;
+
+    expect(prompt).toContain('1. **Previous Summary:**');
+    expect(prompt).toContain('PREVIOUS SUMMARY AND TRUNCATED HISTORY:');
+  });
+
+  it('should include the Action Path (necklace of function names) in the prompt', async () => {
+    providerConfig.isTruncationEnabled = true;
+    providerConfig.isSummarizationEnabled = true;
+    vi.spyOn(config, 'getContextManagementConfig').mockReturnValue({
+      enabled: true,
+    } as unknown as ContextManagementConfig);
+
+    providerConfig.maxTokens = 20;
+    providerConfig.retainedTokens = 10;
+
+    vi.mocked(estimateTokenCountSync).mockImplementation(
+      (parts: Part[]) => parts.length * 10,
+    );
+    const history: Content[] = [
+      {
+        role: 'model',
+        parts: [
+          { functionCall: { name: 'tool_a', args: {} } },
+          { functionCall: { name: 'tool_b', args: {} } },
+        ],
+      },
+      { role: 'user', parts: [{ text: 'Keep' }] },
+    ];
+
+    await provider.manageHistory(history);
+
+    expect(generateContentMock).toHaveBeenCalled();
+    const callArgs = generateContentMock.mock.calls[0][0];
+    const prompt = callArgs.contents[0].parts[0].text;
+
+    expect(prompt).toContain('The Action Path:');
+    expect(prompt).toContain('tool_a → tool_b');
+  });
+
+  describe('Tiered Normalization Logic', () => {
+    it('normalizes large messages incrementally: newest and exit-grace', async () => {
+      providerConfig.isTruncationEnabled = true;
+      providerConfig.retainedTokens = 30000;
+      providerConfig.maximumMessageTokens = 10000;
+      providerConfig.normalMessageTokens = 2500; // History of 35 messages.
+      // Index 34: Newest (Grace Zone) -> Target 10000 tokens (~40000 chars)
+      // Index 19: Exit Grace (35-1-15=19) -> Target 2500 tokens (~10000 chars)
+      // Index 10: Archived -> Also normalized to 2500 tokens (~10000 chars); see assertion 3 below
+      const history = createMockHistory(35);
+      const hugeText = 'H'.repeat(100000);
+
+      history[34] = { role: 'user', parts: [{ text: hugeText }] };
+      history[19] = { role: 'model', parts: [{ text: hugeText }] };
+      history[10] = { role: 'user', parts: [{ text: hugeText }] };
+
+      // Mock token count to trigger normalization (100k chars = 25k tokens @ 4 chars/token)
+      vi.mocked(estimateTokenCountSync).mockImplementation((parts: Part[]) => {
+        if (!parts?.[0]) return 10;
+        const text = parts[0].text || '';
+        if (text.startsWith('H')) return 25000;
+        return 10;
+      });
+
+      const result = await provider.manageHistory(history);
+
+      // 1. Newest message (index 34) normalized to ~40000 chars
+      const normalizedLast = result[34].parts![0].text!;
+      expect(normalizedLast).toContain(TEXT_TRUNCATION_PREFIX);
+      expect(normalizedLast.length).toBeLessThan(50000);
+      expect(normalizedLast.length).toBeGreaterThan(30000);
+
+      // 2. Exit grace message (index 19) normalized to ~10000 chars
+      const normalizedArchived = result[19].parts![0].text!;
+      expect(normalizedArchived).toContain(TEXT_TRUNCATION_PREFIX);
+      expect(normalizedArchived.length).toBeLessThan(15000);
+      expect(normalizedArchived.length).toBeGreaterThan(8000);
+
+      // 3. Archived message (index 10) IS touched and normalized to ~10000 chars
+      const normalizedPastArchived = result[10].parts![0].text!;
+      expect(normalizedPastArchived).toContain(TEXT_TRUNCATION_PREFIX);
+      expect(normalizedPastArchived.length).toBeLessThan(15000);
+      expect(normalizedPastArchived.length).toBeGreaterThan(8000);
+    });
+
+    it('normalize function responses correctly by targeting large string values', async () => {
+      providerConfig.isTruncationEnabled = true;
+      providerConfig.maximumMessageTokens = 1000;
+
+      const hugeValue = 'O'.repeat(5000);
+      const history: Content[] = [
+        {
+          role: 'user',
+          parts: [
+            {
+              functionResponse: {
+                name: 'test_tool',
+                id: '1',
+                response: {
+                  stdout: hugeValue,
+                  stderr: 'small error',
+                  exitCode: 0,
+                },
+              },
+            },
+          ],
+        },
+      ];
+
+      vi.mocked(estimateTokenCountSync).mockImplementation(
+        (parts: readonly Part[]) => {
+          if (parts?.[0]?.functionResponse) return 5000;
+          return 10;
+        },
+      );
+
+      const result = await provider.manageHistory(history);
+
+      const fr = result[0].parts![0].functionResponse!;
+      const resp = fr.response as Record<string, unknown>;
+
+      // stdout should be truncated
+      expect(resp['stdout']).toContain(TOOL_TRUNCATION_PREFIX);
+      expect((resp['stdout'] as string).length).toBeLessThan(hugeValue.length);
+
+      // stderr and exitCode should be PRESERVED (JSON integrity)
+      expect(resp['stderr']).toBe('small error');
+      expect(resp['exitCode']).toBe(0);
+
+      // Schema should be intact
+      expect(fr.name).toBe('test_tool');
+      expect(fr.id).toBe('1');
+    });
+  });
+
+  describe('truncateProportionally', () => {
+    it('returns original string if under target chars', () => {
+      const str = 'A'.repeat(50);
+      expect(truncateProportionally(str, 100, TEXT_TRUNCATION_PREFIX)).toBe(
+        str,
+      );
+    });
+
+    it('truncates proportionally with prefix and ellipsis', () => {
+      const str = 'A'.repeat(500) + 'B'.repeat(500); // 1000 chars
+      const target = 100;
+      const result = truncateProportionally(
+        str,
+        target,
+        TEXT_TRUNCATION_PREFIX,
+      );
+
+      expect(result.startsWith(TEXT_TRUNCATION_PREFIX)).toBe(true);
+      expect(result).toContain('\n...\n');
+
+      // The prefix and ellipsis take up some space
+      // It should keep ~20% head and ~80% tail of the *available* space
+      const ellipsis = '\n...\n';
+      const overhead = TEXT_TRUNCATION_PREFIX.length + ellipsis.length + 1; // +1 for the newline after prefix
+      const availableChars = Math.max(0, target - overhead);
+      const expectedHeadChars = Math.floor(availableChars * 0.2);
+      const expectedTailChars = availableChars - expectedHeadChars;
+
+      // Extract parts around the ellipsis
+      const parts = result.split(ellipsis);
+      expect(parts.length).toBe(2);
+
+      // Remove prefix + newline from the first part to check head length
+      const actualHead = parts[0].replace(TEXT_TRUNCATION_PREFIX + '\n', '');
+      const actualTail = parts[1];
+
+      expect(actualHead.length).toBe(expectedHeadChars);
+      expect(actualTail.length).toBe(expectedTailChars);
+    });
+
+    it('handles very small targets gracefully by just returning prefix', () => {
+      const str = 'A'.repeat(100);
+      const result = truncateProportionally(str, 10, TEXT_TRUNCATION_PREFIX);
+      expect(result).toBe(TEXT_TRUNCATION_PREFIX);
+    });
+  });
+
+  describe('Multi-part Proportional Normalization', () => {
+    it('distributes token budget proportionally across multiple large parts', async () => {
+      providerConfig.isTruncationEnabled = true;
+      providerConfig.maximumMessageTokens = 2500; // Small limit to trigger normalization on last msg
+
+      const history = createMockHistory(35);
+
+      // Make newest message (index 34) have two large parts
+      // Part 1: 10000 chars (~2500 tokens at 4 chars/token)
+      // Part 2: 30000 chars (~7500 tokens at 4 chars/token)
+      // Total tokens = 10000. Target = 2500. Ratio = 0.25.
+      const part1Text = 'A'.repeat(10000);
+      const part2Text = 'B'.repeat(30000);
+
+      history[34] = {
+        role: 'user',
+        parts: [{ text: part1Text }, { text: part2Text }],
+      };
+
+      vi.mocked(estimateTokenCountSync).mockImplementation(
+        (parts: readonly Part[]) => {
+          if (!parts || parts.length === 0) return 0;
+          let tokens = 0;
+          for (const p of parts) {
+            if (p.text?.startsWith('A')) tokens += 2500;
+            else if (p.text?.startsWith('B')) tokens += 7500;
+            else tokens += 10;
+          }
+          return tokens;
+        },
+      );
+
+      const result = await provider.manageHistory(history);
+
+      const normalizedMsg = result[34];
+      expect(normalizedMsg.parts!.length).toBe(2);
+
+      const p1 = normalizedMsg.parts![0].text!;
+      const p2 = normalizedMsg.parts![1].text!;
+
+      expect(p1).toContain(TEXT_TRUNCATION_PREFIX);
+      expect(p2).toContain(TEXT_TRUNCATION_PREFIX);
+
+      // Part 1: Target chars ~ 2500 * 0.25 * 4 = 2500
+      // Part 2: Target chars ~ 7500 * 0.25 * 4 = 7500
+      expect(p1.length).toBeLessThan(3500);
+      expect(p2.length).toBeLessThan(9000);
+      expect(p1.length).toBeLessThan(p2.length);
+    });
+
+    it('preserves small parts while truncating large parts in the same message', async () => {
+      providerConfig.isTruncationEnabled = true;
+      providerConfig.maximumMessageTokens = 2500;
+
+      const history = createMockHistory(35);
+
+      const smallText = 'Hello I am small';
+      const hugeText = 'B'.repeat(40000); // 10000 tokens
+
+      history[34] = {
+        role: 'user',
+        parts: [{ text: smallText }, { text: hugeText }],
+      };
+
+      vi.mocked(estimateTokenCountSync).mockImplementation(
+        (parts: readonly Part[]) => {
+          if (!parts || parts.length === 0) return 0;
+          let tokens = 0;
+          for (const p of parts) {
+            if (p.text === smallText) tokens += 10;
+            else if (p.text?.startsWith('B')) tokens += 10000;
+            else tokens += 10;
+          }
+          return tokens;
+        },
+      );
+
+      const result = await provider.manageHistory(history);
+
+      const normalizedMsg = result[34];
+      expect(normalizedMsg.parts!.length).toBe(2);
+
+      const p1 = normalizedMsg.parts![0].text!;
+      const p2 = normalizedMsg.parts![1].text!;
+
+      // Small part should be preserved
+      expect(p1).toBe(smallText);
+
+      // Huge part should be truncated
+      expect(p2).toContain(TEXT_TRUNCATION_PREFIX);
+      // Target tokens for huge part = ~2500 * (10000/10010) = ~2500
+      // Target chars = ~10000
+      expect(p2.length).toBeLessThan(12000);
+    });
  });
 });
@@ -4,21 +4,27 @@
 * SPDX-License-Identifier: Apache-2.0
 */

-import type { Content } from '@google/genai';
-import type { Config } from '../config/config.js';
+import type { Content, Part } from '@google/genai';
 import { getResponseText } from '../utils/partUtils.js';
+import { estimateTokenCountSync } from '../utils/tokenCalculation.js';
 import { LlmRole } from '../telemetry/llmRole.js';
 import { debugLogger } from '../utils/debugLogger.js';
-
-export interface AgentHistoryProviderConfig {
-  truncationThreshold: number;
-  retainedMessages: number;
-}
+import type { AgentHistoryProviderConfig } from './types.js';
+import type { Config } from '../config/config.js';
+import {
+  MIN_TARGET_TOKENS,
+  MIN_CHARS_FOR_TRUNCATION,
+  TEXT_TRUNCATION_PREFIX,
+  estimateCharsFromTokens,
+  truncateProportionally,
+  normalizeFunctionResponse,
+} from '../utils/truncation.js';

 export class AgentHistoryProvider {
+  // TODO(joshualitt): just pass the BaseLlmClient instead of the whole Config.
+  /**
+   * @param providerConfig Token budgets and feature flags read by this
+   *   provider when normalizing, truncating, and summarizing history.
+   * @param config Used to reach the base LLM client when generating summaries.
+   */
   constructor(
-    private readonly config: Config,
     private readonly providerConfig: AgentHistoryProviderConfig,
+    private readonly config: Config,
   ) {}

  /**
@@ -29,12 +35,29 @@ export class AgentHistoryProvider {
    history: readonly Content[],
    abortSignal?: AbortSignal,
  ): Promise<readonly Content[]> {
-    if (!this.shouldTruncate(history)) {
+    if (!this.providerConfig.isTruncationEnabled || history.length === 0) {
      return history;
    }

+    // Step 1: Normalize newest messages.
+    const normalizedHistory = this.enforceMessageSizeLimits(history);
+
+    const totalTokens = estimateTokenCountSync(
+      normalizedHistory.flatMap((c) => c.parts || []),
+    );
+
+    // Step 2: Check if truncation is needed based on the token threshold (High Watermark)
+    if (totalTokens <= this.providerConfig.maxTokens) {
+      return normalizedHistory;
+    }
+
+    // Step 3: Split into keep/truncate boundaries
    const { messagesToKeep, messagesToTruncate } =
-      this.splitHistoryForTruncation(history);
+      this.splitHistoryForTruncation(normalizedHistory);
+
+    if (messagesToTruncate.length === 0) {
+      return messagesToKeep;
+    }

    debugLogger.log(
      `AgentHistoryProvider: Truncating ${messagesToTruncate.length} messages, retaining ${messagesToKeep.length} messages.`,
@@ -42,65 +65,230 @@ export class AgentHistoryProvider {

    const summaryText = await this.getSummaryText(
      messagesToTruncate,
+      messagesToKeep,
      abortSignal,
    );

    return this.mergeSummaryWithHistory(summaryText, messagesToKeep);
  }

-  private shouldTruncate(history: readonly Content[]): boolean {
-    if (!this.config.isExperimentalAgentHistoryTruncationEnabled()) {
-      return false;
+  /**
+   * Enforces per-message size limits across the entire history.
+   *
+   * The history is scanned from newest to oldest to find the "grace zone":
+   * the trailing run of messages whose cumulative estimated tokens stay
+   * within `retainedTokens`.
+   * - Messages inside the grace zone are held to the high
+   *   `maximumMessageTokens` limit.
+   * - Older messages (before the grace zone) are held to the
+   *   `normalMessageTokens` limit.
+   *
+   * @returns The original array when no message was changed, otherwise a new
+   *   array containing the normalized messages.
+   */
+  private enforceMessageSizeLimits(
+    history: readonly Content[],
+  ): readonly Content[] {
+    if (history.length === 0) return history;
+
+    let hasChanges = false;
+    let accumulatedTokens = 0;
+
+    // Scan backwards to find the index where the token budget is exhausted.
+    // If the budget is never exceeded, graceStartIndex stays 0 and every
+    // message is treated as part of the grace zone.
+    let graceStartIndex = 0;
+    for (let i = history.length - 1; i >= 0; i--) {
+      const msgTokens = estimateTokenCountSync(history[i].parts || []);
+      accumulatedTokens += msgTokens;
+      if (accumulatedTokens > this.providerConfig.retainedTokens) {
+        // Message i is the one that overflowed the budget, so the grace zone
+        // starts right after it.
+        graceStartIndex = i + 1;
+        break;
+      }
     }
-    return history.length > this.providerConfig.truncationThreshold;
+
+    const newHistory = history.map((msg, i) => {
+      const targetTokens =
+        i < graceStartIndex
+          ? this.providerConfig.normalMessageTokens
+          : this.providerConfig.maximumMessageTokens;
+
+      // normalizeMessage returns the same object when the message already
+      // fits, so reference inequality signals a real change.
+      const normalizedMsg = this.normalizeMessage(msg, targetTokens);
+      if (normalizedMsg !== msg) {
+        hasChanges = true;
+      }
+      return normalizedMsg;
+    });
+
+    return hasChanges ? newHistory : history;
   }

+  /**
+   * Shrinks a message whose estimated token count exceeds `targetTokens`.
+   *
+   * Every part is scaled by the same ratio (target / current): text parts are
+   * truncated proportionally and function responses are delegated to
+   * `normalizeFunctionResponse`. Parts are never dropped, so the message keeps
+   * its API structure. A message already within budget is returned as-is.
+   */
+  private normalizeMessage(msg: Content, targetTokens: number): Content {
+    const originalParts = msg.parts || [];
+    const messageTokens = estimateTokenCountSync(originalParts);
+    if (messageTokens <= targetTokens) {
+      return msg;
+    }
+
+    // Shared compression ratio applied to each part of the message.
+    const ratio = targetTokens / messageTokens;
+    const headRatio = this.providerConfig.normalizationHeadRatio;
+
+    const shrinkTextPart = (part: Part, text: string): Part => {
+      const partBudget = Math.max(
+        MIN_TARGET_TOKENS,
+        Math.floor(estimateTokenCountSync([part]) * ratio),
+      );
+      const charBudget = estimateCharsFromTokens(text, partBudget);
+
+      // Only truncate when the text exceeds its budget and that budget is
+      // above MIN_CHARS_FOR_TRUNCATION; otherwise keep the part untouched.
+      const shouldTruncate =
+        text.length > charBudget && charBudget > MIN_CHARS_FOR_TRUNCATION;
+      if (!shouldTruncate) {
+        return part;
+      }
+      return {
+        text: truncateProportionally(
+          text,
+          charBudget,
+          TEXT_TRUNCATION_PREFIX,
+          headRatio,
+        ),
+      };
+    };
+
+    const normalizedParts: Part[] = originalParts.map((part) => {
+      if (part.text) {
+        return shrinkTextPart(part, part.text);
+      }
+      if (part.functionResponse) {
+        return normalizeFunctionResponse(part, ratio, headRatio);
+      }
+      // Any other kind of part is passed through unchanged.
+      return part;
+    });
+
+    return { ...msg, parts: normalizedParts };
+  }
+
+  /**
+   * Determines the boundary for splitting history based on the token budget,
+   * keeping recent messages under a specific target token threshold,
+   * while ensuring structural integrity (e.g. keeping functionCall/functionResponse pairs).
+   *
+   * @param history The (already normalized) conversation history.
+   * @returns `messagesToKeep` — the trailing messages that fit within
+   *   `retainedTokens` (possibly extended by the integrity adjustment) — and
+   *   `messagesToTruncate` — everything before them.
+   */
   private splitHistoryForTruncation(history: readonly Content[]): {
     messagesToKeep: readonly Content[];
     messagesToTruncate: readonly Content[];
   } {
+    let accumulatedTokens = 0;
+    // The index of the first message to keep. It stays 0 (keep everything)
+    // when the whole history fits within the budget.
+    let truncationBoundary = 0;
+
+    // Scan backwards to calculate the boundary based on token budget
+    for (let i = history.length - 1; i >= 0; i--) {
+      const msg = history[i];
+      const msgTokens = estimateTokenCountSync(msg.parts || []);
+
+      // Token Budget: message i is excluded as soon as adding it would
+      // push the retained total over `retainedTokens`.
+      if (accumulatedTokens + msgTokens > this.providerConfig.retainedTokens) {
+        // Exceeded budget, stop retaining messages here.
+        truncationBoundary = i + 1;
+        break;
+      }
+
+      accumulatedTokens += msgTokens;
+    }
+
+    // Ensure structural integrity of the boundary
+    truncationBoundary = this.adjustBoundaryForIntegrity(
+      history,
+      truncationBoundary,
+    );
+
+    const messagesToKeep = history.slice(truncationBoundary);
+    const messagesToTruncate = history.slice(0, truncationBoundary);
+
     return {
-      messagesToKeep: history.slice(-this.providerConfig.retainedMessages),
-      messagesToTruncate: history.slice(
-        0,
-        history.length - this.providerConfig.retainedMessages,
-      ),
+      messagesToKeep,
+      messagesToTruncate,
     };
   }

+  /**
+   * Moves the truncation boundary one message earlier when the retained
+   * history would otherwise begin with a user functionResponse whose model
+   * functionCall sits just before the boundary, so the pair stays together.
+   *
+   * @param history The full conversation history.
+   * @param boundary Index of the first message that would be kept.
+   * @returns The boundary, decremented by one when a call/response pair
+   *   straddles it; otherwise the boundary unchanged.
+   */
+  private adjustBoundaryForIntegrity(
+    history: readonly Content[],
+    boundary: number,
+  ): number {
+    // Nothing to adjust when the boundary sits at either end of the history.
+    if (boundary <= 0 || boundary >= history.length) {
+      return boundary;
+    }
+
+    const firstKept = history[boundary];
+    const lastDropped = history[boundary - 1];
+
+    const keptStartsWithResponse =
+      firstKept.role === 'user' &&
+      Boolean(firstKept.parts?.some((p) => p.functionResponse));
+    const droppedEndsWithCall =
+      lastDropped.role === 'model' &&
+      Boolean(lastDropped.parts?.some((p) => p.functionCall));
+
+    // Include the functionCall in the retained history. After one step back
+    // the boundary message is a model turn, so no further step can apply.
+    return keptStartsWithResponse && droppedEndsWithCall
+      ? boundary - 1
+      : boundary;
+  }
+
  private getFallbackSummaryText(
    messagesToTruncate: readonly Content[],
  ): string {
-    const defaultNote =
-      'System Note: Prior conversation history was truncated to maintain performance and focus. Important context should have been saved to memory.';
+    const userMessages = messagesToTruncate.filter((m) => m.role === 'user');
+    const modelMessages = messagesToTruncate.filter((m) => m.role === 'model');

-    let lastUserText = '';
-    for (let i = messagesToTruncate.length - 1; i >= 0; i--) {
-      const msg = messagesToTruncate[i];
-      if (msg.role === 'user') {
-        lastUserText =
-          msg.parts
-            ?.map((p) => p.text || '')
-            .join('')
-            .trim() || '';
-        if (lastUserText) {
-          break;
-        }
-      }
-    }
+    const lastUserText = userMessages
+      .slice(-1)[0]
+      ?.parts?.map((p) => p.text || '')
+      .join('')
+      .trim();
+
+    const actionPath = modelMessages
+      .flatMap(
+        (m) =>
+          m.parts
+            ?.filter((p) => p.functionCall)
+            .map((p) => p.functionCall!.name) || [],
+      )
+      .join(' → ');
+
+    const summaryParts = [
+      '### [System Note: Conversation History Truncated]',
+      'Prior context was offloaded to maintain performance. Key highlights from the truncated history:',
+    ];

    if (lastUserText) {
-      return `[System Note: Prior conversation history was truncated. The most recent user message before truncation was:]\n\n${lastUserText}`;
+      summaryParts.push(`- **Last User Intent:** "${lastUserText}"`);
    }

-    return defaultNote;
+    if (actionPath) {
+      summaryParts.push(`- **Action Path:** ${actionPath}`);
+    }
+
+    summaryParts.push(
+      '- **Notice:** For deeper context, review persistent memory or task-specific logs.',
+    );
+
+    return summaryParts.join('\n');
  }

  private async getSummaryText(
    messagesToTruncate: readonly Content[],
+    messagesToKeep: readonly Content[],
    abortSignal?: AbortSignal,
  ): Promise<string> {
-    if (!this.config.isExperimentalAgentHistorySummarizationEnabled()) {
+    if (messagesToTruncate.length === 0) return '';
+
+    if (!this.providerConfig.isSummarizationEnabled) {
      debugLogger.log(
        'AgentHistoryProvider: Summarization disabled, using fallback note.',
      );
@@ -108,12 +296,15 @@ export class AgentHistoryProvider {
    }

    try {
-      const summary = await this.generateIntentSummary(
+      // Use the first few messages of the Grace Zone as a "contextual bridge"
+      // to give the summarizer lookahead into the current state.
+      const bridge = messagesToKeep.slice(0, 5);
+
+      return await this.generateIntentSummary(
        messagesToTruncate,
+        bridge,
        abortSignal,
      );
-      debugLogger.log('AgentHistoryProvider: Summarization successful.');
-      return summary;
    } catch (error) {
      debugLogger.log('AgentHistoryProvider: Summarization failed.', error);
      return this.getFallbackSummaryText(messagesToTruncate);
@@ -124,6 +315,8 @@ export class AgentHistoryProvider {
    summaryText: string,
    messagesToKeep: readonly Content[],
  ): readonly Content[] {
+    if (!summaryText) return messagesToKeep;
+
    if (messagesToKeep.length === 0) {
      return [{ role: 'user', parts: [{ text: summaryText }] }];
    }
@@ -152,22 +345,57 @@ export class AgentHistoryProvider {

  private async generateIntentSummary(
    messagesToTruncate: readonly Content[],
+    bridge: readonly Content[],
    abortSignal?: AbortSignal,
  ): Promise<string> {
-    const prompt = `Create a succinct, agent-continuity focused intent summary of the truncated conversation history.
-Distill the essence of the ongoing work by capturing:
- The Original Mandate: What the user (or calling agent) originally requested and why.
- The Agent's Strategy: How you (the agent) are approaching the task and where the work is taking place (e.g., specific files, directories, or architectural layers).
- Evolving Context: Any significant shifts in the user's intent or the agent's technical approach over the course of the truncated history.
+    // 1. Identify and extract any existing summary from the truncated head
+    const firstMsg = messagesToTruncate[0];
+    const firstPartText = firstMsg?.parts?.[0]?.text || '';
+    const hasPreviousSummary = firstPartText.includes('<intent_summary>');

-Write this summary to orient the active agent. Do NOT predict next steps or summarize the current task state, as those are covered by the active history. Focus purely on foundational context and strategic continuity.`;
+    // 2. Extract "The Action Path" (necklace of function names)
+    const actionPath = messagesToTruncate
+      .filter((m) => m.role === 'model')
+      .flatMap(
+        (m) =>
+          m.parts
+            ?.filter((p) => p.functionCall)
+            .map((p) => p.functionCall!.name) || [],
+      )
+      .join(' → ');
+
+    const prompt = `### State Update: Agent Continuity
+
+The conversation history has been truncated. You are generating a highly factual state summary to preserve the agent's exact working context.
+
+You have these signals to synthesize:
+${hasPreviousSummary ? '1. **Previous Summary:** The existing state before this truncation.\n' : ''}2. **The Action Path:** A chronological list of tools called: [${actionPath}]
+3. **Truncated History:** The specific actions, tool inputs, and tool outputs being offloaded.
+4. **Active Bridge:** The first few turns of the "Grace Zone" (what follows immediately after this summary), showing the current tactical moment.
+
+### Your Goal:
+Distill these into a high-density Markdown block that orientates the agent on the CONCRETE STATE of the workspace:
+- **Primary Goal:** The ultimate objective requested by the user.
+- **Verified Facts:** What has been definitively completed or proven (e.g., "File X was created", "Bug Y was reproduced").
+- **Working Set:** The exact file paths currently being analyzed or modified.
+- **Active Blockers:** Exact error messages or failing test names currently preventing progress.
+
+### Constraints:
+- **Format:** Wrap the entire response in <intent_summary> tags.
+- **Factuality:** Base all points strictly on the provided history. Do not invent rationale or assume success without proof. Use exact names and quotes.
+- **Brevity:** Maximum 15 lines. No conversational preamble.
+
+${hasPreviousSummary ? 'PREVIOUS SUMMARY AND TRUNCATED HISTORY:' : 'TRUNCATED HISTORY:'}
+${JSON.stringify(messagesToTruncate)}
+
+ACTIVE BRIDGE (LOOKAHEAD):
+${JSON.stringify(bridge)}`;

    const summaryResponse = await this.config
      .getBaseLlmClient()
      .generateContent({
        modelConfigKey: { model: 'agent-history-provider-summarizer' },
        contents: [
-          ...messagesToTruncate,
          {
            role: 'user',
            parts: [{ text: prompt }],
@@ -179,7 +407,16 @@ Write this summary to orient the active agent. Do NOT predict next steps or summ
      });

    let summary = getResponseText(summaryResponse) ?? '';
-    summary = summary.replace(/<\/?intent_summary>/g, '').trim();
-    return `<intent_summary>\n${summary}\n</intent_summary>`;
+    // Clean up if the model included extra tags or markdown
+    summary = summary
+      .replace(/```markdown/g, '')
+      .replace(/```/g, '')
+      .trim();
+
+    if (!summary.includes('<intent_summary>')) {
+      summary = `<intent_summary>\n${summary}\n</intent_summary>`;
+    }
+
+    return summary;
  }
 }
@@ -0,0 +1,101 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { ToolOutputDistillationService } from './toolDistillationService.js';
+import type { Config, Part } from '../index.js';
+import type { GeminiClient } from '../core/client.js';
+
+describe('ToolOutputDistillationService', () => {
+  let mockConfig: Config;
+  let mockGeminiClient: GeminiClient;
+  let service: ToolOutputDistillationService;
+
+  // Returns the distilled output as plain text, whether the service produced
+  // a string or a list of text parts.
+  const firstText = (content: unknown): string =>
+    typeof content === 'string'
+      ? content
+      : (content as Array<{ text: string }>)[0].text;
+
+  beforeEach(() => {
+    // Both thresholds default to 100 tokens, i.e. 100 * 4 = 400 characters.
+    mockConfig = {
+      getToolMaxOutputTokens: vi.fn().mockReturnValue(100),
+      getToolSummarizationThresholdTokens: vi.fn().mockReturnValue(100),
+      getUsageStatisticsEnabled: vi.fn().mockReturnValue(false),
+      storage: {
+        getProjectTempDir: vi.fn().mockReturnValue('/tmp/gemini'),
+      },
+      telemetry: {
+        logEvent: vi.fn(),
+      },
+    } as unknown as Config;
+    mockGeminiClient = {
+      generateContent: vi.fn().mockResolvedValue({
+        candidates: [{ content: { parts: [{ text: 'Mock Intent Summary' }] } }],
+      }),
+    } as unknown as GeminiClient;
+    service = new ToolOutputDistillationService(
+      mockConfig,
+      mockGeminiClient,
+      'test-prompt-id',
+    );
+  });
+
+  it('should generate a structural map for oversized content within limits', async () => {
+    // 500 chars exceeds both the truncation and the summarization threshold
+    // (100 tokens * 4 chars/token = 400 chars each).
+    const largeContent = 'A'.repeat(500);
+    const result = await service.distill('test-tool', 'call-1', largeContent);
+
+    expect(mockGeminiClient.generateContent).toHaveBeenCalled();
+    expect(firstText(result.truncatedContent)).toContain(
+      'Strategic Significance',
+    );
+  });
+
+  it('should structurally truncate functionResponse while preserving schema', async () => {
+    // The serialized response is well above the 400-char threshold.
+    const hugeValue = 'H'.repeat(1000);
+    const content = [
+      {
+        functionResponse: {
+          name: 'test_tool',
+          id: '123',
+          response: {
+            stdout: hugeValue,
+            stderr: 'no error',
+          },
+        },
+      },
+    ] as unknown as Part[];
+
+    const result = await service.distill('test-tool', 'call-1', content);
+    const truncatedParts = result.truncatedContent as Part[];
+    expect(truncatedParts.length).toBe(1);
+    const fr = truncatedParts[0].functionResponse!;
+    const resp = fr.response as Record<string, unknown>;
+    expect(fr.name).toBe('test_tool');
+    expect(resp['stderr']).toBe('no error');
+    expect(resp['stdout'] as string).toContain('[Message Normalized');
+    expect(resp['stdout'] as string).toContain('Full output saved to');
+  });
+
+  it('should skip structural map for extremely large content exceeding MAX_DISTILLATION_SIZE', async () => {
+    const massiveContent = 'A'.repeat(1_000_001); // > MAX_DISTILLATION_SIZE
+    const result = await service.distill('test-tool', 'call-2', massiveContent);
+
+    expect(mockGeminiClient.generateContent).not.toHaveBeenCalled();
+    expect(firstText(result.truncatedContent)).not.toContain(
+      'Strategic Significance',
+    );
+  });
+
+  it('should skip structural map for content below summarization threshold', async () => {
+    // Raise the summarization threshold to 1000 tokens (4000 chars) so that
+    // 500 chars is large enough to be truncated (> 400 chars) yet too small
+    // to be summarized (< 4000 chars).
+    vi.mocked(mockConfig.getToolSummarizationThresholdTokens).mockReturnValue(
+      1000,
+    );
+    const mediumContent = 'A'.repeat(500);
+    const result = await service.distill('test-tool', 'call-3', mediumContent);
+
+    expect(mockGeminiClient.generateContent).not.toHaveBeenCalled();
+    // Truncation must have happened, otherwise this test proves nothing.
+    expect(result.truncatedContent).toContain('Full output saved to');
+    expect(result.truncatedContent).not.toContain('Strategic Significance');
+    expect(result.truncatedContent).not.toContain('Mock Intent Summary');
+  });
+});
@@ -0,0 +1,293 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import {
+  LlmRole,
+  ToolOutputTruncatedEvent,
+  logToolOutputTruncated,
+  debugLogger,
+  type Config,
+} from '../index.js';
+import type { PartListUnion } from '@google/genai';
+import { type GeminiClient } from '../core/client.js';
+import { saveTruncatedToolOutput } from '../utils/fileUtils.js';
+import {
+  READ_FILE_TOOL_NAME,
+  READ_MANY_FILES_TOOL_NAME,
+} from '../tools/tool-names.js';
+
+import {
+  truncateProportionally,
+  TOOL_TRUNCATION_PREFIX,
+  MIN_TARGET_TOKENS,
+  estimateCharsFromTokens,
+  normalizeFunctionResponse,
+} from '../utils/truncation.js';
+
+// Skip structural map generation for outputs larger than this threshold (in characters)
+// as it consumes excessive tokens and may not be representative of the full content.
+const MAX_DISTILLATION_SIZE = 1_000_000;
+
+/** Result returned by `ToolOutputDistillationService.distill`. */
+export interface DistilledToolOutput {
+  /**
+   * Content to hand back to the agent: the original content when no
+   * distillation was performed, otherwise its truncated form.
+   */
+  truncatedContent: PartListUnion;
+  /**
+   * Path reported by `saveTruncatedToolOutput` for the raw output. Only
+   * populated on the distillation path.
+   */
+  outputFile?: string;
+}
+
+/** A single element of a `PartListUnion`: either a raw string or a `Part`. */
+type DistillablePart = Exclude<PartListUnion, unknown[]>;
+
+/**
+ * Shrinks oversized tool output before it enters the agent's context: the raw
+ * output is saved to disk, the in-context copy is truncated while keeping its
+ * Part structure, and an LLM-generated summary is attached when the output
+ * exceeds the summarization threshold.
+ */
+export class ToolOutputDistillationService {
+  constructor(
+    private readonly config: Config,
+    private readonly geminiClient: GeminiClient,
+    private readonly promptId: string,
+  ) {}
+
+  /**
+   * Distills a tool's output if it exceeds configured length thresholds, preserving
+   * the agent's context window. This includes saving the raw output to disk, replacing
+   * the output with a truncated placeholder, and optionally summarizing the output
+   * via a secondary LLM call if the output is massively oversized.
+   *
+   * @param toolName Name of the tool that produced the output.
+   * @param callId Identifier of the tool call, forwarded when saving output.
+   * @param content The tool output: a string, a single Part, or a list.
+   * @returns The original content when no distillation is needed, otherwise
+   *   the truncated content plus the path of the saved raw output.
+   */
+  async distill(
+    toolName: string,
+    callId: string,
+    content: PartListUnion,
+  ): Promise<DistilledToolOutput> {
+    // Explicitly bypass escape hatches that natively handle large outputs
+    if (this.isExemptFromDistillation(toolName)) {
+      return { truncatedContent: content };
+    }
+
+    const maxTokens = this.config.getToolMaxOutputTokens();
+    // Token budgets are converted to characters at 4 chars per token.
+    const thresholdChars = maxTokens * 4;
+    if (thresholdChars <= 0) {
+      return { truncatedContent: content };
+    }
+
+    const originalContentLength = this.calculateContentLength(content);
+
+    if (originalContentLength > thresholdChars) {
+      return this.performDistillation(
+        toolName,
+        callId,
+        content,
+        originalContentLength,
+        thresholdChars,
+      );
+    }
+
+    return { truncatedContent: content };
+  }
+
+  /** Returns true for the file-reading tools that are never distilled. */
+  private isExemptFromDistillation(toolName: string): boolean {
+    return (
+      toolName === READ_FILE_TOOL_NAME || toolName === READ_MANY_FILES_TOOL_NAME
+    );
+  }
+
+  /**
+   * Measures the content in characters: string lengths, `text` lengths, and
+   * the JSON length of function responses. Other parts count as zero.
+   * A lone Part is measured exactly like a one-element list, so it cannot
+   * slip past the size check.
+   */
+  private calculateContentLength(content: PartListUnion): number {
+    if (typeof content === 'string') {
+      return content.length;
+    }
+
+    const parts: DistillablePart[] = Array.isArray(content)
+      ? content
+      : [content];
+
+    return parts.reduce<number>((acc, part) => {
+      if (typeof part === 'string') return acc + part.length;
+      if (part.text) return acc + part.text.length;
+      if (part.functionResponse?.response) {
+        // Estimate length of the response object
+        return acc + JSON.stringify(part.functionResponse.response).length;
+      }
+      return acc;
+    }, 0);
+  }
+
+  /** Serializes the content for saving to disk and for the summary prompt. */
+  private stringifyContent(content: PartListUnion): string {
+    if (typeof content === 'string') return content;
+    // For arrays or other objects, we preserve the structural JSON to maintain
+    // the ability to reconstruct the parts if needed from the saved output.
+    return JSON.stringify(content, null, 2);
+  }
+
+  /**
+   * Saves the raw output, optionally requests a summary, truncates the
+   * content, and logs a truncation event.
+   *
+   * @param originalContentLength Character length from calculateContentLength.
+   * @param threshold Maximum allowed length in characters.
+   */
+  private async performDistillation(
+    toolName: string,
+    callId: string,
+    content: PartListUnion,
+    originalContentLength: number,
+    threshold: number,
+  ): Promise<DistilledToolOutput> {
+    const stringifiedContent = this.stringifyContent(content);
+
+    // Save the raw, untruncated string to disk for human review
+    const { outputFile: savedPath } = await saveTruncatedToolOutput(
+      stringifiedContent,
+      toolName,
+      callId,
+      this.config.storage.getProjectTempDir(),
+      this.promptId,
+    );
+
+    // If the output is massively oversized, attempt to generate an intent summary
+    let intentSummaryText = '';
+    const summarizationThresholdTokens =
+      this.config.getToolSummarizationThresholdTokens();
+    const summarizationThresholdChars = summarizationThresholdTokens * 4;
+
+    if (
+      originalContentLength > summarizationThresholdChars &&
+      originalContentLength <= MAX_DISTILLATION_SIZE
+    ) {
+      const summary = await this.generateIntentSummary(
+        toolName,
+        stringifiedContent,
+        MAX_DISTILLATION_SIZE,
+      );
+
+      if (summary) {
+        intentSummaryText = `\n\n--- Strategic Significance of Truncated Content ---\n${summary}`;
+      }
+    }
+
+    // Perform structural truncation
+    const ratio = threshold / originalContentLength;
+    const truncatedContent = this.truncateContentStructurally(
+      content,
+      ratio,
+      savedPath || 'Output offloaded to disk',
+      intentSummaryText,
+    );
+
+    logToolOutputTruncated(
+      this.config,
+      new ToolOutputTruncatedEvent(this.promptId, {
+        toolName,
+        originalContentLength,
+        truncatedContentLength: this.calculateContentLength(truncatedContent),
+        threshold,
+      }),
+    );
+
+    return {
+      truncatedContent,
+      outputFile: savedPath,
+    };
+  }
+
+  /**
+   * Truncates `text` to roughly `ratio` of its estimated token count, never
+   * going below MIN_TARGET_TOKENS.
+   */
+  private truncateText(text: string, ratio: number): string {
+    const targetTokens = Math.max(
+      MIN_TARGET_TOKENS,
+      Math.floor((text.length / 4) * ratio),
+    );
+    const targetChars = estimateCharsFromTokens(text, targetTokens);
+    return truncateProportionally(text, targetChars, TOOL_TRUNCATION_PREFIX);
+  }
+
+  /**
+   * Truncates content while maintaining its Part structure.
+   *
+   * A top-level string, `text` parts, and function responses are annotated
+   * with the saved-output path and the intent summary. Bare strings inside a
+   * list are truncated without that footer. Parts of any other kind are
+   * returned unchanged. A lone Part is handled like a list element.
+   */
+  private truncateContentStructurally(
+    content: PartListUnion,
+    ratio: number,
+    savedPath: string,
+    intentSummary: string,
+  ): PartListUnion {
+    const footer = `\n\nFull output saved to: ${savedPath}` + intentSummary;
+
+    if (typeof content === 'string') {
+      return this.truncateText(content, ratio) + footer;
+    }
+
+    const truncatePart = (part: DistillablePart): DistillablePart => {
+      if (typeof part === 'string') {
+        return this.truncateText(part, ratio);
+      }
+
+      if (part.text) {
+        return { text: this.truncateText(part.text, ratio) + footer };
+      }
+
+      if (part.functionResponse) {
+        return normalizeFunctionResponse(
+          part,
+          ratio,
+          0.2, // default headRatio
+          savedPath,
+          intentSummary,
+        );
+      }
+
+      return part;
+    };
+
+    return Array.isArray(content)
+      ? content.map(truncatePart)
+      : truncatePart(content);
+  }
+
+  /**
+   * Calls the secondary model to distill the strategic "why" signals and intent
+   * of the truncated content before it is offloaded.
+   *
+   * @param maxPreviewLen Maximum number of characters of the output included
+   *   in the prompt.
+   * @returns The summary text, or undefined when the request fails, times
+   *   out, or yields no text.
+   */
+  private async generateIntentSummary(
+    toolName: string,
+    stringifiedContent: string,
+    maxPreviewLen: number,
+  ): Promise<string | undefined> {
+    const controller = new AbortController();
+    const timeoutId = setTimeout(() => controller.abort(), 15000); // 15s timeout
+
+    try {
+      const promptText = `The following output from the tool '${toolName}' is large and has been truncated. Extract the most critical factual information from this output so the main agent doesn't lose context.
+
+Focus strictly on concrete data points:
+1. Exact error messages, exception types, or exit codes.
+2. Specific file paths or line numbers mentioned.
+3. Definitive outcomes (e.g., 'Compilation succeeded', '3 tests failed').
+
+Do not philosophize about the strategic intent. Keep the extraction under 10 lines and use exact quotes where helpful.
+
+Output to summarize:
+${stringifiedContent.slice(0, maxPreviewLen)}...`;
+
+      const summaryResponse = await this.geminiClient.generateContent(
+        { model: 'agent-history-provider-summarizer' },
+        [{ role: 'user', parts: [{ text: promptText }] }],
+        controller.signal,
+        LlmRole.UTILITY_COMPRESSOR,
+      );
+
+      return summaryResponse.candidates?.[0]?.content?.parts?.[0]?.text;
+    } catch (e) {
+      // Fail gracefully, summarization is a progressive enhancement
+      debugLogger.debug(
+        'Failed to generate intent summary for truncated output:',
+        e instanceof Error ? e.message : String(e),
+      );
+      return undefined;
+    } finally {
+      // Always release the timer, including when the request throws, so no
+      // pending timeout outlives this call.
+      clearTimeout(timeoutId);
+    }
+  }
+}
@@ -0,0 +1,15 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/** Settings consumed by `AgentHistoryProvider`. */
+export interface AgentHistoryProviderConfig {
+  /**
+   * High watermark: history is only split and summarized once its estimated
+   * total token count exceeds this value.
+   */
+  maxTokens: number;
+  /**
+   * Token budget for the most recent messages. It defines both the grace
+   * zone for per-message limits and the set of messages kept on truncation.
+   */
+  retainedTokens: number;
+  /** Per-message token limit applied to messages older than the grace zone. */
+  normalMessageTokens: number;
+  /** Per-message token limit applied to messages inside the grace zone. */
+  maximumMessageTokens: number;
+  /** Head ratio passed to the proportional truncation helpers. */
+  normalizationHeadRatio: number;
+  /** When false, a locally built fallback note replaces the LLM summary. */
+  isSummarizationEnabled: boolean;
+  /** When false, `manageHistory` returns the history untouched. */
+  isTruncationEnabled: boolean;
+}
@@ -293,6 +293,7 @@ describe('WebFetchTool', () => {
        })),
      },
      isInteractive: () => false,
+      isAutoDistillationEnabled: vi.fn().mockReturnValue(false),
    } as unknown as Config;
  });

@@ -1118,5 +1119,40 @@ describe('WebFetchTool', () => {
      );
      expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_PROCESSING_ERROR);
    });
+
+    it('should bypass truncation if isAutoDistillationEnabled is true', async () => {
+      const url = 'https://example.com/large-text';
+      // Larger than MAX_CONTENT_LENGTH (250000)
+      const oversizedBody = 'a'.repeat(300000);
+
+      vi.spyOn(mockConfig, 'isAutoDistillationEnabled').mockReturnValue(true);
+      mockFetch(url, {
+        status: 200,
+        headers: new Headers({ 'content-type': 'text/plain' }),
+        text: () => Promise.resolve(oversizedBody),
+      });
+
+      const invocation = new WebFetchTool(mockConfig, bus).build({ url });
+      const { llmContent } = await invocation.execute(
+        new AbortController().signal,
+      );
+
+      // With distillation enabled the body must come back at full length.
+      expect((llmContent as string).length).toBe(300000);
+    });
+
+    it('should truncate if isAutoDistillationEnabled is false', async () => {
+      const url = 'https://example.com/large-text2';
+      // Larger than MAX_CONTENT_LENGTH (250000)
+      const oversizedBody = 'a'.repeat(300000);
+
+      vi.spyOn(mockConfig, 'isAutoDistillationEnabled').mockReturnValue(false);
+      mockFetch(url, {
+        status: 200,
+        headers: new Headers({ 'content-type': 'text/plain' }),
+        text: () => Promise.resolve(oversizedBody),
+      });
+
+      const invocation = new WebFetchTool(mockConfig, bus).build({ url });
+      const { llmContent } = await invocation.execute(
+        new AbortController().signal,
+      );
+
+      // Without distillation the tool caps the body and appends its warning.
+      expect((llmContent as string).length).toBeLessThan(300000);
+      expect(llmContent).toContain('[Content truncated due to size limit]');
+    });
  });
 });
@@ -338,9 +338,15 @@ class WebFetchToolInvocation extends BaseToolInvocation<
      textContent = rawContent;
    }

-    // Cap at MAX_CONTENT_LENGTH initially to avoid excessive memory usage
-    // before the global budget allocation.
-    return truncateString(textContent, MAX_CONTENT_LENGTH, '');
+    if (!this.context.config.isAutoDistillationEnabled()) {
+      return truncateString(
+        textContent,
+        MAX_CONTENT_LENGTH,
+        TRUNCATION_WARNING,
+      );
+    }
+
+    return textContent;
  }

  private filterAndValidateUrls(urls: string[]): {
@@ -406,28 +412,32 @@ class WebFetchToolInvocation extends BaseToolInvocation<
      };
    }

-    // Smart Budget Allocation (Water-filling algorithm) for successes
-    const sortedSuccesses = [...successes].sort(
-      (a, b) => a.content.length - b.content.length,
-    );
-
-    let remainingBudget = MAX_CONTENT_LENGTH;
-    let remainingUrls = sortedSuccesses.length;
    const finalContentsByUrl = new Map<string, string>();
-
-    for (const success of sortedSuccesses) {
-      const fairShare = Math.floor(remainingBudget / remainingUrls);
-      const allocated = Math.min(success.content.length, fairShare);
-
-      const truncated = truncateString(
-        success.content,
-        allocated,
-        TRUNCATION_WARNING,
+    if (this.context.config.isAutoDistillationEnabled()) {
+      successes.forEach((success) =>
+        finalContentsByUrl.set(success.url, success.content),
      );
+    } else {
+      // Smart Budget Allocation (Water-filling algorithm) for successes
+      const sortedSuccesses = [...successes].sort(
+        (a, b) => a.content.length - b.content.length,
+      );
+      let remainingBudget = MAX_CONTENT_LENGTH;
+      let remainingUrls = sortedSuccesses.length;
+      for (const success of sortedSuccesses) {
+        const fairShare = Math.floor(remainingBudget / remainingUrls);
+        const allocated = Math.min(success.content.length, fairShare);

-      finalContentsByUrl.set(success.url, truncated);
-      remainingBudget -= truncated.length;
-      remainingUrls--;
+        const truncated = truncateString(
+          success.content,
+          allocated,
+          TRUNCATION_WARNING,
+        );
+
+        finalContentsByUrl.set(success.url, truncated);
+        remainingBudget -= truncated.length;
+        remainingUrls--;
+      }
    }

    const aggregatedContent = uniqueUrls
@@ -648,14 +658,21 @@ ${aggregatedContent}
      );

      if (status >= 400) {
-        const rawResponseText = bodyBuffer.toString('utf8');
+        let rawResponseText = bodyBuffer.toString('utf8');
+        if (!this.context.config.isAutoDistillationEnabled()) {
+          rawResponseText = truncateString(
+            rawResponseText,
+            10000,
+            '\n\n... [Error response truncated] ...',
+          );
+        }
        const headers: Record<string, string> = {};
        response.headers.forEach((value, key) => {
          headers[key] = value;
        });
        const errorContent = `Request failed with status ${status}
 Headers: ${JSON.stringify(headers, null, 2)}
-Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response truncated] ...')}`;
+Response: ${rawResponseText}`;
        debugLogger.error(
          `[WebFetchTool] Experimental fetch failed with status ${status} for ${url}`,
        );
@@ -671,11 +688,10 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun
        lowContentType.includes('text/plain') ||
        lowContentType.includes('application/json')
      ) {
-        const text = truncateString(
-          bodyBuffer.toString('utf8'),
-          MAX_CONTENT_LENGTH,
-          TRUNCATION_WARNING,
-        );
+        let text = bodyBuffer.toString('utf8');
+        if (!this.context.config.isAutoDistillationEnabled()) {
+          text = truncateString(text, MAX_CONTENT_LENGTH, TRUNCATION_WARNING);
+        }
        return {
          llmContent: text,
          returnDisplay: `Fetched ${contentType} content from ${url}`,
@@ -684,16 +700,19 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun

      if (lowContentType.includes('text/html')) {
        const html = bodyBuffer.toString('utf8');
-        const textContent = truncateString(
-          convert(html, {
-            wordwrap: false,
-            selectors: [
-              { selector: 'a', options: { ignoreHref: false, baseUrl: url } },
-            ],
-          }),
-          MAX_CONTENT_LENGTH,
-          TRUNCATION_WARNING,
-        );
+        let textContent = convert(html, {
+          wordwrap: false,
+          selectors: [
+            { selector: 'a', options: { ignoreHref: false, baseUrl: url } },
+          ],
+        });
+        if (!this.context.config.isAutoDistillationEnabled()) {
+          textContent = truncateString(
+            textContent,
+            MAX_CONTENT_LENGTH,
+            TRUNCATION_WARNING,
+          );
+        }
        return {
          llmContent: textContent,
          returnDisplay: `Fetched and converted HTML content from ${url}`,
@@ -718,11 +737,10 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun
      }

      // Fallback for unknown types - try as text
-      const text = truncateString(
-        bodyBuffer.toString('utf8'),
-        MAX_CONTENT_LENGTH,
-        TRUNCATION_WARNING,
-      );
+      let text = bodyBuffer.toString('utf8');
+      if (!this.context.config.isAutoDistillationEnabled()) {
+        text = truncateString(text, MAX_CONTENT_LENGTH, TRUNCATION_WARNING);
+      }
      return {
        llmContent: text,
        returnDisplay: `Fetched ${contentType || 'unknown'} content from ${url}`,
@@ -10,10 +10,10 @@ import { debugLogger } from './debugLogger.js';

 // Token estimation constants
 // ASCII characters (0-127) are roughly 4 chars per token
-const ASCII_TOKENS_PER_CHAR = 0.25;
+export const ASCII_TOKENS_PER_CHAR = 0.25;
 // Non-ASCII characters (including CJK) are often 1-2 tokens per char.
 // We use 1.3 as a conservative estimate to avoid underestimation.
-const NON_ASCII_TOKENS_PER_CHAR = 1.3;
+export const NON_ASCII_TOKENS_PER_CHAR = 1.3;
 // Fixed token estimate for images
 const IMAGE_TOKEN_ESTIMATE = 3000;
 // Fixed token estimate for PDFs (~100 pages at 258 tokens/page)
@@ -0,0 +1,142 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { Part } from '@google/genai';
+import {
+  estimateTokenCountSync,
+  ASCII_TOKENS_PER_CHAR,
+  NON_ASCII_TOKENS_PER_CHAR,
+} from './tokenCalculation.js';
+
+/**
+ * Lower bound for the per-value token budget computed in
+ * normalizeFunctionResponse; the scaled budget never drops below this.
+ */
+export const MIN_TARGET_TOKENS = 10;
+/**
+ * String values in a function response are only considered for truncation
+ * when they are longer than this many characters.
+ */
+export const MIN_CHARS_FOR_TRUNCATION = 100;
+/**
+ * Marker text for a truncated plain-text message. Not referenced by the
+ * functions in this module; presumably consumed by callers (verify).
+ */
+export const TEXT_TRUNCATION_PREFIX =
+  '[Message Normalized: Exceeded size limit]';
+/**
+ * Marker text that normalizeFunctionResponse places at the start of every
+ * string value it truncates.
+ */
+export const TOOL_TRUNCATION_PREFIX =
+  '[Message Normalized: Tool output exceeded size limit]';
+
+/**
+ * Converts a token budget into an approximate character budget for `text`.
+ *
+ * The first 1000 UTF-16 code units of `text` (or all of it, if shorter) are
+ * inspected to find the share of ASCII code units (char code <= 127). That
+ * share weights the ASCII and non-ASCII tokens-per-character constants into a
+ * single average, and the budget is divided by that average.
+ *
+ * @param text Text whose ASCII/non-ASCII mix drives the estimate.
+ * @param targetTokens Token budget to convert.
+ * @returns The floored character count, or 0 when `text` is empty.
+ */
+export function estimateCharsFromTokens(
+  text: string,
+  targetTokens: number,
+): number {
+  if (text.length === 0) {
+    return 0;
+  }
+
+  // Only a bounded prefix is scanned so the cost stays constant for huge inputs.
+  const sampleLen = Math.min(text.length, 1000);
+  let nonAsciiCount = 0;
+  let index = 0;
+  while (index < sampleLen) {
+    if (text.charCodeAt(index) > 127) {
+      nonAsciiCount++;
+    }
+    index++;
+  }
+
+  const asciiRatio = (sampleLen - nonAsciiCount) / sampleLen;
+  // Blend the two per-character token costs according to the sampled mix.
+  const avgTokensPerChar =
+    asciiRatio * ASCII_TOKENS_PER_CHAR +
+    (1 - asciiRatio) * NON_ASCII_TOKENS_PER_CHAR;
+
+  // chars = tokens / (tokens per char)
+  return Math.floor(targetTokens / avgTokensPerChar);
+}
+
+/**
+ * Shrinks `str` to at most `targetChars` UTF-16 code units by keeping a slice
+ * of its head and a slice of its tail, joined by an ellipsis line and preceded
+ * by `prefix` on its own line.
+ *
+ * - A string that already fits is returned unchanged.
+ * - When `targetChars` cannot hold the prefix, the ellipsis and at least one
+ *   character of content (or is not a number), only `prefix` is returned.
+ *   Note that `prefix` itself may be longer than `targetChars`.
+ * - A cut never lands inside a surrogate pair: if it would, that side keeps
+ *   one code unit fewer so the result contains no lone surrogates.
+ *
+ * @param str Text to truncate.
+ * @param targetChars Maximum length of the result in UTF-16 code units.
+ * @param prefix Marker written on the first line of a truncated result.
+ * @param headRatio Fraction of the retained content taken from the start of
+ *   `str`; the remainder comes from the end. Clamped to [0, 1]; a non-finite
+ *   value falls back to the default of 0.2.
+ * @returns `str` itself, the bare `prefix`, or
+ *   `prefix + "\n" + head + "\n...\n" + tail`.
+ */
+export function truncateProportionally(
+  str: string,
+  targetChars: number,
+  prefix: string,
+  headRatio: number = 0.2,
+): string {
+  if (str.length <= targetChars) return str;
+
+  const ellipsis = '\n...\n';
+  const overhead = prefix.length + ellipsis.length + 1; // +1 for the newline after prefix
+  const availableChars = Math.floor(Math.max(0, targetChars - overhead));
+
+  // Written as a negated ">" so a NaN budget also takes the safe fallback
+  // instead of reaching substring(NaN), which would return the whole string.
+  if (!(availableChars > 0)) {
+    return prefix; // Safe fallback if target is extremely small
+  }
+
+  // Keep the ratio inside [0, 1]; otherwise head or tail alone could exceed
+  // the available budget and the result would overshoot targetChars.
+  const safeHeadRatio = Number.isFinite(headRatio)
+    ? Math.min(1, Math.max(0, headRatio))
+    : 0.2;
+
+  const headChars = Math.floor(availableChars * safeHeadRatio);
+  const tailChars = availableChars - headChars;
+
+  // True when cutting `str` at `index` would separate a high surrogate from
+  // the low surrogate that follows it.
+  const splitsSurrogatePair = (index: number): boolean => {
+    if (index <= 0 || index >= str.length) return false;
+    const before = str.charCodeAt(index - 1);
+    const after = str.charCodeAt(index);
+    return (
+      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff
+    );
+  };
+
+  // Move each cut outward from the kept content so we only ever drop a whole
+  // pair, never half of one. This can only make the result shorter.
+  let headEnd = headChars;
+  if (splitsSurrogatePair(headEnd)) headEnd -= 1;
+
+  let tailStart = str.length - tailChars;
+  if (splitsSurrogatePair(tailStart)) tailStart += 1;
+
+  return `${prefix}\n${str.substring(0, headEnd)}${ellipsis}${str.substring(tailStart)}`;
+}
+
+/**
+ * Produces a copy of a function-response part in which oversized string
+ * values have been shortened, leaving the response's keys intact.
+ *
+ * Each top-level string value longer than MIN_CHARS_FOR_TRUNCATION gets a
+ * token budget of `ratio` times its estimated token count (never below
+ * MIN_TARGET_TOKENS). The budget is converted to characters and, if the value
+ * is longer than that, it is truncated proportionally with
+ * TOOL_TRUNCATION_PREFIX. Non-string and short values are copied as-is.
+ *
+ * @param part Part to normalize; returned unchanged when it has no object
+ *   `functionResponse.response` or when nothing needed truncating.
+ * @param ratio Fraction of each value's estimated tokens to retain.
+ * @param headRatio Share of the retained characters taken from the start of
+ *   the value.
+ * @param savedPath When set, a "Full output saved to" note with this path is
+ *   appended to every truncated value.
+ * @param intentSummary When set, appended verbatim after the saved-path note.
+ * @returns The original `part`, or a new part holding the rewritten response.
+ */
+export function normalizeFunctionResponse(
+  part: Part,
+  ratio: number,
+  headRatio: number = 0.2,
+  savedPath?: string,
+  intentSummary?: string,
+): Part {
+  const fr = part.functionResponse;
+  if (!fr) return part;
+
+  const responseObj = fr.response;
+  if (!responseObj || typeof responseObj !== 'object') return part;
+
+  // The trailer is identical for every truncated value, so build it once.
+  const savedNote = savedPath ? `\n\nFull output saved to: ${savedPath}` : '';
+  const trailer = savedNote + (intentSummary ? intentSummary : '');
+
+  const newResponse: Record<string, unknown> = {};
+  let hasChanges = false;
+
+  // Only individual string values are shortened, so the schema keys
+  // (stdout, stderr, etc.) survive untouched.
+  for (const [key, value] of Object.entries(responseObj)) {
+    // Default to a straight copy; overwritten below if the value is cut.
+    newResponse[key] = value;
+
+    if (typeof value !== 'string') continue;
+    if (value.length <= MIN_CHARS_FOR_TRUNCATION) continue;
+
+    const estimatedTokens = estimateTokenCountSync([{ text: value }]);
+    const tokenBudget = Math.max(
+      MIN_TARGET_TOKENS,
+      Math.floor(estimatedTokens * ratio),
+    );
+    const charBudget = estimateCharsFromTokens(value, tokenBudget);
+
+    if (value.length > charBudget) {
+      const shortened = truncateProportionally(
+        value,
+        charBudget,
+        TOOL_TRUNCATION_PREFIX,
+        headRatio,
+      );
+      newResponse[key] = shortened + trailer;
+      hasChanges = true;
+    }
+  }
+
+  if (!hasChanges) return part;
+
+  return {
+    functionResponse: {
+      // This spread should be safe as we mostly care about the function
+      // response properties.
+      // eslint-disable-next-line @typescript-eslint/no-misused-spread
+      ...fr,
+      response: newResponse,
+    },
+  };
+}