feat(core): Support auto-distillation for tool output.

2026-03-13 15:40:57 -07:00 · 2026-03-11 01:18:41 +00:00
parent e22d9917b7
commit ddb7b65897
13 changed files with 496 additions and 128 deletions
--- a/packages/cli/src/integration-tests/snapshots/autoDistillation.test.tsx.snap
+++ b/packages/cli/src/integration-tests/snapshots/autoDistillation.test.tsx.snap
@@ -0,0 +1,72 @@
+// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
+
+exports[`Auto-distillation Integration > should truncate and summarize massive tool outputs, and we should golden the chat history 1`] = `
+[
+  {
+    "parts": [
+      {
+        "text": "<SESSION_CONTEXT>",
+      },
+    ],
+    "role": "user",
+  },
+  {
+    "parts": [
+      {
+        "text": "Fetch the massive file.",
+      },
+    ],
+    "role": "user",
+  },
+  {
+    "parts": [
+      {
+        "text": "I will now fetch the data.",
+      },
+      {
+        "functionCall": {
+          "args": {
+            "command": "cat large.txt",
+          },
+          "id": "<CALL_ID>",
+          "name": "run_shell_command",
+        },
+      },
+    ],
+    "role": "model",
+  },
+  {
+    "parts": [
+      {
+        "functionResponse": {
+          "id": "<CALL_ID>",
+          "name": "run_shell_command",
+          "response": {
+            "output": "Output too large. Showing first 10 and last 40 characters. For full output see: <TEST_DIR>/.gemini/tmp/<APP_RIG_ID>/tool-outputs/session-<SESSION_ID>/run_shell_command_<TIMESTAMP>_<INDEX>.txt
+Output: ca
+
+... [40 characters omitted] ...
+
+Exit Code: 1
+Process Group PGID: <PGID>
+
+--- Structural Map of Truncated Content ---
+- Line 1: Header
+- Lines 2-5000: User data
+- Line 5001: Footer",
+          },
+        },
+      },
+    ],
+    "role": "user",
+  },
+  {
+    "parts": [
+      {
+        "text": "I got the summarized output. Task complete.",
+      },
+    ],
+    "role": "model",
+  },
+]
+`;
--- a/packages/cli/src/integration-tests/autoDistillation.test.tsx
+++ b/packages/cli/src/integration-tests/autoDistillation.test.tsx
@@ -0,0 +1,74 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, afterEach, vi } from 'vitest';
+import { AppRig } from '../test-utils/AppRig.js';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { PolicyDecision } from '@google/gemini-cli-core';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+
+describe('Auto-distillation Integration', () => {
+  let rig: AppRig | undefined;
+
+  afterEach(async () => {
+    try {
+      await rig?.unmount();
+    } finally {
+      // Always clear the reference and restore spies, even if unmount throws,
+      // so a later test never unmounts a stale rig or inherits this test's
+      // mocks.
+      rig = undefined;
+      vi.restoreAllMocks();
+    }
+  });
+
+  it('should truncate and summarize massive tool outputs, and we should golden the chat history', async () => {
+    const fakeResponsesPath = path.join(
+      __dirname,
+      '../test-utils/fixtures/auto-distillation.responses',
+    );
+    rig = new AppRig({
+      fakeResponsesPath,
+    });
+
+    await rig.initialize();
+
+    const config = rig.getConfig();
+    // 50 chars threshold. > 75 chars triggers summarization
+    vi.spyOn(config, 'getTruncateToolOutputThreshold').mockReturnValue(50);
+
+    // Require confirmation so the tool call can be resolved explicitly below.
+    rig.setToolPolicy('run_shell_command', PolicyDecision.ASK_USER);
+
+    // NOTE(review): the committed golden for this test records tool output
+    // beginning with "Output: ca" and "Exit Code: 1", which does not match
+    // this mock ('A' x 100, exit code 0) — confirm the mock is actually
+    // applied and regenerate the snapshot if it is not.
+    rig.setMockCommands([
+      {
+        command: /cat large.txt/,
+        result: {
+          output: 'A'.repeat(100),
+          exitCode: 0,
+        },
+      },
+    ]);
+
+    rig.render();
+    await rig.waitForIdle();
+
+    await rig.sendMessage('Fetch the massive file.');
+
+    await rig.waitForOutput('Shell');
+    await rig.resolveTool('Shell');
+
+    await rig.waitForOutput('Task complete.');
+
+    expect(rig.getCuratedHistory()).toMatchSnapshot();
+  });
+});
--- a/packages/cli/src/test-utils/AppRig.tsx
+++ b/packages/cli/src/test-utils/AppRig.tsx
@@ -30,6 +30,7 @@ import {
  IdeClient,
  debugLogger,
  CoreToolCallStatus,
+  ConsecaSafetyChecker,
 } from '@google/gemini-cli-core';
 import {
  type MockShellCommand,
@@ -47,6 +48,7 @@ import type {
  TrackedCompletedToolCall,
  TrackedToolCall,
 } from '../ui/hooks/useToolScheduler.js';
+import type { Content, GenerateContentParameters } from '@google/genai';

 // Global state observer for React-based signals
 const sessionStateMap = new Map<string, StreamingState>();
@@ -153,6 +155,7 @@ export class AppRig {
  private settings: LoadedSettings | undefined;
  private testDir: string;
  private sessionId: string;
+  private appRigId: string;

  private pendingConfirmations = new Map<string, PendingConfirmation>();
  private breakpointTools = new Set<string | undefined>();
@@ -168,6 +171,7 @@ export class AppRig {
    this.testDir = fs.mkdtempSync(
      path.join(os.tmpdir(), `gemini-app-rig-${uniqueId.slice(0, 8)}-`),
    );
+    this.appRigId = path.basename(this.testDir).toLowerCase();
    this.sessionId = `test-session-${uniqueId}`;
    activeRigs.set(this.sessionId, this);
  }
@@ -738,6 +742,10 @@ export class AppRig {
    // Forcefully clear IdeClient singleton promise
    // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-type-assertion
    (IdeClient as any).instancePromise = null;
+
+    // Reset Conseca singleton to avoid leaking config/state across tests
+    ConsecaSafetyChecker.resetInstance();
+
    vi.clearAllMocks();

    this.config = undefined;
@@ -754,4 +762,121 @@ export class AppRig {
      }
    }
  }
+
+  /**
+   * Returns the requests recorded by the content generator, or an empty array
+   * when the generator does not expose `getSentRequests` (or it returns a
+   * falsy value).
+   *
+   * @throws Error if the rig has not been initialized.
+   */
+  getSentRequests() {
+    if (!this.config) throw new Error('AppRig not initialized');
+    return this.config.getContentGenerator().getSentRequests?.() || [];
+  }
+
+  /**
+   * Helper to get the contents sent in the most recent model request, with
+   * unstable data (temp paths, IDs) scrubbed for deterministic goldens.
+   * Returns an empty array when no requests were recorded.
+   */
+  getLastSentRequestContents() {
+    const requests = this.getSentRequests();
+    if (requests.length === 0) return [];
+    const contents = requests[requests.length - 1].contents || [];
+    return this.scrubUnstableData(contents);
+  }
+
+  /**
+   * Gets the final curated history of the active chat session, scrubbed of
+   * unstable data so it can be compared against a golden snapshot.
+   *
+   * @throws Error if the rig has not been initialized.
+   */
+  getCuratedHistory() {
+    if (!this.config) throw new Error('AppRig not initialized');
+    const history = this.config.getGeminiClient().getChat().getHistory(true);
+    return this.scrubUnstableData(history);
+  }
+
+  /**
+   * Returns a JSON round-tripped copy of `contents` in which run-specific
+   * values are replaced by stable placeholders:
+   * - the test directory, rig id and session id become `<TEST_DIR>`,
+   *   `<APP_RIG_ID>` and `<SESSION_ID>`;
+   * - `<name>_<13 digits>_<digits>.txt` becomes
+   *   `<name>_<TIMESTAMP>_<INDEX>.txt`;
+   * - the number after `Process Group PGID: ` becomes `<PGID>`;
+   * - the first entry's first part text becomes `<SESSION_CONTEXT>` when it
+   *   contains `<session_context>`;
+   * - every function call / function response id becomes `<CALL_ID>`;
+   * - a function response's `original_output_file` becomes `<TMP_FILE>`.
+   */
+  private scrubUnstableData<
+    T extends
+      | Content[]
+      | GenerateContentParameters['contents']
+      | readonly Content[],
+  >(contents: T): T {
+    // The rig-specific identifiers are matched as literal text, never as regex
+    // source: a temp path may contain regex metacharacters (`.`, `+`, `(`,
+    // `\`). They are also matched in JSON-escaped form, because the search
+    // runs over serialized JSON, where e.g. a Windows path separator is
+    // written as two backslashes.
+    const literalScrubs: ReadonlyArray<readonly [string, string]> = [
+      [this.testDir, '<TEST_DIR>'],
+      [this.appRigId, '<APP_RIG_ID>'],
+      [this.sessionId, '<SESSION_ID>'],
+    ];
+
+    let serialized = JSON.stringify(contents);
+    for (const [unstable, placeholder] of literalScrubs) {
+      // JSON.stringify of a string yields a quoted literal; drop the quotes.
+      const needle = JSON.stringify(unstable).slice(1, -1);
+      if (needle.length === 0) continue;
+      serialized = serialized.split(needle).join(placeholder);
+    }
+
+    const scrubbedString = serialized
+      .replace(
+        /([a-zA-Z0-9_]+)_([0-9]{13})_([0-9]+)\.txt/g,
+        '$1_<TIMESTAMP>_<INDEX>.txt',
+      )
+      .replace(/Process Group PGID: \d+/g, 'Process Group PGID: <PGID>');
+
+    // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
+    const scrubbed = JSON.parse(scrubbedString) as T;
+
+    if (Array.isArray(scrubbed) && scrubbed.length > 0) {
+      // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
+      const firstItem = scrubbed[0] as Content;
+      if (firstItem.parts?.[0]?.text?.includes('<session_context>')) {
+        firstItem.parts[0].text = '<SESSION_CONTEXT>';
+      }
+
+      // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
+      for (const content of scrubbed as Content[]) {
+        if (content.parts) {
+          for (const part of content.parts) {
+            if (part.functionCall) {
+              part.functionCall.id = '<CALL_ID>';
+            }
+            if (part.functionResponse) {
+              part.functionResponse.id = '<CALL_ID>';
+              if (
+                part.functionResponse.response !== null &&
+                typeof part.functionResponse.response === 'object' &&
+                'original_output_file' in part.functionResponse.response
+              ) {
+                part.functionResponse.response['original_output_file'] =
+                  '<TMP_FILE>';
+              }
+            }
+          }
+        }
+      }
+    }
+
+    return scrubbed;
+  }
 }
--- a/packages/cli/src/test-utils/fixtures/auto-distillation.responses
+++ b/packages/cli/src/test-utils/fixtures/auto-distillation.responses
@@ -0,0 +1,3 @@
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I will now fetch the data."},{"functionCall":{"name":"run_shell_command","args":{"command":"cat large.txt"}}}]},"finishReason":"STOP"}]}]}
+{"method":"generateContent","response":{"candidates":[{"content":{"role":"model","parts":[{"text":"- Line 1: Header\n- Lines 2-5000: User data\n- Line 5001: Footer"}]},"finishReason":"STOP"}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I got the summarized output. Task complete."}]},"finishReason":"STOP"}]}]}