feat(core): distill tool

2026-04-10 21:30:40 -07:00 · 2026-03-07 19:33:00 +00:00
parent ca184a386e
commit 6d1c6a9b06
39 changed files with 1853 additions and 1120 deletions
--- a/packages/cli/src/integration-tests/snapshots/continuousSession.test.tsx.snap
+++ b/packages/cli/src/integration-tests/snapshots/continuousSession.test.tsx.snap
@@ -0,0 +1,107 @@
+// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
+
+exports[`Continuous Session Integration > should handle checkpoint_state and manual compress tools correctly > 1-before-checkpoint 1`] = `
+[
+  {
+    "parts": [
+      {
+        "text": "<SESSION_CONTEXT>",
+      },
+      {
+        "text": "Start the mission PADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPAD",
+      },
+    ],
+    "role": "user",
+  },
+]
+`;
+
+exports[`Continuous Session Integration > should handle checkpoint_state and manual compress tools correctly > 2-with-checkpoint 1`] = `
+[
+  {
+    "parts": [
+      {
+        "text": "<SESSION_CONTEXT>",
+      },
+      {
+        "text": "<state_checkpoint>
+GOAL: Implementation of session continuity.
+PROGRESS: Tools implemented.
+CONSTRAINT: Use high-fidelity summary.
+</state_checkpoint>",
+      },
+      {
+        "text": "Start the mission PADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPAD",
+      },
+    ],
+    "role": "user",
+  },
+  {
+    "parts": [
+      {
+        "text": "I will now checkpoint our progress.",
+      },
+      {
+        "functionCall": {
+          "args": {
+            "summary": "GOAL: Implementation of session continuity.
+PROGRESS: Tools implemented.
+CONSTRAINT: Use high-fidelity summary.",
+          },
+          "id": "<CALL_ID>",
+          "name": "checkpoint_state",
+        },
+        "thoughtSignature": "skip_thought_signature_validator",
+      },
+    ],
+    "role": "model",
+  },
+  {
+    "parts": [
+      {
+        "functionResponse": {
+          "id": "<CALL_ID>",
+          "name": "checkpoint_state",
+          "response": {
+            "output": "First checkpoint created. No previous summary found.",
+          },
+        },
+      },
+    ],
+    "role": "user",
+  },
+]
+`;
+
+exports[`Continuous Session Integration > should handle checkpoint_state and manual compress tools correctly > final-curated-history 1`] = `
+[
+  {
+    "parts": [
+      {
+        "text": "<SESSION_CONTEXT>",
+      },
+      {
+        "text": "<state_checkpoint>
+<state_snapshot>
+<overall_goal>Implement session continuity</overall_goal>
+<active_constraints>Use high-fidelity summary</active_constraints>
+<key_knowledge>Tools implemented: checkpoint_state, compress</key_knowledge>
+<task_state>1. [DONE] Implement tools
+2. [IN PROGRESS] Verify continuity
+</task_state>
+</state_snapshot>
+</state_checkpoint>",
+      },
+    ],
+    "role": "user",
+  },
+  {
+    "parts": [
+      {
+        "text": "Compression successful. I have clear context and I remember our mission.",
+      },
+    ],
+    "role": "model",
+  },
+]
+`;
--- a/packages/cli/src/integration-tests/snapshots/distill_result.test.tsx.snap
+++ b/packages/cli/src/integration-tests/snapshots/distill_result.test.tsx.snap
@@ -0,0 +1,124 @@
+// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
+
+exports[`Distill Result Integration > should surgically replace a noisy tool result with a distilled version > 1-initial-prompt 1`] = `
+[
+  {
+    "parts": [
+      {
+        "text": "<SESSION_CONTEXT>",
+      },
+      {
+        "text": "Audit src/ for secrets",
+      },
+    ],
+    "role": "user",
+  },
+]
+`;
+
+exports[`Distill Result Integration > should surgically replace a noisy tool result with a distilled version > 2-request-with-noise 1`] = `
+[
+  {
+    "parts": [
+      {
+        "text": "<SESSION_CONTEXT>",
+      },
+      {
+        "text": "Audit src/ for secrets",
+      },
+    ],
+    "role": "user",
+  },
+  {
+    "parts": [
+      {
+        "text": "**Thought**
+I need to search for SECRET in the src/ directory.",
+      },
+      {
+        "functionCall": {
+          "args": {
+            "file_path": "src/foo.txt",
+          },
+          "id": "<CALL_ID>",
+          "name": "read_file",
+        },
+        "thoughtSignature": "skip_thought_signature_validator",
+      },
+    ],
+    "role": "model",
+  },
+  {
+    "parts": [
+      {
+        "functionResponse": {
+          "id": "<CALL_ID>",
+          "name": "read_file",
+          "response": {
+            "error": "File not found: <TEST_DIR>/src/foo.txt",
+          },
+        },
+      },
+    ],
+    "role": "user",
+  },
+]
+`;
+
+exports[`Distill Result Integration > should surgically replace a noisy tool result with a distilled version > final-curated-history 1`] = `
+[
+  {
+    "parts": [
+      {
+        "text": "<SESSION_CONTEXT>",
+      },
+      {
+        "text": "Audit src/ for secrets",
+      },
+    ],
+    "role": "user",
+  },
+  {
+    "parts": [
+      {
+        "text": "**Thought**
+I need to search for SECRET in the src/ directory.",
+      },
+      {
+        "functionCall": {
+          "args": {
+            "file_path": "src/foo.txt",
+          },
+          "id": "<CALL_ID>",
+          "name": "read_file",
+        },
+      },
+    ],
+    "role": "model",
+  },
+  {
+    "parts": [
+      {
+        "functionResponse": {
+          "id": "<CALL_ID>",
+          "name": "read_file",
+          "response": {
+            "distilled": true,
+            "distilled_output": "Found SECRET_KEY="12345" in src/env.ts",
+            "original_output_file": "<TMP_FILE>",
+          },
+        },
+      },
+    ],
+    "role": "user",
+  },
+  {
+    "parts": [
+      {
+        "text": "I found the SECRET_KEY="12345" in src/env.ts after distilling the search results.",
+      },
+    ],
+    "role": "model",
+  },
+]
+`;
--- a/packages/cli/src/integration-tests/continuousSession.test.tsx
+++ b/packages/cli/src/integration-tests/continuousSession.test.tsx
@@ -4,7 +4,7 @@
 * SPDX-License-Identifier: Apache-2.0
 */

-import { describe, it, afterEach } from 'vitest';
+import { describe, it, expect, afterEach } from 'vitest';
 import { AppRig } from '../test-utils/AppRig.js';
 import path from 'node:path';
 import { fileURLToPath } from 'node:url';
@@ -26,26 +26,41 @@ describe('Continuous Session Integration', () => {
    );
    rig = new AppRig({
      fakeResponsesPath,
+      configOverrides: {
+        continuousSession: true,
+      },
    });
    await rig.initialize();
    rig.render();
    await rig.waitForIdle();

-    // Set policies to AUTO so it proceeds without asking user
-    rig.setToolPolicy('checkpoint_state', PolicyDecision.ALLOW);
-    rig.setToolPolicy('compress', PolicyDecision.ALLOW);
+    // Use ASK_USER to pause and inspect the curated history at key moments
+    rig.setToolPolicy('checkpoint_state', PolicyDecision.ASK_USER);
+    rig.setToolPolicy('compress', PolicyDecision.ASK_USER);

    // Start the quest
-    await rig.type('Start the mission');
+    await rig.type('Start the mission ' + 'PAD'.repeat(100));
    await rig.pressEnter();

    // 1. Wait for CheckpointState tool call
    await rig.waitForOutput('CheckpointState');
+    // Verify curated history BEFORE checkpoint is applied
+    expect(rig.getLastSentRequestContents()).toMatchSnapshot('1-before-checkpoint');
+    await rig.resolveTool('CheckpointState');

    // 2. Wait for Compress tool call
    await rig.waitForOutput('Compress');
+    // Verify curated history contains the checkpoint
+    expect(rig.getLastSentRequestContents()).toMatchSnapshot('2-with-checkpoint');
+    await rig.resolveTool('Compress');

    // 3. Wait for final model response after compression
    await rig.waitForOutput('Compression successful.');
+    await rig.waitForIdle();
+
+    // Verify the final curated history:
+    // - Should contain the high-fidelity snapshot
+    // - Should NOT contain pre-compression turns
+    expect(rig.getCuratedHistory()).toMatchSnapshot('final-curated-history');
  });
 });
--- a/packages/cli/src/integration-tests/distill_result.test.tsx
+++ b/packages/cli/src/integration-tests/distill_result.test.tsx
@@ -0,0 +1,86 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, afterEach } from 'vitest';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { AppRig } from '../test-utils/AppRig.js';
+import { PolicyDecision } from '@google/gemini-cli-core';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+
+describe('Distill Result Integration', () => {
+  let rig: AppRig | undefined;
+
+  afterEach(async () => {
+    await rig?.unmount();
+  });
+
+  it('should surgically replace a noisy tool result with a distilled version', async () => {
+    const fakeResponsesPath = path.resolve(
+      __dirname,
+      '../test-utils/fixtures/distill_result.responses',
+    );
+    
+    rig = new AppRig({
+      fakeResponsesPath,
+      configOverrides: { 
+        continuousSession: true,
+        modelSteering: true,
+      },
+    });
+    
+    await rig.initialize();
+    rig.render();
+    await rig.waitForIdle();
+
+    rig.setMockCommands([
+      {
+        command: /read_file/,
+        result: {
+          output: 'NOISE\n'.repeat(50) + 'SECRET_KEY="12345"\n' + 'NOISE\n'.repeat(50),
+          exitCode: 0,
+        },
+      },
+    ]);
+
+    // Use ASK_USER to pause and inspect the request before each model turn
+    rig.setToolPolicy('read_file', PolicyDecision.ASK_USER);
+    rig.setToolPolicy('distill_result', PolicyDecision.ASK_USER);
+
+    // 1. Initial Prompt: Audit for secrets
+    await rig.sendMessage('Audit src/ for secrets');
+
+    // 2. Model calls run_shell_command (the "Noise Bomb")
+    await rig.waitForOutput('ReadFile');
+    // Verify the curated history sent to model contains the initial user prompt
+    expect(rig.getLastSentRequestContents()).toMatchSnapshot('1-initial-prompt');
+    
+    await rig.resolveTool('ReadFile');
+    
+    // 3. Model realizes it's noisy and calls distill_result
+    await rig.waitForOutput('DistillResult');
+    // Verify history now includes the massive noise
+    expect(rig.getLastSentRequestContents()).toMatchSnapshot('2-request-with-noise');
+    
+    await rig.resolveTool('DistillResult');
+
+    // 4. Model continues from the distilled state and finishes
+    await rig.waitForOutput(/found the SECRET_KEY/i);
+    await rig.waitForIdle();
+
+    // Verify the final curated history:
+    // - NO noise from the original read_file
+    // - original read_file response is replaced with our universal distillation schema
+    // - intermediate thoughts and the distill_result turn itself are elided
+    expect(rig.getCuratedHistory()).toMatchSnapshot('final-curated-history');
+
+    // Verify final output contains the signal
+    const output = rig.getStaticOutput();
+    expect(output).toContain('SECRET_KEY');
+    expect(output).toContain('12345');
+  });
+});
--- a/packages/cli/src/test-utils/AppRig.tsx
+++ b/packages/cli/src/test-utils/AppRig.tsx
@@ -150,6 +150,7 @@ export class AppRig {
  private settings: LoadedSettings | undefined;
  private testDir: string;
  private sessionId: string;
+  private appRigId: string;

  private pendingConfirmations = new Map<string, PendingConfirmation>();
  private breakpointTools = new Set<string | undefined>();
@@ -165,6 +166,7 @@ export class AppRig {
    this.testDir = fs.mkdtempSync(
      path.join(os.tmpdir(), `gemini-app-rig-${uniqueId.slice(0, 8)}-`),
    );
+    this.appRigId = path.basename(this.testDir).toLowerCase();
    this.sessionId = `test-session-${uniqueId}`;
    activeRigs.set(this.sessionId, this);
  }
@@ -702,6 +704,67 @@ export class AppRig {
    await this.pressEnter();
  }

+  getSentRequests() {
+    if (!this.config) throw new Error('AppRig not initialized');
+    return this.config.getContentGenerator().getSentRequests?.() || [];
+  }
+
+  /**
+   * Helper to get the curated history (contents) sent in the most recent model request.
+   * This method scrubs unstable data like temp paths and IDs for deterministic goldens.
+   */
+  getLastSentRequestContents() {
+    const requests = this.getSentRequests();
+    if (requests.length === 0) return [];
+    const contents = requests[requests.length - 1].contents || [];
+    return this.scrubUnstableData(contents);
+  }
+
+  /**
+   * Gets the final curated history of the active chat session.
+   */
+  getCuratedHistory() {
+    if (!this.config) throw new Error('AppRig not initialized');
+    const history = this.config.getGeminiClient().getChat().getHistory(true);
+    return this.scrubUnstableData(history);
+  }
+
+  private scrubUnstableData(contents: any) {
+    // Deeply scrub unstable data
+    const scrubbed = JSON.parse(
+      JSON.stringify(contents)
+        .replace(new RegExp(this.testDir, 'g'), '<TEST_DIR>')
+        .replace(new RegExp(this.appRigId, 'g'), '<APP_RIG_ID>')
+        .replace(new RegExp(this.sessionId, 'g'), '<SESSION_ID>'),
+    );
+
+    if (scrubbed.length > 0) {
+      if (scrubbed[0].parts[0].text?.includes('<session_context>')) {
+        scrubbed[0].parts[0].text = '<SESSION_CONTEXT>';
+      }
+    }
+
+    const removeIds = (obj: any) => {
+      if (Array.isArray(obj)) {
+        obj.forEach(removeIds);
+      } else if (obj && typeof obj === 'object') {
+        if (obj.functionCall) {
+          obj.functionCall.id = '<CALL_ID>';
+        }
+        if (obj.functionResponse) {
+          obj.functionResponse.id = '<CALL_ID>';
+          if (obj.functionResponse?.response?.original_output_file) {
+            obj.functionResponse.response.original_output_file = '<TMP_FILE>';
+          }
+        }
+        Object.values(obj).forEach(removeIds);
+      }
+    };
+    removeIds(scrubbed);
+
+    return scrubbed;
+  }
+
  async unmount() {
    // Clean up global state for this session
    sessionStateMap.delete(this.sessionId);
--- a/packages/cli/src/test-utils/fixtures/continuous_session.responses
+++ b/packages/cli/src/test-utils/fixtures/continuous_session.responses
@@ -1,5 +1,4 @@
 {"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I will now checkpoint our progress."},{"functionCall":{"name":"checkpoint_state","args":{"summary":"GOAL: Implementation of session continuity.\nPROGRESS: Tools implemented.\nCONSTRAINT: Use high-fidelity summary."}}}]},"finishReason":"STOP"}]}]}
-{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"Checkpoint created. Now I will trigger compression to clear the context."},{"functionCall":{"name":"compress","args":{"force":true}}}]},"finishReason":"STOP"}]}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"Checkpoint created. Now I will trigger compression to clear the context."},{"functionCall":{"name":"compress","args":{}}}]},"finishReason":"STOP"}]}]}
 {"method":"generateContent","response":{"candidates":[{"content":{"role":"model","parts":[{"text":"<state_snapshot>\n<overall_goal>Implement session continuity</overall_goal>\n<active_constraints>Use high-fidelity summary</active_constraints>\n<key_knowledge>Tools implemented: checkpoint_state, compress</key_knowledge>\n<task_state>1. [DONE] Implement tools\n2. [IN PROGRESS] Verify continuity\n</task_state>\n</state_snapshot>"}]}}],"finishReason":"STOP"}}
-{"method":"generateContent","response":{"candidates":[{"content":{"role":"model","parts":[{"text":"The <state_snapshot> is accurate and preserves all critical details."}]},"finishReason":"STOP"}]}}
 {"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"Compression successful. I have clear context and I remember our mission."}]},"finishReason":"STOP"}]}]}
--- a/packages/cli/src/test-utils/fixtures/distill_result.responses
+++ b/packages/cli/src/test-utils/fixtures/distill_result.responses
@@ -0,0 +1,3 @@
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"**Thought**\nI need to search for SECRET in the src/ directory."},{"functionCall":{"name":"read_file","args":{"file_path":"src/foo.txt"}}}]},"finishReason":"STOP"}]}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"**Thought**\nThe output is very noisy. I will distill it to keep the context clean."},{"functionCall":{"name":"distill_result","args":{"revised_text":"Found SECRET_KEY=\"12345\" in src/env.ts"}}}]},"finishReason":"STOP"}]}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I found the SECRET_KEY=\"12345\" in src/env.ts after distilling the search results."}]},"finishReason":"STOP"}]}]}