feat(core): distill tool

2026-04-21 18:44:30 -07:00 · 2026-03-07 19:33:00 +00:00
parent ca184a386e
commit 6d1c6a9b06
39 changed files with 1853 additions and 1120 deletions
@@ -0,0 +1,107 @@
+// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
+
+exports[`Continuous Session Integration > should handle checkpoint_state and manual compress tools correctly > 1-before-checkpoint 1`] = `
+[
+  {
+    "parts": [
+      {
+        "text": "<SESSION_CONTEXT>",
+      },
+      {
+        "text": "Start the mission PADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPAD",
+      },
+    ],
+    "role": "user",
+  },
+]
+`;
+
+exports[`Continuous Session Integration > should handle checkpoint_state and manual compress tools correctly > 2-with-checkpoint 1`] = `
+[
+  {
+    "parts": [
+      {
+        "text": "<SESSION_CONTEXT>",
+      },
+      {
+        "text": "<state_checkpoint>
+GOAL: Implementation of session continuity.
+PROGRESS: Tools implemented.
+CONSTRAINT: Use high-fidelity summary.
+</state_checkpoint>",
+      },
+      {
+        "text": "Start the mission PADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPAD",
+      },
+    ],
+    "role": "user",
+  },
+  {
+    "parts": [
+      {
+        "text": "I will now checkpoint our progress.",
+      },
+      {
+        "functionCall": {
+          "args": {
+            "summary": "GOAL: Implementation of session continuity.
+PROGRESS: Tools implemented.
+CONSTRAINT: Use high-fidelity summary.",
+          },
+          "id": "<CALL_ID>",
+          "name": "checkpoint_state",
+        },
+        "thoughtSignature": "skip_thought_signature_validator",
+      },
+    ],
+    "role": "model",
+  },
+  {
+    "parts": [
+      {
+        "functionResponse": {
+          "id": "<CALL_ID>",
+          "name": "checkpoint_state",
+          "response": {
+            "output": "First checkpoint created. No previous summary found.",
+          },
+        },
+      },
+    ],
+    "role": "user",
+  },
+]
+`;
+
+exports[`Continuous Session Integration > should handle checkpoint_state and manual compress tools correctly > final-curated-history 1`] = `
+[
+  {
+    "parts": [
+      {
+        "text": "<SESSION_CONTEXT>",
+      },
+      {
+        "text": "<state_checkpoint>
+<state_snapshot>
+<overall_goal>Implement session continuity</overall_goal>
+<active_constraints>Use high-fidelity summary</active_constraints>
+<key_knowledge>Tools implemented: checkpoint_state, compress</key_knowledge>
+<task_state>1. [DONE] Implement tools
+2. [IN PROGRESS] Verify continuity
+</task_state>
+</state_snapshot>
+</state_checkpoint>",
+      },
+    ],
+    "role": "user",
+  },
+  {
+    "parts": [
+      {
+        "text": "Compression successful. I have clear context and I remember our mission.",
+      },
+    ],
+    "role": "model",
+  },
+]
+`;
@@ -0,0 +1,124 @@
+// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
+
+exports[`Distill Result Integration > should surgically replace a noisy tool result with a distilled version > 1-initial-prompt 1`] = `
+[
+  {
+    "parts": [
+      {
+        "text": "<SESSION_CONTEXT>",
+      },
+      {
+        "text": "Audit src/ for secrets",
+      },
+    ],
+    "role": "user",
+  },
+]
+`;
+
+exports[`Distill Result Integration > should surgically replace a noisy tool result with a distilled version > 2-request-with-noise 1`] = `
+[
+  {
+    "parts": [
+      {
+        "text": "<SESSION_CONTEXT>",
+      },
+      {
+        "text": "Audit src/ for secrets",
+      },
+    ],
+    "role": "user",
+  },
+  {
+    "parts": [
+      {
+        "text": "**Thought**
+I need to search for SECRET in the src/ directory.",
+      },
+      {
+        "functionCall": {
+          "args": {
+            "file_path": "src/foo.txt",
+          },
+          "id": "<CALL_ID>",
+          "name": "read_file",
+        },
+        "thoughtSignature": "skip_thought_signature_validator",
+      },
+    ],
+    "role": "model",
+  },
+  {
+    "parts": [
+      {
+        "functionResponse": {
+          "id": "<CALL_ID>",
+          "name": "read_file",
+          "response": {
+            "error": "File not found: <TEST_DIR>/src/foo.txt",
+          },
+        },
+      },
+    ],
+    "role": "user",
+  },
+]
+`;
+
+exports[`Distill Result Integration > should surgically replace a noisy tool result with a distilled version > final-curated-history 1`] = `
+[
+  {
+    "parts": [
+      {
+        "text": "<SESSION_CONTEXT>",
+      },
+      {
+        "text": "Audit src/ for secrets",
+      },
+    ],
+    "role": "user",
+  },
+  {
+    "parts": [
+      {
+        "text": "**Thought**
+I need to search for SECRET in the src/ directory.",
+      },
+      {
+        "functionCall": {
+          "args": {
+            "file_path": "src/foo.txt",
+          },
+          "id": "<CALL_ID>",
+          "name": "read_file",
+        },
+      },
+    ],
+    "role": "model",
+  },
+  {
+    "parts": [
+      {
+        "functionResponse": {
+          "id": "<CALL_ID>",
+          "name": "read_file",
+          "response": {
+            "distilled": true,
+            "distilled_output": "Found SECRET_KEY="12345" in src/env.ts",
+            "original_output_file": "<TMP_FILE>",
+          },
+        },
+      },
+    ],
+    "role": "user",
+  },
+  {
+    "parts": [
+      {
+        "text": "I found the SECRET_KEY="12345" in src/env.ts after distilling the search results.",
+      },
+    ],
+    "role": "model",
+  },
+]
+`;
@@ -4,7 +4,7 @@
 * SPDX-License-Identifier: Apache-2.0
 */

-import { describe, it, afterEach } from 'vitest';
+import { describe, it, expect, afterEach } from 'vitest';
 import { AppRig } from '../test-utils/AppRig.js';
 import path from 'node:path';
 import { fileURLToPath } from 'node:url';
@@ -26,26 +26,41 @@ describe('Continuous Session Integration', () => {
    );
    rig = new AppRig({
      fakeResponsesPath,
+      configOverrides: {
+        continuousSession: true,
+      },
    });
    await rig.initialize();
    rig.render();
    await rig.waitForIdle();

-    // Set policies to AUTO so it proceeds without asking user
-    rig.setToolPolicy('checkpoint_state', PolicyDecision.ALLOW);
-    rig.setToolPolicy('compress', PolicyDecision.ALLOW);
+    // Use ASK_USER to pause and inspect the curated history at key moments
+    rig.setToolPolicy('checkpoint_state', PolicyDecision.ASK_USER);
+    rig.setToolPolicy('compress', PolicyDecision.ASK_USER);

    // Start the quest
-    await rig.type('Start the mission');
+    await rig.type('Start the mission ' + 'PAD'.repeat(100));
    await rig.pressEnter();

    // 1. Wait for CheckpointState tool call
    await rig.waitForOutput('CheckpointState');
+    // Verify curated history BEFORE checkpoint is applied
+    expect(rig.getLastSentRequestContents()).toMatchSnapshot('1-before-checkpoint');
+    await rig.resolveTool('CheckpointState');

    // 2. Wait for Compress tool call
    await rig.waitForOutput('Compress');
+    // Verify curated history contains the checkpoint
+    expect(rig.getLastSentRequestContents()).toMatchSnapshot('2-with-checkpoint');
+    await rig.resolveTool('Compress');

    // 3. Wait for final model response after compression
    await rig.waitForOutput('Compression successful.');
+    await rig.waitForIdle();
+
+    // Verify the final curated history:
+    // - Should contain the high-fidelity snapshot
+    // - Should NOT contain pre-compression turns
+    expect(rig.getCuratedHistory()).toMatchSnapshot('final-curated-history');
  });
 });
@@ -0,0 +1,86 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, afterEach } from 'vitest';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { AppRig } from '../test-utils/AppRig.js';
+import { PolicyDecision } from '@google/gemini-cli-core';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+
+describe('Distill Result Integration', () => {
+  let rig: AppRig | undefined;
+
+  afterEach(async () => {
+    await rig?.unmount();
+  });
+
+  it('should surgically replace a noisy tool result with a distilled version', async () => {
+    const fakeResponsesPath = path.resolve(
+      __dirname,
+      '../test-utils/fixtures/distill_result.responses',
+    );
+    
+    rig = new AppRig({
+      fakeResponsesPath,
+      configOverrides: { 
+        continuousSession: true,
+        modelSteering: true,
+      },
+    });
+    
+    await rig.initialize();
+    rig.render();
+    await rig.waitForIdle();
+
+    rig.setMockCommands([
+      {
+        command: /read_file/,
+        result: {
+          output: 'NOISE\n'.repeat(50) + 'SECRET_KEY="12345"\n' + 'NOISE\n'.repeat(50),
+          exitCode: 0,
+        },
+      },
+    ]);
+
+    // Use ASK_USER to pause and inspect the request before each model turn
+    rig.setToolPolicy('read_file', PolicyDecision.ASK_USER);
+    rig.setToolPolicy('distill_result', PolicyDecision.ASK_USER);
+
+    // 1. Initial Prompt: Audit for secrets
+    await rig.sendMessage('Audit src/ for secrets');
+
+    // 2. Model calls run_shell_command (the "Noise Bomb")
+    await rig.waitForOutput('ReadFile');
+    // Verify the curated history sent to model contains the initial user prompt
+    expect(rig.getLastSentRequestContents()).toMatchSnapshot('1-initial-prompt');
+    
+    await rig.resolveTool('ReadFile');
+    
+    // 3. Model realizes it's noisy and calls distill_result
+    await rig.waitForOutput('DistillResult');
+    // Verify history now includes the massive noise
+    expect(rig.getLastSentRequestContents()).toMatchSnapshot('2-request-with-noise');
+    
+    await rig.resolveTool('DistillResult');
+
+    // 4. Model continues from the distilled state and finishes
+    await rig.waitForOutput(/found the SECRET_KEY/i);
+    await rig.waitForIdle();
+
+    // Verify the final curated history:
+    // - NO noise from the original read_file
+    // - original read_file response is replaced with our universal distillation schema
+    // - intermediate thoughts and the distill_result turn itself are elided
+    expect(rig.getCuratedHistory()).toMatchSnapshot('final-curated-history');
+
+    // Verify final output contains the signal
+    const output = rig.getStaticOutput();
+    expect(output).toContain('SECRET_KEY');
+    expect(output).toContain('12345');
+  });
+});