feat(core): distill tool

This commit is contained in:
Your Name
2026-03-07 19:33:00 +00:00
parent ca184a386e
commit 6d1c6a9b06
39 changed files with 1853 additions and 1120 deletions
@@ -0,0 +1,107 @@
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
exports[`Continuous Session Integration > should handle checkpoint_state and manual compress tools correctly > 1-before-checkpoint 1`] = `
[
{
"parts": [
{
"text": "<SESSION_CONTEXT>",
},
{
"text": "Start the mission PADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPAD",
},
],
"role": "user",
},
]
`;
exports[`Continuous Session Integration > should handle checkpoint_state and manual compress tools correctly > 2-with-checkpoint 1`] = `
[
{
"parts": [
{
"text": "<SESSION_CONTEXT>",
},
{
"text": "<state_checkpoint>
GOAL: Implementation of session continuity.
PROGRESS: Tools implemented.
CONSTRAINT: Use high-fidelity summary.
</state_checkpoint>",
},
{
"text": "Start the mission PADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPAD",
},
],
"role": "user",
},
{
"parts": [
{
"text": "I will now checkpoint our progress.",
},
{
"functionCall": {
"args": {
"summary": "GOAL: Implementation of session continuity.
PROGRESS: Tools implemented.
CONSTRAINT: Use high-fidelity summary.",
},
"id": "<CALL_ID>",
"name": "checkpoint_state",
},
"thoughtSignature": "skip_thought_signature_validator",
},
],
"role": "model",
},
{
"parts": [
{
"functionResponse": {
"id": "<CALL_ID>",
"name": "checkpoint_state",
"response": {
"output": "First checkpoint created. No previous summary found.",
},
},
},
],
"role": "user",
},
]
`;
exports[`Continuous Session Integration > should handle checkpoint_state and manual compress tools correctly > final-curated-history 1`] = `
[
{
"parts": [
{
"text": "<SESSION_CONTEXT>",
},
{
"text": "<state_checkpoint>
<state_snapshot>
<overall_goal>Implement session continuity</overall_goal>
<active_constraints>Use high-fidelity summary</active_constraints>
<key_knowledge>Tools implemented: checkpoint_state, compress</key_knowledge>
<task_state>1. [DONE] Implement tools
2. [IN PROGRESS] Verify continuity
</task_state>
</state_snapshot>
</state_checkpoint>",
},
],
"role": "user",
},
{
"parts": [
{
"text": "Compression successful. I have clear context and I remember our mission.",
},
],
"role": "model",
},
]
`;
@@ -0,0 +1,124 @@
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
exports[`Distill Result Integration > should surgically replace a noisy tool result with a distilled version > 1-initial-prompt 1`] = `
[
{
"parts": [
{
"text": "<SESSION_CONTEXT>",
},
{
"text": "Audit src/ for secrets",
},
],
"role": "user",
},
]
`;
exports[`Distill Result Integration > should surgically replace a noisy tool result with a distilled version > 2-request-with-noise 1`] = `
[
{
"parts": [
{
"text": "<SESSION_CONTEXT>",
},
{
"text": "Audit src/ for secrets",
},
],
"role": "user",
},
{
"parts": [
{
"text": "**Thought**
I need to search for SECRET in the src/ directory.",
},
{
"functionCall": {
"args": {
"file_path": "src/foo.txt",
},
"id": "<CALL_ID>",
"name": "read_file",
},
"thoughtSignature": "skip_thought_signature_validator",
},
],
"role": "model",
},
{
"parts": [
{
"functionResponse": {
"id": "<CALL_ID>",
"name": "read_file",
"response": {
"error": "File not found: <TEST_DIR>/src/foo.txt",
},
},
},
],
"role": "user",
},
]
`;
exports[`Distill Result Integration > should surgically replace a noisy tool result with a distilled version > final-curated-history 1`] = `
[
{
"parts": [
{
"text": "<SESSION_CONTEXT>",
},
{
"text": "Audit src/ for secrets",
},
],
"role": "user",
},
{
"parts": [
{
"text": "**Thought**
I need to search for SECRET in the src/ directory.",
},
{
"functionCall": {
"args": {
"file_path": "src/foo.txt",
},
"id": "<CALL_ID>",
"name": "read_file",
},
},
],
"role": "model",
},
{
"parts": [
{
"functionResponse": {
"id": "<CALL_ID>",
"name": "read_file",
"response": {
"distilled": true,
"distilled_output": "Found SECRET_KEY="12345" in src/env.ts",
"original_output_file": "<TMP_FILE>",
},
},
},
],
"role": "user",
},
{
"parts": [
{
"text": "I found the SECRET_KEY="12345" in src/env.ts after distilling the search results.",
},
],
"role": "model",
},
]
`;
@@ -4,7 +4,7 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, afterEach } from 'vitest';
import { describe, it, expect, afterEach } from 'vitest';
import { AppRig } from '../test-utils/AppRig.js';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
@@ -26,26 +26,41 @@ describe('Continuous Session Integration', () => {
);
rig = new AppRig({
fakeResponsesPath,
configOverrides: {
continuousSession: true,
},
});
await rig.initialize();
rig.render();
await rig.waitForIdle();
// Set policies to AUTO so it proceeds without asking user
rig.setToolPolicy('checkpoint_state', PolicyDecision.ALLOW);
rig.setToolPolicy('compress', PolicyDecision.ALLOW);
// Use ASK_USER to pause and inspect the curated history at key moments
rig.setToolPolicy('checkpoint_state', PolicyDecision.ASK_USER);
rig.setToolPolicy('compress', PolicyDecision.ASK_USER);
// Start the quest
await rig.type('Start the mission');
await rig.type('Start the mission ' + 'PAD'.repeat(100));
await rig.pressEnter();
// 1. Wait for CheckpointState tool call
await rig.waitForOutput('CheckpointState');
// Verify curated history BEFORE checkpoint is applied
expect(rig.getLastSentRequestContents()).toMatchSnapshot('1-before-checkpoint');
await rig.resolveTool('CheckpointState');
// 2. Wait for Compress tool call
await rig.waitForOutput('Compress');
// Verify curated history contains the checkpoint
expect(rig.getLastSentRequestContents()).toMatchSnapshot('2-with-checkpoint');
await rig.resolveTool('Compress');
// 3. Wait for final model response after compression
await rig.waitForOutput('Compression successful.');
await rig.waitForIdle();
// Verify the final curated history:
// - Should contain the high-fidelity snapshot
// - Should NOT contain pre-compression turns
expect(rig.getCuratedHistory()).toMatchSnapshot('final-curated-history');
});
});
@@ -0,0 +1,86 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, afterEach } from 'vitest';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { AppRig } from '../test-utils/AppRig.js';
import { PolicyDecision } from '@google/gemini-cli-core';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
/**
 * Integration suite for the distill_result tool flow.
 *
 * Drives an AppRig through a scripted conversation (responses come from the
 * `distill_result.responses` fixture) and snapshots the request contents /
 * curated history at three points: before the first tool runs, after the
 * read_file tool has been resolved, and after the final model reply.
 */
describe('Distill Result Integration', () => {
// Shared across the test and the cleanup hook; stays undefined if setup never ran.
let rig: AppRig | undefined;
// Unmount the rig after every test; the optional call is a no-op when no rig was created.
afterEach(async () => {
await rig?.unmount();
});
it('should surgically replace a noisy tool result with a distilled version', async () => {
// Fixture path is resolved relative to this test file's directory.
const fakeResponsesPath = path.resolve(
__dirname,
'../test-utils/fixtures/distill_result.responses',
);
// Both config flags are enabled for this scenario.
rig = new AppRig({
fakeResponsesPath,
configOverrides: {
continuousSession: true,
modelSteering: true,
},
});
await rig.initialize();
rig.render();
await rig.waitForIdle();
// Register a mock result for anything matching /read_file/: a single
// SECRET_KEY line surrounded by 50 NOISE lines on each side.
// NOTE(review): the committed '2-request-with-noise' snapshot records a
// "File not found" error response for read_file rather than this output —
// confirm this mock actually applies to the read_file tool.
rig.setMockCommands([
{
command: /read_file/,
result: {
output: 'NOISE\n'.repeat(50) + 'SECRET_KEY="12345"\n' + 'NOISE\n'.repeat(50),
exitCode: 0,
},
},
]);
// Use ASK_USER to pause and inspect the request before each model turn
rig.setToolPolicy('read_file', PolicyDecision.ASK_USER);
rig.setToolPolicy('distill_result', PolicyDecision.ASK_USER);
// 1. Initial Prompt: Audit for secrets
await rig.sendMessage('Audit src/ for secrets');
// 2. Model calls read_file (the "Noise Bomb")
// ('ReadFile' is presumably the rendered display name of read_file — verify against the UI)
await rig.waitForOutput('ReadFile');
// Verify the curated history sent to model contains the initial user prompt
expect(rig.getLastSentRequestContents()).toMatchSnapshot('1-initial-prompt');
// Approve the paused tool call so the flow can continue.
await rig.resolveTool('ReadFile');
// 3. Model realizes it's noisy and calls distill_result
await rig.waitForOutput('DistillResult');
// Snapshot the request sent after read_file resolved; it is expected to
// carry the raw (undistilled) tool response.
expect(rig.getLastSentRequestContents()).toMatchSnapshot('2-request-with-noise');
await rig.resolveTool('DistillResult');
// 4. Model continues from the distilled state and finishes
await rig.waitForOutput(/found the SECRET_KEY/i);
await rig.waitForIdle();
// Verify the final curated history:
// - NO noise from the original read_file
// - original read_file response is replaced with our universal distillation schema
// - intermediate thoughts and the distill_result turn itself are elided
// NOTE(review): the committed 'final-curated-history' snapshot still contains
// the model's "**Thought**" text part — confirm the elision claim above.
expect(rig.getCuratedHistory()).toMatchSnapshot('final-curated-history');
// Verify final output contains the signal
const output = rig.getStaticOutput();
expect(output).toContain('SECRET_KEY');
expect(output).toContain('12345');
});
});