mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-10 21:30:40 -07:00
feat(core): distill tool
This commit is contained in:
@@ -0,0 +1,107 @@
|
||||
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
|
||||
|
||||
exports[`Continuous Session Integration > should handle checkpoint_state and manual compress tools correctly > 1-before-checkpoint 1`] = `
|
||||
[
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"text": "<SESSION_CONTEXT>",
|
||||
},
|
||||
{
|
||||
"text": "Start the mission PADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPAD",
|
||||
},
|
||||
],
|
||||
"role": "user",
|
||||
},
|
||||
]
|
||||
`;
|
||||
|
||||
exports[`Continuous Session Integration > should handle checkpoint_state and manual compress tools correctly > 2-with-checkpoint 1`] = `
|
||||
[
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"text": "<SESSION_CONTEXT>",
|
||||
},
|
||||
{
|
||||
"text": "<state_checkpoint>
|
||||
GOAL: Implementation of session continuity.
|
||||
PROGRESS: Tools implemented.
|
||||
CONSTRAINT: Use high-fidelity summary.
|
||||
</state_checkpoint>",
|
||||
},
|
||||
{
|
||||
"text": "Start the mission PADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPADPAD",
|
||||
},
|
||||
],
|
||||
"role": "user",
|
||||
},
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"text": "I will now checkpoint our progress.",
|
||||
},
|
||||
{
|
||||
"functionCall": {
|
||||
"args": {
|
||||
"summary": "GOAL: Implementation of session continuity.
|
||||
PROGRESS: Tools implemented.
|
||||
CONSTRAINT: Use high-fidelity summary.",
|
||||
},
|
||||
"id": "<CALL_ID>",
|
||||
"name": "checkpoint_state",
|
||||
},
|
||||
"thoughtSignature": "skip_thought_signature_validator",
|
||||
},
|
||||
],
|
||||
"role": "model",
|
||||
},
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"functionResponse": {
|
||||
"id": "<CALL_ID>",
|
||||
"name": "checkpoint_state",
|
||||
"response": {
|
||||
"output": "First checkpoint created. No previous summary found.",
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
"role": "user",
|
||||
},
|
||||
]
|
||||
`;
|
||||
|
||||
exports[`Continuous Session Integration > should handle checkpoint_state and manual compress tools correctly > final-curated-history 1`] = `
|
||||
[
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"text": "<SESSION_CONTEXT>",
|
||||
},
|
||||
{
|
||||
"text": "<state_checkpoint>
|
||||
<state_snapshot>
|
||||
<overall_goal>Implement session continuity</overall_goal>
|
||||
<active_constraints>Use high-fidelity summary</active_constraints>
|
||||
<key_knowledge>Tools implemented: checkpoint_state, compress</key_knowledge>
|
||||
<task_state>1. [DONE] Implement tools
|
||||
2. [IN PROGRESS] Verify continuity
|
||||
</task_state>
|
||||
</state_snapshot>
|
||||
</state_checkpoint>",
|
||||
},
|
||||
],
|
||||
"role": "user",
|
||||
},
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"text": "Compression successful. I have clear context and I remember our mission.",
|
||||
},
|
||||
],
|
||||
"role": "model",
|
||||
},
|
||||
]
|
||||
`;
|
||||
@@ -0,0 +1,124 @@
|
||||
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
|
||||
|
||||
exports[`Distill Result Integration > should surgically replace a noisy tool result with a distilled version > 1-initial-prompt 1`] = `
|
||||
[
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"text": "<SESSION_CONTEXT>",
|
||||
},
|
||||
{
|
||||
"text": "Audit src/ for secrets",
|
||||
},
|
||||
],
|
||||
"role": "user",
|
||||
},
|
||||
]
|
||||
`;
|
||||
|
||||
exports[`Distill Result Integration > should surgically replace a noisy tool result with a distilled version > 2-request-with-noise 1`] = `
|
||||
[
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"text": "<SESSION_CONTEXT>",
|
||||
},
|
||||
{
|
||||
"text": "Audit src/ for secrets",
|
||||
},
|
||||
],
|
||||
"role": "user",
|
||||
},
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"text": "**Thought**
|
||||
I need to search for SECRET in the src/ directory.",
|
||||
},
|
||||
{
|
||||
"functionCall": {
|
||||
"args": {
|
||||
"file_path": "src/foo.txt",
|
||||
},
|
||||
"id": "<CALL_ID>",
|
||||
"name": "read_file",
|
||||
},
|
||||
"thoughtSignature": "skip_thought_signature_validator",
|
||||
},
|
||||
],
|
||||
"role": "model",
|
||||
},
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"functionResponse": {
|
||||
"id": "<CALL_ID>",
|
||||
"name": "read_file",
|
||||
"response": {
|
||||
"error": "File not found: <TEST_DIR>/src/foo.txt",
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
"role": "user",
|
||||
},
|
||||
]
|
||||
`;
|
||||
|
||||
exports[`Distill Result Integration > should surgically replace a noisy tool result with a distilled version > final-curated-history 1`] = `
|
||||
[
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"text": "<SESSION_CONTEXT>",
|
||||
},
|
||||
{
|
||||
"text": "Audit src/ for secrets",
|
||||
},
|
||||
],
|
||||
"role": "user",
|
||||
},
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"text": "**Thought**
|
||||
I need to search for SECRET in the src/ directory.",
|
||||
},
|
||||
{
|
||||
"functionCall": {
|
||||
"args": {
|
||||
"file_path": "src/foo.txt",
|
||||
},
|
||||
"id": "<CALL_ID>",
|
||||
"name": "read_file",
|
||||
},
|
||||
},
|
||||
],
|
||||
"role": "model",
|
||||
},
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"functionResponse": {
|
||||
"id": "<CALL_ID>",
|
||||
"name": "read_file",
|
||||
"response": {
|
||||
"distilled": true,
|
||||
"distilled_output": "Found SECRET_KEY="12345" in src/env.ts",
|
||||
"original_output_file": "<TMP_FILE>",
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
"role": "user",
|
||||
},
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"text": "I found the SECRET_KEY="12345" in src/env.ts after distilling the search results.",
|
||||
},
|
||||
],
|
||||
"role": "model",
|
||||
},
|
||||
]
|
||||
`;
|
||||
@@ -4,7 +4,7 @@
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, afterEach } from 'vitest';
|
||||
import { describe, it, expect, afterEach } from 'vitest';
|
||||
import { AppRig } from '../test-utils/AppRig.js';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
@@ -26,26 +26,41 @@ describe('Continuous Session Integration', () => {
|
||||
);
|
||||
rig = new AppRig({
|
||||
fakeResponsesPath,
|
||||
configOverrides: {
|
||||
continuousSession: true,
|
||||
},
|
||||
});
|
||||
await rig.initialize();
|
||||
rig.render();
|
||||
await rig.waitForIdle();
|
||||
|
||||
// Set policies to AUTO so it proceeds without asking user
|
||||
rig.setToolPolicy('checkpoint_state', PolicyDecision.ALLOW);
|
||||
rig.setToolPolicy('compress', PolicyDecision.ALLOW);
|
||||
// Use ASK_USER to pause and inspect the curated history at key moments
|
||||
rig.setToolPolicy('checkpoint_state', PolicyDecision.ASK_USER);
|
||||
rig.setToolPolicy('compress', PolicyDecision.ASK_USER);
|
||||
|
||||
// Start the quest
|
||||
await rig.type('Start the mission');
|
||||
await rig.type('Start the mission ' + 'PAD'.repeat(100));
|
||||
await rig.pressEnter();
|
||||
|
||||
// 1. Wait for CheckpointState tool call
|
||||
await rig.waitForOutput('CheckpointState');
|
||||
// Verify curated history BEFORE checkpoint is applied
|
||||
expect(rig.getLastSentRequestContents()).toMatchSnapshot('1-before-checkpoint');
|
||||
await rig.resolveTool('CheckpointState');
|
||||
|
||||
// 2. Wait for Compress tool call
|
||||
await rig.waitForOutput('Compress');
|
||||
// Verify curated history contains the checkpoint
|
||||
expect(rig.getLastSentRequestContents()).toMatchSnapshot('2-with-checkpoint');
|
||||
await rig.resolveTool('Compress');
|
||||
|
||||
// 3. Wait for final model response after compression
|
||||
await rig.waitForOutput('Compression successful.');
|
||||
await rig.waitForIdle();
|
||||
|
||||
// Verify the final curated history:
|
||||
// - Should contain the high-fidelity snapshot
|
||||
// - Should NOT contain pre-compression turns
|
||||
expect(rig.getCuratedHistory()).toMatchSnapshot('final-curated-history');
|
||||
});
|
||||
});
|
||||
|
||||
86
packages/cli/src/integration-tests/distill_result.test.tsx
Normal file
86
packages/cli/src/integration-tests/distill_result.test.tsx
Normal file
@@ -0,0 +1,86 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect, afterEach } from 'vitest';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { AppRig } from '../test-utils/AppRig.js';
|
||||
import { PolicyDecision } from '@google/gemini-cli-core';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
describe('Distill Result Integration', () => {
|
||||
let rig: AppRig | undefined;
|
||||
|
||||
afterEach(async () => {
|
||||
await rig?.unmount();
|
||||
});
|
||||
|
||||
it('should surgically replace a noisy tool result with a distilled version', async () => {
|
||||
const fakeResponsesPath = path.resolve(
|
||||
__dirname,
|
||||
'../test-utils/fixtures/distill_result.responses',
|
||||
);
|
||||
|
||||
rig = new AppRig({
|
||||
fakeResponsesPath,
|
||||
configOverrides: {
|
||||
continuousSession: true,
|
||||
modelSteering: true,
|
||||
},
|
||||
});
|
||||
|
||||
await rig.initialize();
|
||||
rig.render();
|
||||
await rig.waitForIdle();
|
||||
|
||||
rig.setMockCommands([
|
||||
{
|
||||
command: /read_file/,
|
||||
result: {
|
||||
output: 'NOISE\n'.repeat(50) + 'SECRET_KEY="12345"\n' + 'NOISE\n'.repeat(50),
|
||||
exitCode: 0,
|
||||
},
|
||||
},
|
||||
]);
|
||||
|
||||
// Use ASK_USER to pause and inspect the request before each model turn
|
||||
rig.setToolPolicy('read_file', PolicyDecision.ASK_USER);
|
||||
rig.setToolPolicy('distill_result', PolicyDecision.ASK_USER);
|
||||
|
||||
// 1. Initial Prompt: Audit for secrets
|
||||
await rig.sendMessage('Audit src/ for secrets');
|
||||
|
||||
// 2. Model calls run_shell_command (the "Noise Bomb")
|
||||
await rig.waitForOutput('ReadFile');
|
||||
// Verify the curated history sent to model contains the initial user prompt
|
||||
expect(rig.getLastSentRequestContents()).toMatchSnapshot('1-initial-prompt');
|
||||
|
||||
await rig.resolveTool('ReadFile');
|
||||
|
||||
// 3. Model realizes it's noisy and calls distill_result
|
||||
await rig.waitForOutput('DistillResult');
|
||||
// Verify history now includes the massive noise
|
||||
expect(rig.getLastSentRequestContents()).toMatchSnapshot('2-request-with-noise');
|
||||
|
||||
await rig.resolveTool('DistillResult');
|
||||
|
||||
// 4. Model continues from the distilled state and finishes
|
||||
await rig.waitForOutput(/found the SECRET_KEY/i);
|
||||
await rig.waitForIdle();
|
||||
|
||||
// Verify the final curated history:
|
||||
// - NO noise from the original read_file
|
||||
// - original read_file response is replaced with our universal distillation schema
|
||||
// - intermediate thoughts and the distill_result turn itself are elided
|
||||
expect(rig.getCuratedHistory()).toMatchSnapshot('final-curated-history');
|
||||
|
||||
// Verify final output contains the signal
|
||||
const output = rig.getStaticOutput();
|
||||
expect(output).toContain('SECRET_KEY');
|
||||
expect(output).toContain('12345');
|
||||
});
|
||||
});
|
||||
@@ -150,6 +150,7 @@ export class AppRig {
|
||||
private settings: LoadedSettings | undefined;
|
||||
private testDir: string;
|
||||
private sessionId: string;
|
||||
private appRigId: string;
|
||||
|
||||
private pendingConfirmations = new Map<string, PendingConfirmation>();
|
||||
private breakpointTools = new Set<string | undefined>();
|
||||
@@ -165,6 +166,7 @@ export class AppRig {
|
||||
this.testDir = fs.mkdtempSync(
|
||||
path.join(os.tmpdir(), `gemini-app-rig-${uniqueId.slice(0, 8)}-`),
|
||||
);
|
||||
this.appRigId = path.basename(this.testDir).toLowerCase();
|
||||
this.sessionId = `test-session-${uniqueId}`;
|
||||
activeRigs.set(this.sessionId, this);
|
||||
}
|
||||
@@ -702,6 +704,67 @@ export class AppRig {
|
||||
await this.pressEnter();
|
||||
}
|
||||
|
||||
getSentRequests() {
|
||||
if (!this.config) throw new Error('AppRig not initialized');
|
||||
return this.config.getContentGenerator().getSentRequests?.() || [];
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper to get the curated history (contents) sent in the most recent model request.
|
||||
* This method scrubs unstable data like temp paths and IDs for deterministic goldens.
|
||||
*/
|
||||
getLastSentRequestContents() {
|
||||
const requests = this.getSentRequests();
|
||||
if (requests.length === 0) return [];
|
||||
const contents = requests[requests.length - 1].contents || [];
|
||||
return this.scrubUnstableData(contents);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the final curated history of the active chat session.
|
||||
*/
|
||||
getCuratedHistory() {
|
||||
if (!this.config) throw new Error('AppRig not initialized');
|
||||
const history = this.config.getGeminiClient().getChat().getHistory(true);
|
||||
return this.scrubUnstableData(history);
|
||||
}
|
||||
|
||||
private scrubUnstableData(contents: any) {
|
||||
// Deeply scrub unstable data
|
||||
const scrubbed = JSON.parse(
|
||||
JSON.stringify(contents)
|
||||
.replace(new RegExp(this.testDir, 'g'), '<TEST_DIR>')
|
||||
.replace(new RegExp(this.appRigId, 'g'), '<APP_RIG_ID>')
|
||||
.replace(new RegExp(this.sessionId, 'g'), '<SESSION_ID>'),
|
||||
);
|
||||
|
||||
if (scrubbed.length > 0) {
|
||||
if (scrubbed[0].parts[0].text?.includes('<session_context>')) {
|
||||
scrubbed[0].parts[0].text = '<SESSION_CONTEXT>';
|
||||
}
|
||||
}
|
||||
|
||||
const removeIds = (obj: any) => {
|
||||
if (Array.isArray(obj)) {
|
||||
obj.forEach(removeIds);
|
||||
} else if (obj && typeof obj === 'object') {
|
||||
if (obj.functionCall) {
|
||||
obj.functionCall.id = '<CALL_ID>';
|
||||
}
|
||||
if (obj.functionResponse) {
|
||||
obj.functionResponse.id = '<CALL_ID>';
|
||||
if (obj.functionResponse?.response?.original_output_file) {
|
||||
obj.functionResponse.response.original_output_file = '<TMP_FILE>';
|
||||
}
|
||||
}
|
||||
Object.values(obj).forEach(removeIds);
|
||||
}
|
||||
};
|
||||
removeIds(scrubbed);
|
||||
|
||||
return scrubbed;
|
||||
}
|
||||
|
||||
async unmount() {
|
||||
// Clean up global state for this session
|
||||
sessionStateMap.delete(this.sessionId);
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I will now checkpoint our progress."},{"functionCall":{"name":"checkpoint_state","args":{"summary":"GOAL: Implementation of session continuity.\nPROGRESS: Tools implemented.\nCONSTRAINT: Use high-fidelity summary."}}}]},"finishReason":"STOP"}]}]}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"Checkpoint created. Now I will trigger compression to clear the context."},{"functionCall":{"name":"compress","args":{"force":true}}}]},"finishReason":"STOP"}]}]}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"Checkpoint created. Now I will trigger compression to clear the context."},{"functionCall":{"name":"compress","args":{}}}]},"finishReason":"STOP"}]}]}
|
||||
{"method":"generateContent","response":{"candidates":[{"content":{"role":"model","parts":[{"text":"<state_snapshot>\n<overall_goal>Implement session continuity</overall_goal>\n<active_constraints>Use high-fidelity summary</active_constraints>\n<key_knowledge>Tools implemented: checkpoint_state, compress</key_knowledge>\n<task_state>1. [DONE] Implement tools\n2. [IN PROGRESS] Verify continuity\n</task_state>\n</state_snapshot>"}]}}],"finishReason":"STOP"}}
|
||||
{"method":"generateContent","response":{"candidates":[{"content":{"role":"model","parts":[{"text":"The <state_snapshot> is accurate and preserves all critical details."}]},"finishReason":"STOP"}]}}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"Compression successful. I have clear context and I remember our mission."}]},"finishReason":"STOP"}]}]}
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"**Thought**\nI need to search for SECRET in the src/ directory."},{"functionCall":{"name":"read_file","args":{"file_path":"src/foo.txt"}}}]},"finishReason":"STOP"}]}]}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"**Thought**\nThe output is very noisy. I will distill it to keep the context clean."},{"functionCall":{"name":"distill_result","args":{"revised_text":"Found SECRET_KEY=\"12345\" in src/env.ts"}}}]},"finishReason":"STOP"}]}]}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I found the SECRET_KEY=\"12345\" in src/env.ts after distilling the search results."}]},"finishReason":"STOP"}]}]}
|
||||
Reference in New Issue
Block a user