mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-13 15:40:57 -07:00
feat(core): Support auto-distillation for tool output.
This commit is contained in:
@@ -0,0 +1,72 @@
|
||||
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
|
||||
|
||||
exports[`Auto-distillation Integration > should truncate and summarize massive tool outputs, and we should golden the chat history 1`] = `
|
||||
[
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"text": "<SESSION_CONTEXT>",
|
||||
},
|
||||
],
|
||||
"role": "user",
|
||||
},
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"text": "Fetch the massive file.",
|
||||
},
|
||||
],
|
||||
"role": "user",
|
||||
},
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"text": "I will now fetch the data.",
|
||||
},
|
||||
{
|
||||
"functionCall": {
|
||||
"args": {
|
||||
"command": "cat large.txt",
|
||||
},
|
||||
"id": "<CALL_ID>",
|
||||
"name": "run_shell_command",
|
||||
},
|
||||
},
|
||||
],
|
||||
"role": "model",
|
||||
},
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"functionResponse": {
|
||||
"id": "<CALL_ID>",
|
||||
"name": "run_shell_command",
|
||||
"response": {
|
||||
"output": "Output too large. Showing first 10 and last 40 characters. For full output see: <TEST_DIR>/.gemini/tmp/<APP_RIG_ID>/tool-outputs/session-<SESSION_ID>/run_shell_command_<TIMESTAMP>_<INDEX>.txt
|
||||
Output: ca
|
||||
|
||||
... [40 characters omitted] ...
|
||||
|
||||
Exit Code: 1
|
||||
Process Group PGID: <PGID>
|
||||
|
||||
--- Structural Map of Truncated Content ---
|
||||
- Line 1: Header
|
||||
- Lines 2-5000: User data
|
||||
- Line 5001: Footer",
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
"role": "user",
|
||||
},
|
||||
{
|
||||
"parts": [
|
||||
{
|
||||
"text": "I got the summarized output. Task complete.",
|
||||
},
|
||||
],
|
||||
"role": "model",
|
||||
},
|
||||
]
|
||||
`;
|
||||
64
packages/cli/src/integration-tests/autoDistillation.test.tsx
Normal file
64
packages/cli/src/integration-tests/autoDistillation.test.tsx
Normal file
@@ -0,0 +1,64 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect, afterEach, vi } from 'vitest';
|
||||
import { AppRig } from '../test-utils/AppRig.js';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { PolicyDecision } from '@google/gemini-cli-core';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
describe('Auto-distillation Integration', () => {
  // Rig created by the test; kept at suite scope so afterEach can tear it down.
  let rig: AppRig | undefined;

  afterEach(async () => {
    if (rig !== undefined) {
      await rig.unmount();
    }
    vi.restoreAllMocks();
  });

  it('should truncate and summarize massive tool outputs, and we should golden the chat history', async () => {
    // Canned model responses replayed by the rig instead of a live model.
    const appRig = new AppRig({
      fakeResponsesPath: path.join(
        __dirname,
        '../test-utils/fixtures/auto-distillation.responses',
      ),
    });
    rig = appRig;

    await appRig.initialize();

    // Force a 50-char truncation threshold (> 75 chars triggers summarization),
    // so the 100-char mock output below goes through the distillation path.
    const rigConfig = appRig.getConfig();
    vi.spyOn(rigConfig, 'getTruncateToolOutputThreshold').mockReturnValue(50);

    // Require confirmation for the shell tool so the test resolves it explicitly.
    appRig.setToolPolicy('run_shell_command', PolicyDecision.ASK_USER);

    const massiveOutput = 'A'.repeat(100);
    appRig.setMockCommands([
      {
        command: /cat large.txt/,
        result: { output: massiveOutput, exitCode: 0 },
      },
    ]);

    appRig.render();
    await appRig.waitForIdle();

    await appRig.sendMessage('Fetch the massive file.');

    // Wait for the shell tool prompt, approve it, then wait for the final reply.
    await appRig.waitForOutput('Shell');
    await appRig.resolveTool('Shell');

    await appRig.waitForOutput('Task complete.');

    // Golden the scrubbed chat history.
    expect(appRig.getCuratedHistory()).toMatchSnapshot();
  });
});
|
||||
@@ -30,6 +30,7 @@ import {
|
||||
IdeClient,
|
||||
debugLogger,
|
||||
CoreToolCallStatus,
|
||||
ConsecaSafetyChecker,
|
||||
} from '@google/gemini-cli-core';
|
||||
import {
|
||||
type MockShellCommand,
|
||||
@@ -47,6 +48,7 @@ import type {
|
||||
TrackedCompletedToolCall,
|
||||
TrackedToolCall,
|
||||
} from '../ui/hooks/useToolScheduler.js';
|
||||
import type { Content, GenerateContentParameters } from '@google/genai';
|
||||
|
||||
// Global state observer for React-based signals
|
||||
const sessionStateMap = new Map<string, StreamingState>();
|
||||
@@ -153,6 +155,7 @@ export class AppRig {
|
||||
private settings: LoadedSettings | undefined;
|
||||
private testDir: string;
|
||||
private sessionId: string;
|
||||
private appRigId: string;
|
||||
|
||||
private pendingConfirmations = new Map<string, PendingConfirmation>();
|
||||
private breakpointTools = new Set<string | undefined>();
|
||||
@@ -168,6 +171,7 @@ export class AppRig {
|
||||
this.testDir = fs.mkdtempSync(
|
||||
path.join(os.tmpdir(), `gemini-app-rig-${uniqueId.slice(0, 8)}-`),
|
||||
);
|
||||
this.appRigId = path.basename(this.testDir).toLowerCase();
|
||||
this.sessionId = `test-session-${uniqueId}`;
|
||||
activeRigs.set(this.sessionId, this);
|
||||
}
|
||||
@@ -738,6 +742,10 @@ export class AppRig {
|
||||
// Forcefully clear IdeClient singleton promise
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-type-assertion
|
||||
(IdeClient as any).instancePromise = null;
|
||||
|
||||
// Reset Conseca singleton to avoid leaking config/state across tests
|
||||
ConsecaSafetyChecker.resetInstance();
|
||||
|
||||
vi.clearAllMocks();
|
||||
|
||||
this.config = undefined;
|
||||
@@ -754,4 +762,82 @@ export class AppRig {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the requests reported by the content generator's optional
 * `getSentRequests` method, or an empty array when that method is missing
 * or yields a falsy value.
 *
 * @throws Error when the rig has no config yet ('AppRig not initialized').
 */
getSentRequests() {
  const config = this.config;
  if (!config) {
    throw new Error('AppRig not initialized');
  }

  const generator = config.getContentGenerator();
  // Not every content generator exposes getSentRequests, hence the optional call.
  const sent = generator.getSentRequests?.();
  return sent || [];
}
|
||||
|
||||
/**
|
||||
* Helper to get the curated history (contents) sent in the most recent model request.
|
||||
* This method scrubs unstable data like temp paths and IDs for deterministic goldens.
|
||||
*/
|
||||
getLastSentRequestContents() {
  const sent = this.getSentRequests();
  // Nothing has been sent yet: there is nothing to scrub.
  if (sent.length === 0) {
    return [];
  }

  // Only the most recent request is of interest; treat missing contents as empty.
  const latest = sent[sent.length - 1];
  const latestContents = latest.contents || [];
  return this.scrubUnstableData(latestContents);
}
|
||||
|
||||
/**
|
||||
* Gets the final curated history of the active chat session.
|
||||
*/
|
||||
getCuratedHistory() {
  const config = this.config;
  if (!config) {
    throw new Error('AppRig not initialized');
  }

  const chat = config.getGeminiClient().getChat();
  // NOTE(review): `true` is presumably the "curated" flag of getHistory — confirm
  // against the chat implementation.
  const rawHistory = chat.getHistory(true);
  // Replace run-specific values so the result is usable as a golden.
  return this.scrubUnstableData(rawHistory);
}
|
||||
|
||||
/**
 * Returns a copy of `contents` in which run-specific values are replaced by
 * fixed placeholders, so the result is stable enough for golden snapshots.
 *
 * The copy is produced through a JSON round trip; the input is not mutated.
 *
 * Text-level substitutions (applied to the serialized JSON):
 * - `this.testDir`   -> `<TEST_DIR>`
 * - `this.appRigId`  -> `<APP_RIG_ID>`
 * - `this.sessionId` -> `<SESSION_ID>`
 * - `<name>_<13 digits>_<digits>.txt` -> `<name>_<TIMESTAMP>_<INDEX>.txt`
 * - `Process Group PGID: <digits>`    -> `Process Group PGID: <PGID>`
 *
 * Structural substitutions (only when the parsed result is a non-empty array):
 * - the first part of the first entry becomes `<SESSION_CONTEXT>` when its
 *   text contains `<session_context>`
 * - every `functionCall.id` and `functionResponse.id` becomes `<CALL_ID>`
 * - `functionResponse.response.original_output_file`, when present, becomes
 *   `<TMP_FILE>`
 *
 * @param contents - Chat history or request contents to scrub.
 * @returns The scrubbed copy, typed like the input.
 */
private scrubUnstableData<
  T extends
    | Content[]
    | GenerateContentParameters['contents']
    | readonly Content[],
>(contents: T): T {
  // Substitutes every literal occurrence of `value` inside JSON text.
  // The value is matched as plain text instead of being compiled into a
  // RegExp, so characters such as `.`, `(` or `\` in a temp path cannot change
  // what is matched. It is JSON-encoded first so that it lines up with the
  // escaped form used in the serialized text (e.g. `\` appears as `\\`).
  const replaceLiteral = (
    jsonText: string,
    value: string,
    placeholder: string,
  ): string => {
    const needle = JSON.stringify(value).slice(1, -1);
    // An empty needle would match between every character; leave text as-is.
    return needle === '' ? jsonText : jsonText.split(needle).join(placeholder);
  };

  // Deeply scrub unstable data. testDir goes first: appRigId is derived from
  // its basename, so the directory has to be replaced as a whole beforehand.
  let scrubbedString = JSON.stringify(contents);
  scrubbedString = replaceLiteral(scrubbedString, this.testDir, '<TEST_DIR>');
  scrubbedString = replaceLiteral(
    scrubbedString,
    this.appRigId,
    '<APP_RIG_ID>',
  );
  scrubbedString = replaceLiteral(
    scrubbedString,
    this.sessionId,
    '<SESSION_ID>',
  );
  scrubbedString = scrubbedString
    .replace(
      /([a-zA-Z0-9_]+)_([0-9]{13})_([0-9]+)\.txt/g,
      '$1_<TIMESTAMP>_<INDEX>.txt',
    )
    .replace(/Process Group PGID: \d+/g, 'Process Group PGID: <PGID>');

  // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
  const scrubbed = JSON.parse(scrubbedString) as T;

  if (Array.isArray(scrubbed) && scrubbed.length > 0) {
    // Collapse the session-context preamble to a single placeholder.
    // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
    const firstItem = scrubbed[0] as Content;
    if (firstItem.parts?.[0]?.text?.includes('<session_context>')) {
      firstItem.parts[0].text = '<SESSION_CONTEXT>';
    }

    // Normalize per-call identifiers and temp-file references.
    // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
    for (const content of scrubbed as Content[]) {
      if (content.parts) {
        for (const part of content.parts) {
          if (part.functionCall) {
            part.functionCall.id = '<CALL_ID>';
          }
          if (part.functionResponse) {
            part.functionResponse.id = '<CALL_ID>';
            if (
              part.functionResponse.response !== null &&
              typeof part.functionResponse.response === 'object' &&
              'original_output_file' in part.functionResponse.response
            ) {
              part.functionResponse.response['original_output_file'] =
                '<TMP_FILE>';
            }
          }
        }
      }
    }
  }

  return scrubbed;
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I will now fetch the data."},{"functionCall":{"name":"run_shell_command","args":{"command":"cat large.txt"}}}]},"finishReason":"STOP"}]}]}
|
||||
{"method":"generateContent","response":{"candidates":[{"content":{"role":"model","parts":[{"text":"- Line 1: Header\n- Lines 2-5000: User data\n- Line 5001: Footer"}]},"finishReason":"STOP"}]}}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I got the summarized output. Task complete."}]},"finishReason":"STOP"}]}]}
|
||||
Reference in New Issue
Block a user