feat(core): Support auto-distillation for tool output.

This commit is contained in:
Your Name
2026-03-11 01:18:41 +00:00
parent e22d9917b7
commit ddb7b65897
13 changed files with 496 additions and 128 deletions

View File

@@ -0,0 +1,72 @@
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
exports[`Auto-distillation Integration > should truncate and summarize massive tool outputs, and we should golden the chat history 1`] = `
[
{
"parts": [
{
"text": "<SESSION_CONTEXT>",
},
],
"role": "user",
},
{
"parts": [
{
"text": "Fetch the massive file.",
},
],
"role": "user",
},
{
"parts": [
{
"text": "I will now fetch the data.",
},
{
"functionCall": {
"args": {
"command": "cat large.txt",
},
"id": "<CALL_ID>",
"name": "run_shell_command",
},
},
],
"role": "model",
},
{
"parts": [
{
"functionResponse": {
"id": "<CALL_ID>",
"name": "run_shell_command",
"response": {
"output": "Output too large. Showing first 10 and last 40 characters. For full output see: <TEST_DIR>/.gemini/tmp/<APP_RIG_ID>/tool-outputs/session-<SESSION_ID>/run_shell_command_<TIMESTAMP>_<INDEX>.txt
Output: ca
... [40 characters omitted] ...
Exit Code: 1
Process Group PGID: <PGID>
--- Structural Map of Truncated Content ---
- Line 1: Header
- Lines 2-5000: User data
- Line 5001: Footer",
},
},
},
],
"role": "user",
},
{
"parts": [
{
"text": "I got the summarized output. Task complete.",
},
],
"role": "model",
},
]
`;

View File

@@ -0,0 +1,64 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, afterEach, vi } from 'vitest';
import { AppRig } from '../test-utils/AppRig.js';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { PolicyDecision } from '@google/gemini-cli-core';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
describe('Auto-distillation Integration', () => {
  // Shared with afterEach so whatever rig the test created gets unmounted.
  let rig: AppRig | undefined;

  afterEach(async () => {
    const mounted = rig;
    if (mounted !== undefined) {
      await mounted.unmount();
    }
    vi.restoreAllMocks();
  });

  it('should truncate and summarize massive tool outputs, and we should golden the chat history', async () => {
    // Canned model responses that the rig replays for this scenario.
    const responsesFixture = path.join(
      __dirname,
      '../test-utils/fixtures/auto-distillation.responses',
    );

    const appRig = new AppRig({ fakeResponsesPath: responsesFixture });
    rig = appRig;
    await appRig.initialize();

    // Force a 50-character truncation threshold; per the original note,
    // output longer than 75 characters triggers summarization. The mocked
    // command below emits 100 characters.
    vi.spyOn(
      appRig.getConfig(),
      'getTruncateToolOutputThreshold',
    ).mockReturnValue(50);

    // Require explicit confirmation so the test controls when the tool runs.
    appRig.setToolPolicy('run_shell_command', PolicyDecision.ASK_USER);

    const largeFileCommand = {
      command: /cat large.txt/,
      result: {
        output: 'A'.repeat(100),
        exitCode: 0,
      },
    };
    appRig.setMockCommands([largeFileCommand]);

    appRig.render();
    await appRig.waitForIdle();

    await appRig.sendMessage('Fetch the massive file.');

    // Wait for the shell tool to surface, then let it proceed.
    const shellToolLabel = 'Shell';
    await appRig.waitForOutput(shellToolLabel);
    await appRig.resolveTool(shellToolLabel);

    await appRig.waitForOutput('Task complete.');

    // Golden the scrubbed chat history.
    expect(appRig.getCuratedHistory()).toMatchSnapshot();
  });
});

View File

@@ -30,6 +30,7 @@ import {
IdeClient,
debugLogger,
CoreToolCallStatus,
ConsecaSafetyChecker,
} from '@google/gemini-cli-core';
import {
type MockShellCommand,
@@ -47,6 +48,7 @@ import type {
TrackedCompletedToolCall,
TrackedToolCall,
} from '../ui/hooks/useToolScheduler.js';
import type { Content, GenerateContentParameters } from '@google/genai';
// Global state observer for React-based signals
const sessionStateMap = new Map<string, StreamingState>();
@@ -153,6 +155,7 @@ export class AppRig {
private settings: LoadedSettings | undefined;
private testDir: string;
private sessionId: string;
private appRigId: string;
private pendingConfirmations = new Map<string, PendingConfirmation>();
private breakpointTools = new Set<string | undefined>();
@@ -168,6 +171,7 @@ export class AppRig {
this.testDir = fs.mkdtempSync(
path.join(os.tmpdir(), `gemini-app-rig-${uniqueId.slice(0, 8)}-`),
);
this.appRigId = path.basename(this.testDir).toLowerCase();
this.sessionId = `test-session-${uniqueId}`;
activeRigs.set(this.sessionId, this);
}
@@ -738,6 +742,10 @@ export class AppRig {
// Forcefully clear IdeClient singleton promise
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-type-assertion
(IdeClient as any).instancePromise = null;
// Reset Conseca singleton to avoid leaking config/state across tests
ConsecaSafetyChecker.resetInstance();
vi.clearAllMocks();
this.config = undefined;
@@ -754,4 +762,82 @@ export class AppRig {
}
}
}
/**
 * Returns the requests reported by the content generator's optional
 * `getSentRequests` method.
 *
 * Falls back to an empty array when the generator does not expose that
 * method or when it returns a falsy value.
 *
 * @throws Error if the rig has no config yet.
 */
getSentRequests() {
  const activeConfig = this.config;
  if (!activeConfig) {
    throw new Error('AppRig not initialized');
  }
  const generator = activeConfig.getContentGenerator();
  const recorded = generator.getSentRequests?.();
  return recorded || [];
}
/**
 * Returns the `contents` of the last entry from `getSentRequests()`, passed
 * through `scrubUnstableData` so the result is stable enough for goldens.
 *
 * Yields an empty array when no requests were recorded; a request without
 * `contents` is treated as having an empty list.
 */
getLastSentRequestContents() {
  const sent = this.getSentRequests();
  const total = sent.length;
  if (total === 0) {
    return [];
  }
  const { contents } = sent[total - 1];
  return this.scrubUnstableData(contents || []);
}
/**
 * Returns the active chat session's history with unstable data scrubbed.
 *
 * The history is read via `getHistory(true)` on the Gemini client's chat.
 * NOTE(review): the `true` argument presumably selects the curated view —
 * confirm against the chat API.
 *
 * @throws Error if the rig has no config yet.
 */
getCuratedHistory() {
  const activeConfig = this.config;
  if (!activeConfig) {
    throw new Error('AppRig not initialized');
  }
  const chat = activeConfig.getGeminiClient().getChat();
  return this.scrubUnstableData(chat.getHistory(true));
}
/**
 * Returns a deep copy of `contents` with run-specific values replaced by
 * stable placeholders, so the result can be compared against a golden.
 *
 * Scrubbed on the serialized JSON, in this order:
 *  - the rig's test directory        -> `<TEST_DIR>`
 *  - the rig id                      -> `<APP_RIG_ID>`
 *  - the session id                  -> `<SESSION_ID>`
 *  - `<name>_<13 digits>_<n>.txt`    -> `<name>_<TIMESTAMP>_<INDEX>.txt`
 *  - `Process Group PGID: <digits>`  -> `Process Group PGID: <PGID>`
 *
 * Scrubbed on the parsed structure (only when it is a non-empty array):
 *  - the first item's first text part, if it contains `<session_context>`,
 *    becomes `<SESSION_CONTEXT>`
 *  - every `functionCall.id` / `functionResponse.id` becomes `<CALL_ID>`
 *  - a `functionResponse.response.original_output_file` becomes `<TMP_FILE>`
 *
 * The directory and ids are matched as literal text. They were previously
 * compiled as regular expressions, so any metacharacter in a path (for
 * example `.` or a Windows `\`) changed what was matched or produced an
 * invalid pattern.
 *
 * @param contents The contents/history to scrub; it is not mutated.
 * @returns The scrubbed copy, typed like the input.
 */
private scrubUnstableData<
  T extends
    | Content[]
    | GenerateContentParameters['contents']
    | readonly Content[],
>(contents: T): T {
  // Literal (non-regex) replacement inside serialized JSON. The needle is the
  // JSON-escaped form of the value, because that is how it appears in the
  // serialized text (e.g. backslashes are doubled).
  const replaceLiteral = (
    haystack: string,
    value: string,
    placeholder: string,
  ): string => {
    const needle = JSON.stringify(value).slice(1, -1);
    // An empty needle would match between every character; leave text as-is.
    if (needle.length === 0) {
      return haystack;
    }
    return haystack.split(needle).join(placeholder);
  };

  // Deeply scrub unstable data. Order matters: the test directory contains
  // the rig id's source text, so the full path is replaced first.
  let scrubbedString = JSON.stringify(contents);
  scrubbedString = replaceLiteral(scrubbedString, this.testDir, '<TEST_DIR>');
  scrubbedString = replaceLiteral(
    scrubbedString,
    this.appRigId,
    '<APP_RIG_ID>',
  );
  scrubbedString = replaceLiteral(
    scrubbedString,
    this.sessionId,
    '<SESSION_ID>',
  );
  scrubbedString = scrubbedString
    .replace(
      /([a-zA-Z0-9_]+)_([0-9]{13})_([0-9]+)\.txt/g,
      '$1_<TIMESTAMP>_<INDEX>.txt',
    )
    .replace(/Process Group PGID: \d+/g, 'Process Group PGID: <PGID>');

  // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
  const scrubbed = JSON.parse(scrubbedString) as T;

  if (Array.isArray(scrubbed) && scrubbed.length > 0) {
    // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
    const firstItem = scrubbed[0] as Content;
    if (firstItem.parts?.[0]?.text?.includes('<session_context>')) {
      firstItem.parts[0].text = '<SESSION_CONTEXT>';
    }

    // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
    for (const content of scrubbed as Content[]) {
      if (content.parts) {
        for (const part of content.parts) {
          if (part.functionCall) {
            part.functionCall.id = '<CALL_ID>';
          }
          if (part.functionResponse) {
            part.functionResponse.id = '<CALL_ID>';
            if (
              part.functionResponse.response !== null &&
              typeof part.functionResponse.response === 'object' &&
              'original_output_file' in part.functionResponse.response
            ) {
              part.functionResponse.response['original_output_file'] =
                '<TMP_FILE>';
            }
          }
        }
      }
    }
  }

  return scrubbed;
}
}

View File

@@ -0,0 +1,3 @@
{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I will now fetch the data."},{"functionCall":{"name":"run_shell_command","args":{"command":"cat large.txt"}}}]},"finishReason":"STOP"}]}]}
{"method":"generateContent","response":{"candidates":[{"content":{"role":"model","parts":[{"text":"- Line 1: Header\n- Lines 2-5000: User data\n- Line 5001: Footer"}]},"finishReason":"STOP"}]}}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I got the summarized output. Task complete."}]},"finishReason":"STOP"}]}]}