feat(core): Support auto-distillation for tool output.

This commit is contained in:
Your Name
2026-03-11 01:18:41 +00:00
parent e22d9917b7
commit ddb7b65897
13 changed files with 496 additions and 128 deletions

View File

@@ -46,6 +46,8 @@ export interface ContentGenerator {
embedContent(request: EmbedContentParameters): Promise<EmbedContentResponse>;
getSentRequests?(): GenerateContentParameters[];
userTier?: UserTierId;
userTierName?: string;

View File

@@ -42,12 +42,17 @@ export type FakeResponse =
// CLI argument.
export class FakeContentGenerator implements ContentGenerator {
private callCounter = 0;
private sentRequests: GenerateContentParameters[] = [];
userTier?: UserTierId;
userTierName?: string;
paidTier?: GeminiUserTier;
constructor(private readonly responses: FakeResponse[]) {}
/**
 * Exposes the requests this fake has recorded so far.
 *
 * @returns The internal `sentRequests` array itself (not a copy), so later
 * pushes by the generator are visible through the returned reference.
 */
getSentRequests(): GenerateContentParameters[] {
return this.sentRequests;
}
static async fromFile(filePath: string): Promise<FakeContentGenerator> {
const fileContent = await promises.readFile(filePath, 'utf-8');
const responses = fileContent
@@ -84,6 +89,7 @@ export class FakeContentGenerator implements ContentGenerator {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
role: LlmRole,
): Promise<GenerateContentResponse> {
this.sentRequests.push(request);
// eslint-disable-next-line @typescript-eslint/no-unsafe-return
return Object.setPrototypeOf(
this.getNextResponse('generateContent', request),
@@ -97,6 +103,7 @@ export class FakeContentGenerator implements ContentGenerator {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
role: LlmRole,
): Promise<AsyncGenerator<GenerateContentResponse>> {
this.sentRequests.push(request);
const responses = this.getNextResponse('generateContentStream', request);
async function* stream() {
for (const response of responses) {

View File

@@ -168,6 +168,10 @@ export class LoggingContentGenerator implements ContentGenerator {
return this.wrapped.paidTier;
}
/**
 * Forwards to the wrapped generator's request log.
 *
 * @returns Whatever the wrapped generator reports, or an empty array when
 * the wrapped generator does not implement `getSentRequests`.
 */
getSentRequests?(): GenerateContentParameters[] {
  const recorded = this.wrapped.getSentRequests?.();
  return recorded || [];
}
private logApiRequest(
contents: Content[],
model: string,

View File

@@ -39,6 +39,10 @@ export class RecordingContentGenerator implements ContentGenerator {
return this.realGenerator.userTierName;
}
/**
 * Forwards to the real generator's request log.
 *
 * @returns Whatever the real generator reports, or an empty array when the
 * real generator does not implement `getSentRequests`.
 */
getSentRequests?(): GenerateContentParameters[] {
  const recorded = this.realGenerator.getSentRequests?.();
  return recorded || [];
}
async generateContent(
request: GenerateContentParameters,
userPromptId: string,

View File

@@ -22,6 +22,7 @@ export * from './policy/integrity.js';
export * from './billing/index.js';
export * from './confirmation-bus/types.js';
export * from './confirmation-bus/message-bus.js';
export * from './safety/conseca/conseca.js';
// Export Commands logic
export * from './commands/extensions.js';

View File

@@ -335,8 +335,10 @@ describe('ToolExecutor', () => {
it('should truncate large shell output', async () => {
// 1. Setup Config for Truncation
vi.spyOn(config, 'getTruncateToolOutputThreshold').mockReturnValue(10);
vi.spyOn(config.storage, 'getProjectTempDir').mockReturnValue('/tmp');
vi.spyOn(config.storage, 'getProjectTempDir').mockReturnValue('/tmp');
const mockTool = new MockTool({ name: SHELL_TOOL_NAME });
const invocation = mockTool.build({});
@@ -396,8 +398,10 @@ describe('ToolExecutor', () => {
it('should truncate large MCP tool output with single text Part', async () => {
// 1. Setup Config for Truncation
vi.spyOn(config, 'getTruncateToolOutputThreshold').mockReturnValue(10);
vi.spyOn(config.storage, 'getProjectTempDir').mockReturnValue('/tmp');
vi.spyOn(config.storage, 'getProjectTempDir').mockReturnValue('/tmp');
const mcpToolName = 'get_big_text';
const messageBus = createMockMessageBus();
@@ -440,8 +444,9 @@ describe('ToolExecutor', () => {
});
// 4. Verify Truncation Logic
const stringifiedLongText = JSON.stringify([{ text: longText }], null, 2);
expect(fileUtils.saveTruncatedToolOutput).toHaveBeenCalledWith(
longText,
stringifiedLongText,
mcpToolName,
'call-mcp-trunc',
expect.any(String),
@@ -449,7 +454,7 @@ describe('ToolExecutor', () => {
);
expect(fileUtils.formatTruncatedToolOutput).toHaveBeenCalledWith(
longText,
stringifiedLongText,
'/tmp/truncated_output.txt',
10,
);
@@ -460,8 +465,9 @@ describe('ToolExecutor', () => {
}
});
it('should not truncate MCP tool output with multiple Parts', async () => {
it('should truncate MCP tool output with multiple Parts', async () => {
vi.spyOn(config, 'getTruncateToolOutputThreshold').mockReturnValue(10);
vi.spyOn(config.storage, 'getProjectTempDir').mockReturnValue('/tmp');
const messageBus = createMockMessageBus();
const mcpTool = new DiscoveredMCPTool(
@@ -501,9 +507,26 @@ describe('ToolExecutor', () => {
onUpdateToolCall: vi.fn(),
});
// Should NOT have been truncated
expect(fileUtils.saveTruncatedToolOutput).not.toHaveBeenCalled();
expect(fileUtils.formatTruncatedToolOutput).not.toHaveBeenCalled();
const longText1 = 'This is long text that exceeds the threshold.';
const stringifiedLongText = JSON.stringify(
[{ text: longText1 }, { text: 'second part' }],
null,
2,
);
// Should HAVE been truncated now
expect(fileUtils.saveTruncatedToolOutput).toHaveBeenCalledWith(
stringifiedLongText,
'get_big_text',
'call-mcp-multi',
expect.any(String),
'test-session-id',
);
expect(fileUtils.formatTruncatedToolOutput).toHaveBeenCalledWith(
stringifiedLongText,
'/tmp/truncated_output.txt',
10,
);
expect(result.status).toBe(CoreToolCallStatus.Success);
});
@@ -668,8 +691,10 @@ describe('ToolExecutor', () => {
it('should truncate large shell output even on cancellation', async () => {
// 1. Setup Config for Truncation
vi.spyOn(config, 'getTruncateToolOutputThreshold').mockReturnValue(10);
vi.spyOn(config.storage, 'getProjectTempDir').mockReturnValue('/tmp');
vi.spyOn(config.storage, 'getProjectTempDir').mockReturnValue('/tmp');
const mockTool = new MockTool({ name: SHELL_TOOL_NAME });
const invocation = mockTool.build({});

View File

@@ -6,8 +6,6 @@
import {
ToolErrorType,
ToolOutputTruncatedEvent,
logToolOutputTruncated,
runInDevTraceSpan,
type ToolCallRequestInfo,
type ToolCallResponseInfo,
@@ -17,14 +15,11 @@ import {
type ToolLiveOutput,
} from '../index.js';
import { isAbortError } from '../utils/errors.js';
import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
import { ToolOutputDistillationService } from '../services/toolDistillationService.js';
import { ShellToolInvocation } from '../tools/shell.js';
import { DiscoveredMCPTool } from '../tools/mcp-tool.js';
import { executeToolWithHooks } from '../core/coreToolHookTriggers.js';
import {
saveTruncatedToolOutput,
formatTruncatedToolOutput,
} from '../utils/fileUtils.js';
import { convertToFunctionResponse } from '../utils/generateContentResponseUtilities.js';
import {
CoreToolCallStatus,
@@ -195,90 +190,12 @@ export class ToolExecutor {
call: ToolCall,
content: PartListUnion,
): Promise<{ truncatedContent: PartListUnion; outputFile?: string }> {
const toolName = call.request.name;
const callId = call.request.callId;
let outputFile: string | undefined;
if (typeof content === 'string' && toolName === SHELL_TOOL_NAME) {
const threshold = this.config.getTruncateToolOutputThreshold();
if (threshold > 0 && content.length > threshold) {
const originalContentLength = content.length;
const { outputFile: savedPath } = await saveTruncatedToolOutput(
content,
toolName,
callId,
this.config.storage.getProjectTempDir(),
this.context.promptId,
);
outputFile = savedPath;
const truncatedContent = formatTruncatedToolOutput(
content,
outputFile,
threshold,
);
logToolOutputTruncated(
this.config,
new ToolOutputTruncatedEvent(call.request.prompt_id, {
toolName,
originalContentLength,
truncatedContentLength: truncatedContent.length,
threshold,
}),
);
return { truncatedContent, outputFile };
}
} else if (
Array.isArray(content) &&
content.length === 1 &&
'tool' in call &&
call.tool instanceof DiscoveredMCPTool
) {
const firstPart = content[0];
if (typeof firstPart === 'object' && typeof firstPart.text === 'string') {
const textContent = firstPart.text;
const threshold = this.config.getTruncateToolOutputThreshold();
if (threshold > 0 && textContent.length > threshold) {
const originalContentLength = textContent.length;
const { outputFile: savedPath } = await saveTruncatedToolOutput(
textContent,
toolName,
callId,
this.config.storage.getProjectTempDir(),
this.context.promptId,
);
outputFile = savedPath;
const truncatedText = formatTruncatedToolOutput(
textContent,
outputFile,
threshold,
);
// We need to return a NEW array to avoid mutating the original toolResult if it matters,
// though here we are creating the response so it's probably fine to mutate or return new.
const truncatedContent: Part[] = [
{ ...firstPart, text: truncatedText },
];
logToolOutputTruncated(
this.config,
new ToolOutputTruncatedEvent(call.request.prompt_id, {
toolName,
originalContentLength,
truncatedContentLength: truncatedText.length,
threshold,
}),
);
return { truncatedContent, outputFile };
}
}
}
return { truncatedContent: content, outputFile };
const distiller = new ToolOutputDistillationService(
this.config,
this.context.geminiClient,
this.context.promptId,
);
return distiller.distill(call.request.name, call.request.callId, content);
}
private async createCancelledResult(

View File

@@ -0,0 +1,203 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import {
LlmRole,
ToolOutputTruncatedEvent,
logToolOutputTruncated,
debugLogger,
type Config,
} from '../index.js';
import type { PartListUnion } from '@google/genai';
import { type GeminiClient } from '../core/client.js';
import { DEFAULT_GEMINI_FLASH_LITE_MODEL } from '../config/models.js';
import {
saveTruncatedToolOutput,
formatTruncatedToolOutput,
} from '../utils/fileUtils.js';
import {
READ_FILE_TOOL_NAME,
READ_MANY_FILES_TOOL_NAME,
} from '../tools/tool-names.js';
/**
 * Result of passing a tool's output through
 * `ToolOutputDistillationService.distill`.
 */
export interface DistilledToolOutput {
/**
 * Content to use in place of the raw tool output. This is the original
 * content, untouched, whenever no distillation took place.
 */
truncatedContent: PartListUnion;
/**
 * Path of the file the full raw output was saved to. Only set when the
 * output was actually distilled.
 */
outputFile?: string;
}
/**
 * Shrinks oversized tool output before it is handed back to the agent.
 *
 * When the textual size of a tool result exceeds the configured truncation
 * threshold, the raw output is written to disk, the in-context content is
 * replaced by a truncated rendering that points at the saved file, and, for
 * outputs more than 1.5x the threshold, a short structural summary produced
 * by a secondary model call is appended.
 */
export class ToolOutputDistillationService {
  /** Upper bound, in milliseconds, on the secondary summarization call. */
  private static readonly STRUCTURAL_MAP_TIMEOUT_MS = 15000;

  constructor(
    private readonly config: Config,
    private readonly geminiClient: GeminiClient,
    private readonly promptId: string,
  ) {}

  /**
   * Distills a tool's output if it exceeds the configured length threshold,
   * preserving the agent's context window.
   *
   * @param toolName Name of the tool that produced the output.
   * @param callId Identifier of the tool call; used when saving raw output.
   * @param content The tool's output.
   * @returns The original content when the tool is exempt, the threshold is
   * disabled (`<= 0`), or the content fits; otherwise the distilled content
   * together with the path of the saved raw output.
   */
  async distill(
    toolName: string,
    callId: string,
    content: PartListUnion,
  ): Promise<DistilledToolOutput> {
    // Explicitly bypass escape hatches that natively handle large outputs.
    if (this.isExemptFromDistillation(toolName)) {
      return { truncatedContent: content };
    }

    const threshold = this.config.getTruncateToolOutputThreshold();
    if (threshold <= 0) {
      return { truncatedContent: content };
    }

    const originalContentLength = this.calculateContentLength(content);
    if (originalContentLength <= threshold) {
      return { truncatedContent: content };
    }

    return this.performDistillation(
      toolName,
      callId,
      content,
      originalContentLength,
      threshold,
    );
  }

  /** Whether the tool's output must be passed through untouched. */
  private isExemptFromDistillation(toolName: string): boolean {
    return (
      toolName === READ_FILE_TOOL_NAME || toolName === READ_MANY_FILES_TOOL_NAME
    );
  }

  /**
   * Sums the number of text characters carried by `content`.
   *
   * A `PartListUnion` may be a bare string, a single part, or an array whose
   * items are themselves either strings or parts; every one of those shapes
   * is measured. Parts without a string `text` field (for example binary or
   * function-response parts) contribute nothing to the total.
   */
  private calculateContentLength(content: PartListUnion): number {
    const items = Array.isArray(content) ? content : [content];
    let total = 0;
    for (const item of items) {
      if (typeof item === 'string') {
        total += item.length;
      } else if (
        typeof item === 'object' &&
        item !== null &&
        'text' in item &&
        typeof item.text === 'string'
      ) {
        total += item.text.length;
      }
    }
    return total;
  }

  /** Renders content as a string: strings verbatim, anything else as pretty JSON. */
  private stringifyContent(content: PartListUnion): string {
    return typeof content === 'string'
      ? content
      : JSON.stringify(content, null, 2);
  }

  /**
   * Saves the raw output, builds the truncated replacement, optionally
   * appends a structural map, and logs a truncation event.
   *
   * Note that non-string content is returned as a single text part, so any
   * non-text parts in the original are represented only inside the
   * stringified text and the saved file.
   */
  private async performDistillation(
    toolName: string,
    callId: string,
    content: PartListUnion,
    originalContentLength: number,
    threshold: number,
  ): Promise<DistilledToolOutput> {
    const stringifiedContent = this.stringifyContent(content);

    // Save the raw, untruncated string to disk for human review.
    const { outputFile: savedPath } = await saveTruncatedToolOutput(
      stringifiedContent,
      toolName,
      callId,
      this.config.storage.getProjectTempDir(),
      this.promptId,
    );

    let truncatedText = formatTruncatedToolOutput(
      stringifiedContent,
      savedPath,
      threshold,
    );

    // If the output is massively oversized, attempt to generate a structural map.
    const summarizationThreshold = threshold * 1.5;
    if (originalContentLength > summarizationThreshold) {
      const summaryText = await this.generateStructuralMap(
        toolName,
        stringifiedContent,
        Math.floor(summarizationThreshold),
      );
      if (summaryText) {
        truncatedText += `\n\n--- Structural Map of Truncated Content ---\n${summaryText}`;
      }
    }

    logToolOutputTruncated(
      this.config,
      new ToolOutputTruncatedEvent(this.promptId, {
        toolName,
        originalContentLength,
        truncatedContentLength: truncatedText.length,
        threshold,
      }),
    );

    return {
      truncatedContent:
        typeof content === 'string' ? truncatedText : [{ text: truncatedText }],
      outputFile: savedPath,
    };
  }

  /**
   * Calls a fast, internal model (Flash-Lite) to provide a high-level summary
   * of the truncated content's structure.
   *
   * @param toolName Name of the tool, quoted in the prompt.
   * @param stringifiedContent Full stringified output; only a prefix is sent.
   * @param maxPreviewLen Number of leading characters included in the prompt.
   * @returns The text of the first part of the first candidate, or
   * `undefined` when the call fails, is aborted by the timeout, or yields no
   * text. Never throws for model-call failures: summarization is a
   * progressive enhancement.
   */
  private async generateStructuralMap(
    toolName: string,
    stringifiedContent: string,
    maxPreviewLen: number,
  ): Promise<string | undefined> {
    const controller = new AbortController();
    const timeoutId = setTimeout(
      () => controller.abort(),
      ToolOutputDistillationService.STRUCTURAL_MAP_TIMEOUT_MS,
    );

    try {
      const promptText = `The following output from the tool '${toolName}' is extremely large and has been truncated. Please provide a very brief, high-level structural map of its contents (e.g., key sections, JSON schema outline, or line number ranges for major components). Keep the summary under 10 lines. Do not attempt to summarize the specific data values, just the structure so another agent knows what is inside.
Output to summarize:
${stringifiedContent.slice(0, maxPreviewLen)}...`;

      // NOTE(review): this is an internal helper call but is tagged with
      // LlmRole.MAIN — confirm that is the intended role for attribution.
      const summaryResponse = await this.geminiClient.generateContent(
        {
          model: DEFAULT_GEMINI_FLASH_LITE_MODEL,
          overrideScope: 'internal-summarizer',
        },
        [{ parts: [{ text: promptText }] }],
        controller.signal,
        LlmRole.MAIN,
      );

      return summaryResponse.candidates?.[0]?.content?.parts?.[0]?.text;
    } catch (e: unknown) {
      // Fail gracefully, summarization is a progressive enhancement.
      debugLogger.debug(
        'Failed to generate structural map for truncated output:',
        e,
      );
      return undefined;
    } finally {
      // Always disarm the timer — on failure too — so a rejected call does
      // not leave a pending timeout behind for up to 15 seconds.
      clearTimeout(timeoutId);
    }
  }
}

View File

@@ -22,7 +22,6 @@ import type { Config } from '../config/config.js';
import { ApprovalMode } from '../policy/types.js';
import { getResponseText } from '../utils/partUtils.js';
import { fetchWithTimeout, isPrivateIp } from '../utils/fetch.js';
import { truncateString } from '../utils/textUtils.js';
import { convert } from 'html-to-text';
import {
logWebFetchFallbackAttempt,
@@ -37,11 +36,10 @@ import { resolveToolDeclaration } from './definitions/resolver.js';
import { LRUCache } from 'mnemonist';
const URL_FETCH_TIMEOUT_MS = 10000;
const MAX_CONTENT_LENGTH = 100000;
const MAX_EXPERIMENTAL_FETCH_SIZE = 10 * 1024 * 1024; // 10MB
const USER_AGENT =
'Mozilla/5.0 (compatible; Google-Gemini-CLI/1.0; +https://github.com/google-gemini/gemini-cli)';
const TRUNCATION_WARNING = '\n\n... [Content truncated due to size limit] ...';
// Rate limiting configuration
const RATE_LIMIT_WINDOW_MS = 60000; // 1 minute
@@ -242,12 +240,6 @@ class WebFetchToolInvocation extends BaseToolInvocation<
textContent = rawContent;
}
textContent = truncateString(
textContent,
MAX_CONTENT_LENGTH,
TRUNCATION_WARNING,
);
const geminiClient = this.config.getGeminiClient();
const fallbackPrompt = `The user requested the following: "${this.params.prompt}".
@@ -441,7 +433,7 @@ ${textContent}
});
const errorContent = `Request failed with status ${status}
Headers: ${JSON.stringify(headers, null, 2)}
Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response truncated] ...')}`;
Response: ${rawResponseText}`;
return {
llmContent: errorContent,
returnDisplay: `Failed to fetch ${url} (Status: ${status})`,
@@ -454,11 +446,7 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun
lowContentType.includes('text/plain') ||
lowContentType.includes('application/json')
) {
const text = truncateString(
bodyBuffer.toString('utf8'),
MAX_CONTENT_LENGTH,
TRUNCATION_WARNING,
);
const text = bodyBuffer.toString('utf8');
return {
llmContent: text,
returnDisplay: `Fetched ${contentType} content from ${url}`,
@@ -467,16 +455,12 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun
if (lowContentType.includes('text/html')) {
const html = bodyBuffer.toString('utf8');
const textContent = truncateString(
convert(html, {
wordwrap: false,
selectors: [
{ selector: 'a', options: { ignoreHref: false, baseUrl: url } },
],
}),
MAX_CONTENT_LENGTH,
TRUNCATION_WARNING,
);
const textContent = convert(html, {
wordwrap: false,
selectors: [
{ selector: 'a', options: { ignoreHref: false, baseUrl: url } },
],
});
return {
llmContent: textContent,
returnDisplay: `Fetched and converted HTML content from ${url}`,
@@ -501,11 +485,7 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun
}
// Fallback for unknown types - try as text
const text = truncateString(
bodyBuffer.toString('utf8'),
MAX_CONTENT_LENGTH,
TRUNCATION_WARNING,
);
const text = bodyBuffer.toString('utf8');
return {
llmContent: text,
returnDisplay: `Fetched ${contentType || 'unknown'} content from ${url}`,