mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-14 05:42:54 -07:00
a3d9cbfe55
fix(cli): Write shell command output to a file and limit memory buffered in UI. Fixes. Checkpoint. fix(core, cli): await outputStream.end() to prevent race conditions. This commit fixes a critical race condition where `outputStream.end()` was called synchronously without being awaited. This led to potential file truncation or EBUSY errors on Windows when attempting to manipulate the file immediately after the call. Additionally, this change removes fixed wait times (`setTimeout`) that were previously used in test files as a band-aid. fix(core): stream processed xterm output to file to remove spurious escape codes. test(core): update shell regression tests to use file_data events.
402 lines
13 KiB
TypeScript
402 lines
13 KiB
TypeScript
/**
|
|
* @license
|
|
* Copyright 2026 Google LLC
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
import type { Content, Part } from '@google/genai';
|
|
import path from 'node:path';
|
|
import * as fsPromises from 'node:fs/promises';
|
|
import { estimateTokenCountSync } from '../utils/tokenCalculation.js';
|
|
import { debugLogger } from '../utils/debugLogger.js';
|
|
import { sanitizeFilenamePart } from '../utils/fileUtils.js';
|
|
import type { Config } from '../config/config.js';
|
|
import { logToolOutputMasking } from '../telemetry/loggers.js';
|
|
import {
|
|
SHELL_TOOL_NAME,
|
|
ACTIVATE_SKILL_TOOL_NAME,
|
|
MEMORY_TOOL_NAME,
|
|
ASK_USER_TOOL_NAME,
|
|
ENTER_PLAN_MODE_TOOL_NAME,
|
|
EXIT_PLAN_MODE_TOOL_NAME,
|
|
} from '../tools/tool-names.js';
|
|
import { ToolOutputMaskingEvent } from '../telemetry/types.js';
|
|
|
|
// Tool output masking defaults

/**
 * Default size, in estimated tokens, of the protection window: the newest
 * tool outputs inside this budget are never masked.
 */
export const DEFAULT_TOOL_PROTECTION_THRESHOLD = 50000;

/**
 * Default minimum amount of prunable tool-output tokens that must accumulate
 * beyond the protection window before a masking pass is triggered.
 */
export const DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD = 30000;

/**
 * Default for whether the most recent history entry is skipped entirely when
 * scanning for prunable tool outputs.
 */
export const DEFAULT_PROTECT_LATEST_TURN = true;

/**
 * Tag name wrapped around a masked tool output. Its presence in a tool
 * response is also how already-masked outputs are recognised.
 */
export const MASKING_INDICATOR_TAG = 'tool_output_masked';

/**
 * Name of the sub-directory (under the project temp directory) where full
 * tool outputs are offloaded when they are masked.
 */
export const TOOL_OUTPUTS_DIR = 'tool-outputs';
|
|
|
|
/**
 * Tools whose outputs are always high-signal and should never be masked,
 * regardless of their position in the conversation history.
 *
 * Typed as a read-only set so the exemption list cannot be mutated by accident.
 */
const EXEMPT_TOOLS: ReadonlySet<string> = new Set<string>([
  ACTIVATE_SKILL_TOOL_NAME,
  MEMORY_TOOL_NAME,
  ASK_USER_TOOL_NAME,
  ENTER_PLAN_MODE_TOOL_NAME,
  EXIT_PLAN_MODE_TOOL_NAME,
]);
|
|
|
|
/**
 * Outcome of a single {@link ToolOutputMaskingService.mask} pass.
 */
export interface MaskingResult {
  /**
   * The history to use going forward. This is the very same array that was
   * passed in when nothing was masked, otherwise a shallow copy in which the
   * affected entries have been replaced.
   */
  newHistory: readonly Content[];
  /** Number of tool outputs that were replaced by a masked snippet. */
  maskedCount: number;
  /** Estimated number of tokens removed from the history by this pass. */
  tokensSaved: number;
}
|
|
|
|
/**
 * Service to manage context window efficiency by masking bulky tool outputs (Tool Output Masking).
 *
 * It implements a "Hybrid Backward Scanned FIFO" algorithm to balance context relevance with
 * token savings:
 * 1. **Protection Window**: Protects the newest `protectionThresholdTokens` (default 50k) tool tokens
 *    from pruning. Optionally skips the entire latest conversation turn to ensure full context for
 *    the model's next response.
 * 2. **Global Aggregation**: Scans backwards past the protection window to identify all remaining
 *    tool outputs that haven't been masked yet.
 * 3. **Batch Trigger**: Trigger masking only if the total prunable tokens exceed
 *    `minPrunableThresholdTokens` (default 30k).
 *
 * @remarks
 * Effectively, this means masking only starts once the conversation contains approximately 80k
 * tokens of prunable tool outputs (50k protected + 30k prunable buffer). Small tool outputs
 * are preserved until they collectively reach the threshold.
 */
export class ToolOutputMaskingService {
  /**
   * Replaces old, bulky tool outputs in `history` with a short preview that
   * points at a file holding the full output.
   *
   * @param history - Conversation history to inspect. It is never mutated.
   * @param config - Supplies the masking thresholds, the project temp
   *   directory, the session id and the telemetry sink.
   * @returns The (possibly rewritten) history plus the number of masked
   *   outputs and the estimated tokens saved. When nothing qualifies, the
   *   original `history` array is returned unchanged.
   * @throws Propagates file-system errors from creating the output directory
   *   or writing an offload file.
   */
  async mask(
    history: readonly Content[],
    config: Config,
  ): Promise<MaskingResult> {
    if (history.length === 0) {
      return { newHistory: history, maskedCount: 0, tokensSaved: 0 };
    }

    const maskingConfig = await config.getToolOutputMaskingConfig();
    let cumulativeToolTokens = 0;
    let protectionBoundaryReached = false;
    let totalPrunableTokens = 0;
    let maskedCount = 0;

    const prunableParts: Array<{
      contentIndex: number;
      partIndex: number;
      tokens: number;
      content: string;
    }> = [];

    // Decide where to start scanning.
    // If protectLatestTurn is true, we skip the most recent message (index history.length - 1).
    const scanStartIdx = maskingConfig.protectLatestTurn
      ? history.length - 2
      : history.length - 1;

    // Backward scan to identify prunable tool outputs
    for (let i = scanStartIdx; i >= 0; i--) {
      const content = history[i];
      const parts = content.parts || [];

      for (let j = parts.length - 1; j >= 0; j--) {
        const part = parts[j];

        // Tool outputs (functionResponse) are the primary targets for pruning because
        // they often contain voluminous data (e.g., shell logs, file content) that
        // can exceed context limits. We preserve other parts—such as user text,
        // model reasoning, and multimodal data—because they define the conversation's
        // core intent and logic, which are harder for the model to recover if lost.
        if (!part.functionResponse) continue;

        const toolName = part.functionResponse.name;
        if (toolName && EXEMPT_TOOLS.has(toolName)) {
          continue;
        }

        const toolOutputContent = this.getToolOutputContent(part);
        if (!toolOutputContent || this.isAlreadyMasked(toolOutputContent)) {
          continue;
        }

        const partTokens = estimateTokenCountSync([part]);

        if (!protectionBoundaryReached) {
          cumulativeToolTokens += partTokens;
          if (cumulativeToolTokens > maskingConfig.protectionThresholdTokens) {
            // The part that crossed the boundary is itself prunable, so fall
            // through to the bookkeeping below.
            protectionBoundaryReached = true;
          } else {
            // Still inside the protection window.
            continue;
          }
        }

        totalPrunableTokens += partTokens;
        prunableParts.push({
          contentIndex: i,
          partIndex: j,
          tokens: partTokens,
          content: toolOutputContent,
        });
      }
    }

    // Trigger pruning only if we have accumulated enough savings to justify the
    // overhead of masking and file I/O (batch pruning threshold).
    if (totalPrunableTokens < maskingConfig.minPrunableThresholdTokens) {
      return { newHistory: history, maskedCount: 0, tokensSaved: 0 };
    }

    debugLogger.debug(
      `[ToolOutputMasking] Triggering masking. Prunable tool tokens: ${totalPrunableTokens.toLocaleString()} (> ${maskingConfig.minPrunableThresholdTokens.toLocaleString()})`,
    );

    // Perform masking and offloading
    const newHistory = [...history]; // Shallow copy of history
    let actualTokensSaved = 0;
    let toolOutputsDir = path.join(
      config.storage.getProjectTempDir(),
      TOOL_OUTPUTS_DIR,
    );
    const sessionId = config.getSessionId();
    if (sessionId) {
      const safeSessionId = sanitizeFilenamePart(sessionId);
      toolOutputsDir = path.join(toolOutputsDir, `session-${safeSessionId}`);
    }
    await fsPromises.mkdir(toolOutputsDir, { recursive: true });

    for (const { contentIndex, partIndex, content, tokens } of prunableParts) {
      // Read from newHistory (not history) so that several masked parts in the
      // same entry accumulate instead of overwriting each other.
      const contentRecord = newHistory[contentIndex];
      const currentParts = contentRecord.parts ?? [];
      const part = currentParts[partIndex];

      if (!part?.functionResponse) continue;

      const toolName = part.functionResponse.name || 'unknown_tool';
      const callId = part.functionResponse.id || Date.now().toString();

      const originalResponse =
        // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
        (part.functionResponse.response as Record<string, unknown>) ?? {};

      let filePath = '';
      let fileSizeMB = '0.00';
      let totalLines = 0;
      // True when the full output still has to be written to `filePath`.
      let needsWrite = false;

      const existingOutputFile = originalResponse['outputFile'];
      if (typeof existingOutputFile === 'string' && existingOutputFile) {
        try {
          const stats = await fsPromises.stat(existingOutputFile);
          filePath = existingOutputFile;
          fileSizeMB = (stats.size / 1024 / 1024).toFixed(2);
          // For truly full files, we don't count lines as it's too slow.
          // We just indicate it's the full file.
          totalLines = -1;
        } catch {
          // Deliberate best-effort: the referenced file is gone, so fall back
          // to offloading the in-memory response below.
          filePath = '';
        }
      }

      if (!filePath) {
        const safeToolName = sanitizeFilenamePart(toolName).toLowerCase();
        const safeCallId = sanitizeFilenamePart(callId).toLowerCase();
        const fileName = `${safeToolName}_${safeCallId}_${Math.random()
          .toString(36)
          .substring(7)}.txt`;
        filePath = path.join(toolOutputsDir, fileName);
        needsWrite = true;

        totalLines = content.split('\n').length;
        fileSizeMB = (Buffer.byteLength(content, 'utf8') / 1024 / 1024).toFixed(
          2,
        );
      }

      let preview = '';
      if (toolName === SHELL_TOOL_NAME) {
        preview = this.formatShellPreview(originalResponse);
      } else {
        // General tools: Head + Tail preview (250 chars each)
        if (content.length > 500) {
          preview = `${content.slice(0, 250)}\n... [TRUNCATED] ...\n${content.slice(-250)}`;
        } else {
          preview = content;
        }
      }

      const maskedSnippet = this.formatMaskedSnippet({
        toolName,
        filePath,
        fileSizeMB,
        totalLines,
        tokens,
        preview,
      });

      const maskedPart = {
        ...part,
        functionResponse: {
          // eslint-disable-next-line @typescript-eslint/no-misused-spread
          ...part.functionResponse,
          response: { output: maskedSnippet },
        },
      };

      const newTaskTokens = estimateTokenCountSync([maskedPart]);
      const savings = tokens - newTaskTokens;

      // Masking that does not shrink the part is pointless; skip it before
      // touching the disk so no orphaned offload file is left behind.
      if (savings <= 0) continue;

      if (needsWrite) {
        await fsPromises.writeFile(filePath, content, 'utf-8');
      }

      const newParts = [...currentParts];
      newParts[partIndex] = maskedPart;
      newHistory[contentIndex] = { ...contentRecord, parts: newParts };
      actualTokensSaved += savings;
      maskedCount++;
    }

    debugLogger.debug(
      `[ToolOutputMasking] Masked ${maskedCount} tool outputs. Saved ~${actualTokensSaved.toLocaleString()} tokens.`,
    );

    const result = {
      newHistory,
      maskedCount,
      tokensSaved: actualTokensSaved,
    };

    if (actualTokensSaved <= 0) {
      return result;
    }

    logToolOutputMasking(
      config,
      new ToolOutputMaskingEvent({
        tokens_before: totalPrunableTokens,
        tokens_after: totalPrunableTokens - actualTokensSaved,
        masked_count: maskedCount,
        total_prunable_tokens: totalPrunableTokens,
      }),
    );

    return result;
  }

  /**
   * Serialises the response payload of a `functionResponse` part.
   *
   * @returns Pretty-printed JSON of the whole response object, or `null` when
   *   the part is not a function response or carries no response payload.
   */
  private getToolOutputContent(part: Part): string | null {
    if (!part.functionResponse) return null;
    // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
    const response = part.functionResponse.response as Record<string, unknown>;
    if (!response) return null;

    // Stringify the entire response for saving.
    // This handles any tool output schema automatically.
    // Multimodal safety check: Sibling parts (inlineData, etc.) are handled by mask()
    // by keeping the original part structure and only replacing the functionResponse content.
    return JSON.stringify(response, null, 2);
  }

  /** Returns true when `content` already carries the masking indicator tag. */
  private isAlreadyMasked(content: string): boolean {
    return content.includes(`<${MASKING_INDICATOR_TAG}`);
  }

  /**
   * Builds a preview for a shell tool response.
   *
   * The text is taken from `response.output` (falling back to
   * `response.stdout`). When it contains the `Name: ` section prefixes matched
   * below, only the `Output` section (and any text before the first section)
   * is shortened; every other section is kept in full. Root-level `exitCode` /
   * `exit_code` and `error` fields are appended when the text does not already
   * mention them.
   */
  private formatShellPreview(response: Record<string, unknown>): string {
    const raw: unknown = response['output'] || response['stdout'] || '';
    if (typeof raw !== 'string') {
      return typeof raw === 'object' ? JSON.stringify(raw) : String(raw);
    }
    const content = raw;

    // The shell tool output is structured in shell.ts with specific section prefixes:
    const sectionRegex =
      /^(Output|Error|Exit Code|Signal|Background PIDs|Process Group PGID): /m;
    // Because the regex has one capture group, split() yields
    // [leadingText, name1, body1, name2, body2, ...].
    const parts = content.split(sectionRegex);

    if (parts.length < 3) {
      // Fallback to simple head/tail if not in expected shell.ts format
      return this.formatSimplePreview(content);
    }

    const previewParts: string[] = [];
    const leadingText = parts[0].trim();
    if (leadingText) {
      previewParts.push(this.formatSimplePreview(leadingText));
    }

    for (let i = 1; i < parts.length; i += 2) {
      const name = parts[i];
      const sectionContent = parts[i + 1]?.trim() || '';

      if (name === 'Output') {
        previewParts.push(
          `Output: ${this.formatSimplePreview(sectionContent)}`,
        );
      } else {
        // Keep other sections (Error, Exit Code, etc.) in full as they are usually high-signal and small
        previewParts.push(`${name}: ${sectionContent}`);
      }
    }

    let preview = previewParts.join('\n');

    // Also check root levels just in case some tool uses them or for future-proofing
    const exitCode = response['exitCode'] ?? response['exit_code'];
    if (
      exitCode !== undefined &&
      exitCode !== 0 &&
      exitCode !== null &&
      !content.includes(`Exit Code: ${exitCode}`)
    ) {
      preview += `\n[Exit Code: ${exitCode}]`;
    }

    const error = response['error'];
    if (error) {
      // Structured errors must be serialised explicitly; plain interpolation
      // would render them as "[object Object]".
      const errorText = this.describeValue(error);
      if (!content.includes(`Error: ${errorText}`)) {
        preview += `\n[Error: ${errorText}]`;
      }
    }

    return preview;
  }

  /**
   * Renders an arbitrary value as readable text: strings as-is, `Error`
   * instances by message, other objects as JSON, everything else via `String`.
   */
  private describeValue(value: unknown): string {
    if (typeof value === 'string') return value;
    if (value instanceof Error) return value.message;
    if (typeof value === 'object' && value !== null) {
      try {
        return JSON.stringify(value);
      } catch {
        // e.g. circular structures; fall back to the generic conversion.
        return String(value);
      }
    }
    return String(value);
  }

  /**
   * Shortens multi-line text to its first and last 10 lines, with a marker
   * stating how many lines were dropped. Text of 20 lines or fewer is
   * returned unchanged.
   */
  private formatSimplePreview(content: string): string {
    const lines = content.split('\n');
    if (lines.length <= 20) return content;
    const head = lines.slice(0, 10);
    const tail = lines.slice(-10);
    return `${head.join('\n')}\n\n... [${
      lines.length - head.length - tail.length
    } lines omitted] ...\n\n${tail.join('\n')}`;
  }

  /**
   * Wraps a preview in the masking indicator tag together with the location
   * of the full output. Only `filePath` and `preview` are read from `params`.
   */
  private formatMaskedSnippet(params: MaskedSnippetParams): string {
    const { filePath, preview } = params;
    return `<${MASKING_INDICATOR_TAG}>
${preview}

Output too large. Full output available at: ${filePath}
</${MASKING_INDICATOR_TAG}>`;
  }
}
|
|
|
|
/**
 * Inputs describing one masked tool output, passed to `formatMaskedSnippet`.
 */
interface MaskedSnippetParams {
  /** Name of the tool that produced the output. */
  toolName: string;
  /** Location of the file that holds the full, unmasked output. */
  filePath: string;
  /** Size of that file in MiB, formatted with two decimals. */
  fileSizeMB: string;
  /**
   * Number of lines in the offloaded output, or -1 when a pre-existing output
   * file was reused and its lines were not counted.
   */
  totalLines: number;
  /** Estimated token count of the original, unmasked part. */
  tokens: number;
  /** Shortened rendition of the output that stays in the history. */
  preview: string;
}
|