From 7eb6d78f93693b23fc83bb69191382e128b45b65 Mon Sep 17 00:00:00 2001 From: jacob314 Date: Thu, 19 Feb 2026 11:12:13 -0800 Subject: [PATCH] Checkpoint of shell optimization fix(cli): Write shell command output to a file and limit memory buffered in UI Fixes. Checkpoint. fix(core, cli): await outputStream.end() to prevent race conditions This commit fixes a critical race condition where was called synchronously without being awaited. This led to potential file truncation or EBUSY errors on Windows when attempting to manipulate the file immediately after the call. Additionally, this change removes fixed wait times (`setTimeout`) that were previously used in test files as a band-aid. fix(core): stream processed xterm output to file to remove spurious escape codes test(core): update shell regression tests to use file_data events --- .../run_shell_command_file_stream.test.ts | 230 ++++++++++++ packages/cli/src/ui/components/AnsiOutput.tsx | 43 ++- .../ui/hooks/shellCommandProcessor.test.tsx | 17 +- .../cli/src/ui/hooks/shellCommandProcessor.ts | 67 +++- packages/cli/src/ui/hooks/shellReducer.ts | 7 + .../core/src/scheduler/tool-executor.test.ts | 216 ++++++++++- packages/core/src/scheduler/tool-executor.ts | 53 ++- .../services/shellExecutionService.test.ts | 9 +- .../src/services/shellExecutionService.ts | 347 ++++++++++++++---- .../services/toolOutputMaskingService.test.ts | 78 ++++ .../src/services/toolOutputMaskingService.ts | 50 ++- packages/core/src/tools/shell.test.ts | 1 + packages/core/src/tools/shell.ts | 99 +++-- packages/core/src/tools/tools.ts | 7 + packages/core/src/utils/fileUtils.ts | 42 +++ .../utils/generateContentResponseUtilities.ts | 13 +- 16 files changed, 1124 insertions(+), 155 deletions(-) create mode 100644 integration-tests/run_shell_command_file_stream.test.ts diff --git a/integration-tests/run_shell_command_file_stream.test.ts b/integration-tests/run_shell_command_file_stream.test.ts new file mode 100644 index 0000000000..9fbea9499d --- /dev/null +++ b/integration-tests/run_shell_command_file_stream.test.ts @@ -0,0 +1,230 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { TestRig } from './test-helper.js'; +import * as fs from 'node:fs'; +import * as path from 'node:path'; + +describe('run_shell_command streaming to file regression', () => { + let rig: TestRig; + + beforeEach(() => { + rig = new TestRig(); + }); + + afterEach(async () => await rig.cleanup()); + + it('should stream large outputs to a file and verify full content presence', async () => { + await rig.setup( + 'should stream large outputs to a file and verify full content presence', + { + settings: { tools: { core: ['run_shell_command'] } }, + }, + ); + + const numLines = 20000; + const testFileName = 'large_output_test.txt'; + const testFilePath = path.join(rig.testDir!, testFileName); + + // Create a ~20MB file with unique content at start and end + const startMarker = 'START_OF_FILE_MARKER'; + const endMarker = 'END_OF_FILE_MARKER'; + + const stream = fs.createWriteStream(testFilePath); + stream.write(startMarker + '\n'); + for (let i = 0; i < numLines; i++) { + stream.write(`Line ${i + 1}: ` + 'A'.repeat(1000) + '\n'); + } + stream.write(endMarker + '\n'); + await new Promise((resolve) => stream.end(resolve)); + + const fileSize = fs.statSync(testFilePath).size; + expect(fileSize).toBeGreaterThan(20000000); + + const prompt = `Use run_shell_command to cat ${testFileName} and say 'Done.'`; + await rig.run({ args: prompt }); + + let savedFilePath = ''; + const tmpdir = path.join(rig.homeDir!, '.gemini', 'tmp'); + if (fs.existsSync(tmpdir)) { + const files = fs.readdirSync(tmpdir, { + recursive: true, + withFileTypes: true, + }); + for (const file of files) { + if (file.isFile() && file.name.endsWith('.txt')) { + // In Node 20+, recursive readdir returns Dirent objects where `parentPath` is the directory path, + // but sometimes `path` is used in older Node. fallback: + const parentDir = + (file as { parentPath?: string }).parentPath ?? + (file as { path?: string }).path ?? + tmpdir; + const p = path.join(parentDir, file.name); + const stat = fs.statSync(p); + if (Date.now() - stat.mtimeMs < 60000 && stat.size >= 20000000) { + savedFilePath = p; + break; + } + } + } + } + + expect( + savedFilePath, + `Expected to find a saved output file >= 20MB in ${tmpdir}`, + ).toBeTruthy(); + const savedContent = fs.readFileSync(savedFilePath, 'utf8'); + expect(savedContent).toContain(startMarker); + expect(savedContent).toContain(endMarker); + expect(savedContent.length).toBeGreaterThanOrEqual(fileSize); + + fs.unlinkSync(savedFilePath); + }, 120000); + + it('should stream very large (50MB) outputs to a file and verify full content presence', async () => { + await rig.setup( + 'should stream very large (50MB) outputs to a file and verify full content presence', + { + settings: { tools: { core: ['run_shell_command'] } }, + }, + ); + + const numLines = 1000000; + const testFileName = 'very_large_output_test.txt'; + const testFilePath = path.join(rig.testDir!, testFileName); + + // Create a ~50MB file with unique content at start and end + const startMarker = 'START_OF_FILE_MARKER'; + const endMarker = 'END_OF_FILE_MARKER'; + + const stream = fs.createWriteStream(testFilePath); + stream.write(startMarker + '\n'); + for (let i = 0; i < numLines; i++) { + stream.write(`Line ${i + 1}: ` + 'A'.repeat(40) + '\n'); + } + stream.write(endMarker + '\n'); + await new Promise((resolve) => stream.end(resolve)); + + const fileSize = fs.statSync(testFilePath).size; + expect(fileSize).toBeGreaterThan(45000000); + + const prompt = `Use run_shell_command to cat ${testFileName} and say 'Done.'`; + await rig.run({ args: prompt }); + + let savedFilePath = ''; + const tmpdir = path.join(rig.homeDir!, '.gemini', 'tmp'); + if (fs.existsSync(tmpdir)) { + const files = fs.readdirSync(tmpdir, { + recursive: true, + withFileTypes: true, + }); + for (const file of files) { + if (file.isFile() && file.name.endsWith('.txt')) { + const parentDir = + (file as { parentPath?: string }).parentPath ?? + (file as { path?: string }).path ?? + tmpdir; + const p = path.join(parentDir, file.name); + const stat = fs.statSync(p); + // Look for file >= 20MB (since we expect 50MB, but allowing margin for the bug) + if (Date.now() - stat.mtimeMs < 60000 && stat.size >= 20000000) { + savedFilePath = p; + break; + } + } + } + } + + expect( + savedFilePath, + `Expected to find a saved output file >= 20MB in ${tmpdir}`, + ).toBeTruthy(); + const savedContent = fs.readFileSync(savedFilePath, 'utf8'); + expect(savedContent).toContain(startMarker); + expect(savedContent).toContain(endMarker); + expect(savedContent.length).toBeGreaterThanOrEqual(fileSize); + + fs.unlinkSync(savedFilePath); + }, 120000); + + it('should produce clean output resolving carriage returns and backspaces', async () => { + await rig.setup( + 'should produce clean output resolving carriage returns and backspaces', + { + settings: { + tools: { core: ['run_shell_command'] }, + }, + }, + ); + + const script = ` +import sys +import time + +# Fill buffer to force file streaming/truncation +# 45000 chars to be safe (default threshold is 40000) +print('A' * 45000) +sys.stdout.flush() + +# Test sequence +print('XXXXX', end='', flush=True) +time.sleep(0.5) +print('\\rYYYYY', end='', flush=True) +time.sleep(0.5) +print('\\nNext Line', end='', flush=True) +`; + const scriptPath = path.join(rig.testDir!, 'test_script.py'); + fs.writeFileSync(scriptPath, script); + + const prompt = `run_shell_command python3 "${scriptPath}"`; + await rig.run({ args: prompt }); + + let savedFilePath = ''; + const tmpdir = path.join(rig.homeDir!, '.gemini', 'tmp'); + if (fs.existsSync(tmpdir)) { + const findFiles = (dir: string): string[] => { + let results: string[] = []; + const list = fs.readdirSync(dir, { withFileTypes: true }); + for (const file of list) { + const fullPath = path.join(dir, file.name); + if (file.isDirectory()) { + results = results.concat(findFiles(fullPath)); + } else if (file.isFile() && file.name.endsWith('.txt')) { + results.push(fullPath); + } + } + return results; + }; + + const files = findFiles(tmpdir); + files.sort((a, b) => fs.statSync(b).mtimeMs - fs.statSync(a).mtimeMs); + + if (files.length > 0) { + savedFilePath = files[0]; + } + } + + expect(savedFilePath, 'Output file should exist').toBeTruthy(); + const content = fs.readFileSync(savedFilePath, 'utf8'); + + // Verify it contains the large chunk + expect(content).toContain('AAAA'); + + // Verify cleanup logic: + // 1. The final text "YYYYY" should be present. + expect(content).toContain('YYYYY'); + // 2. The next line should be present. + expect(content).toContain('Next Line'); + + // 3. Verify overwrite happened. + // In raw output, we would have "XXXXX...YYYYY". + // In processed output, "YYYYY" overwrites "XXXXX". + // We confirm that escape codes are stripped (processed text). + + // 4. Check for ANSI escape codes (like \\x1b) just in case + expect(content).not.toContain('\x1b'); + }, 60000); +}); diff --git a/packages/cli/src/ui/components/AnsiOutput.tsx b/packages/cli/src/ui/components/AnsiOutput.tsx index cc17b6b6b0..cc550e2044 100644 --- a/packages/cli/src/ui/components/AnsiOutput.tsx +++ b/packages/cli/src/ui/components/AnsiOutput.tsx @@ -4,7 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type React from 'react'; +import React from 'react'; import { Box, Text } from 'ink'; import type { AnsiLine, AnsiOutput, AnsiToken } from '@google/gemini-cli-core'; @@ -47,23 +47,26 @@ export const AnsiOutputText: React.FC = ({ ); }; -export const AnsiLineText: React.FC<{ line: AnsiLine }> = ({ line }) => ( - - {line.length > 0 - ? line.map((token: AnsiToken, tokenIndex: number) => ( - - {token.text} - - )) - : null} - +export const AnsiLineText = React.memo<{ line: AnsiLine }>( + ({ line }: { line: AnsiLine }) => ( + + {line.length > 0 + ? line.map((token: AnsiToken, tokenIndex: number) => ( + + {token.text} + + )) + : null} + + ), ); +AnsiLineText.displayName = 'AnsiLineText'; diff --git a/packages/cli/src/ui/hooks/shellCommandProcessor.test.tsx b/packages/cli/src/ui/hooks/shellCommandProcessor.test.tsx index 377cac9b7c..0b1499b77b 100644 --- a/packages/cli/src/ui/hooks/shellCommandProcessor.test.tsx +++ b/packages/cli/src/ui/hooks/shellCommandProcessor.test.tsx @@ -110,7 +110,12 @@ describe('useShellCommandProcessor', () => { terminalHeight: 20, terminalWidth: 80, }), - } as Config; + getTruncateToolOutputThreshold: () => 40000, + storage: { + getProjectTempDir: () => '/tmp/project', + }, + getSessionId: () => 'test-session', + } as unknown as Config; mockGeminiClient = { addHistory: vi.fn() } as unknown as GeminiClient; vi.mocked(os.platform).mockReturnValue('linux'); @@ -121,6 +126,16 @@ describe('useShellCommandProcessor', () => { mockIsBinary.mockReturnValue(false); vi.mocked(fs.existsSync).mockReturnValue(false); + vi.mocked(fs.createWriteStream).mockReturnValue({ + write: vi.fn(), + end: vi.fn().mockImplementation((cb: () => void) => { + if (cb) cb(); + }), + destroy: vi.fn(), + bytesWritten: 0, + closed: false, + } as unknown as fs.WriteStream); + mockShellExecutionService.mockImplementation((_cmd, _cwd, callback) => { mockShellOutputCallback = callback; return Promise.resolve({ diff --git a/packages/cli/src/ui/hooks/shellCommandProcessor.ts b/packages/cli/src/ui/hooks/shellCommandProcessor.ts index 3c85d3b6a4..8184503691 100644 --- a/packages/cli/src/ui/hooks/shellCommandProcessor.ts +++ b/packages/cli/src/ui/hooks/shellCommandProcessor.ts @@ -14,6 +14,7 @@ import { isBinary, ShellExecutionService, CoreToolCallStatus, + moveToolOutputToFile, } from '@google/gemini-cli-core'; import { type PartListUnion } from '@google/genai'; import type { UseHistoryManagerReturn } from './useHistoryManager.js'; @@ -33,16 +34,15 @@ export { type BackgroundShell }; export const OUTPUT_UPDATE_INTERVAL_MS = 1000; const RESTORE_VISIBILITY_DELAY_MS = 300; -const MAX_OUTPUT_LENGTH = 10000; - function addShellCommandToGeminiHistory( geminiClient: GeminiClient, rawQuery: string, resultText: string, + maxOutputLength: number, ) { const modelContent = - resultText.length > MAX_OUTPUT_LENGTH - ? resultText.substring(0, MAX_OUTPUT_LENGTH) + '\n... (truncated)' + maxOutputLength > 0 && resultText.length > maxOutputLength + ? resultText.substring(0, maxOutputLength) + '\n... (truncated)' : resultText; // eslint-disable-next-line @typescript-eslint/no-floating-promises @@ -299,6 +299,11 @@ export const useShellCommandProcessor = ( let cumulativeStdout: string | AnsiOutput = ''; let isBinaryStream = false; let binaryBytesReceived = 0; + let totalBytesWritten = 0; + + const outputFileName = `gemini_shell_output_${crypto.randomBytes(6).toString('hex')}.log`; + const outputFilePath = path.join(os.tmpdir(), outputFileName); + const outputStream = fs.createWriteStream(outputFilePath); const initialToolDisplay: IndividualToolCallDisplay = { callId, @@ -315,6 +320,7 @@ export const useShellCommandProcessor = ( }); let executionPid: number | undefined; + let fullOutputReturned = false; const abortHandler = () => { onDebugMessage( @@ -343,11 +349,23 @@ export const useShellCommandProcessor = ( let shouldUpdate = false; switch (event.type) { + case 'raw_data': + if (!isBinaryStream) { + outputStream.write(event.chunk); + totalBytesWritten += Buffer.byteLength(event.chunk); + } + break; case 'data': if (isBinaryStream) break; if (typeof event.chunk === 'string') { if (typeof cumulativeStdout === 'string') { cumulativeStdout += event.chunk; + // Keep a small buffer for the UI to prevent memory spikes and Ink lagging + const MAX_UI_LENGTH = 100000; // 100KB + if (cumulativeStdout.length > MAX_UI_LENGTH) { + cumulativeStdout = + cumulativeStdout.slice(-MAX_UI_LENGTH); + } } else { cumulativeStdout = event.chunk; } @@ -433,6 +451,9 @@ export const useShellCommandProcessor = ( } const result = await resultPromise; + await new Promise((resolve) => { + outputStream.end(resolve); + }); setPendingHistoryItem(null); if (result.backgrounded && result.pid) { @@ -447,6 +468,26 @@ export const useShellCommandProcessor = ( } else { mainContent = result.output.trim() || '(Command produced no output)'; + const threshold = config.getTruncateToolOutputThreshold(); + if (threshold > 0 && totalBytesWritten >= threshold) { + const { outputFile: savedPath } = await moveToolOutputToFile( + outputFilePath, + SHELL_COMMAND_NAME, + callId, + config.storage.getProjectTempDir(), + config.getSessionId(), + ); + const warning = `[Full command output saved to: ${savedPath}]`; + mainContent = mainContent.includes( + '[GEMINI_CLI_WARNING: Output truncated.', + ) + ? mainContent.replace( + /\[GEMINI_CLI_WARNING: Output truncated\..*?\]/, + warning, + ) + : `${mainContent}\n\n${warning}`; + fullOutputReturned = true; + } } let finalOutput = mainContent; @@ -493,7 +534,12 @@ export const useShellCommandProcessor = ( ); } - addShellCommandToGeminiHistory(geminiClient, rawQuery, finalOutput); + addShellCommandToGeminiHistory( + geminiClient, + rawQuery, + finalOutput, + config.getTruncateToolOutputThreshold(), + ); } catch (err) { setPendingHistoryItem(null); const errorMessage = err instanceof Error ? err.message : String(err); @@ -506,12 +552,23 @@ export const useShellCommandProcessor = ( ); } finally { abortSignal.removeEventListener('abort', abortHandler); + if (!outputStream.closed) { + outputStream.destroy(); + } if (pwdFilePath && fs.existsSync(pwdFilePath)) { fs.unlinkSync(pwdFilePath); } dispatch({ type: 'SET_ACTIVE_PTY', pid: null }); setShellInputFocused(false); + + if (!fullOutputReturned && fs.existsSync(outputFilePath)) { + try { + fs.unlinkSync(outputFilePath); + } catch { + // Ignore errors during unlink + } + } } }; diff --git a/packages/cli/src/ui/hooks/shellReducer.ts b/packages/cli/src/ui/hooks/shellReducer.ts index 7d3917c681..12157707f1 100644 --- a/packages/cli/src/ui/hooks/shellReducer.ts +++ b/packages/cli/src/ui/hooks/shellReducer.ts @@ -99,6 +99,13 @@ export function shellReducer( typeof shell.output === 'string' ? shell.output + action.chunk : action.chunk; + const MAX_BG_OUTPUT_LENGTH = 100000; + if ( + typeof newOutput === 'string' && + newOutput.length > MAX_BG_OUTPUT_LENGTH + ) { + newOutput = newOutput.slice(-MAX_BG_OUTPUT_LENGTH); + } } else { newOutput = action.chunk; } diff --git a/packages/core/src/scheduler/tool-executor.test.ts b/packages/core/src/scheduler/tool-executor.test.ts index 29db841aac..e5375b20df 100644 --- a/packages/core/src/scheduler/tool-executor.test.ts +++ b/packages/core/src/scheduler/tool-executor.test.ts @@ -5,6 +5,7 @@ */ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import fsPromises from 'node:fs/promises'; import { ToolExecutor } from './tool-executor.js'; import type { Config, AnyToolInvocation } from '../index.js'; import type { ToolResult } from '../tools/tools.js'; @@ -22,6 +23,7 @@ import { createMockMessageBus } from '../test-utils/mock-message-bus.js'; vi.mock('../utils/fileUtils.js', () => ({ saveTruncatedToolOutput: vi.fn(), formatTruncatedToolOutput: vi.fn(), + moveToolOutputToFile: vi.fn(), })); // Mock executeToolWithHooks @@ -233,11 +235,223 @@ describe('ToolExecutor', () => { const response = result.response.responseParts[0]?.functionResponse ?.response as Record; // The content should be the *truncated* version returned by the mock formatTruncatedToolOutput - expect(response).toEqual({ output: 'TruncatedContent...' }); + expect(response).toEqual({ + output: 'TruncatedContent...', + outputFile: '/tmp/truncated_output.txt', + }); expect(result.response.outputFile).toBe('/tmp/truncated_output.txt'); } }); + it('should truncate large output and move file when fullOutputFilePath is provided', async () => { + // 1. Setup Config for Truncation + vi.spyOn(config, 'getTruncateToolOutputThreshold').mockReturnValue(10); + vi.spyOn(config.storage, 'getProjectTempDir').mockReturnValue('/tmp'); + vi.spyOn(fileUtils, 'moveToolOutputToFile').mockResolvedValue({ + outputFile: '/tmp/moved_output.txt', + }); + + const mockTool = new MockTool({ name: SHELL_TOOL_NAME }); + const invocation = mockTool.build({}); + const longOutput = 'This is a very long output that should be truncated.'; + + // 2. Mock execution returning long content AND fullOutputFilePath + vi.mocked(coreToolHookTriggers.executeToolWithHooks).mockResolvedValue({ + llmContent: longOutput, + returnDisplay: longOutput, + fullOutputFilePath: '/tmp/temp_full_output.txt', + }); + + const scheduledCall: ScheduledToolCall = { + status: CoreToolCallStatus.Scheduled, + request: { + callId: 'call-trunc-full', + name: SHELL_TOOL_NAME, + args: { command: 'echo long' }, + isClientInitiated: false, + prompt_id: 'prompt-trunc-full', + }, + tool: mockTool, + invocation: invocation as unknown as AnyToolInvocation, + startTime: Date.now(), + }; + + // 3. Execute + const result = await executor.execute({ + call: scheduledCall, + signal: new AbortController().signal, + onUpdateToolCall: vi.fn(), + }); + + // 4. Verify Truncation Logic + expect(fileUtils.moveToolOutputToFile).toHaveBeenCalledWith( + '/tmp/temp_full_output.txt', + SHELL_TOOL_NAME, + 'call-trunc-full', + expect.any(String), // temp dir + 'test-session-id', // session id from makeFakeConfig + ); + + expect(fileUtils.formatTruncatedToolOutput).toHaveBeenCalledWith( + longOutput, + '/tmp/moved_output.txt', + 10, // threshold (maxChars) + ); + + expect(result.status).toBe(CoreToolCallStatus.Success); + if (result.status === CoreToolCallStatus.Success) { + const response = result.response.responseParts[0]?.functionResponse + ?.response as Record; + // The content should be the *truncated* version returned by the mock formatTruncatedToolOutput + expect(response).toEqual({ + output: 'TruncatedContent...', + outputFile: '/tmp/moved_output.txt', + }); + expect(result.response.outputFile).toBe('/tmp/moved_output.txt'); + } + }); + + it('should delete temporary file when fullOutputFilePath is provided but output is not truncated', async () => { + // 1. Setup Config for Truncation + vi.spyOn(config, 'getTruncateToolOutputThreshold').mockReturnValue(100); + const unlinkSpy = vi + .spyOn(fsPromises, 'unlink') + .mockResolvedValue(undefined); + + const mockTool = new MockTool({ name: SHELL_TOOL_NAME }); + const invocation = mockTool.build({}); + const shortOutput = 'Short'; + + // 2. Mock execution returning short content AND fullOutputFilePath + vi.mocked(coreToolHookTriggers.executeToolWithHooks).mockResolvedValue({ + llmContent: shortOutput, + returnDisplay: shortOutput, + fullOutputFilePath: '/tmp/temp_full_output_short.txt', + }); + + const scheduledCall: ScheduledToolCall = { + status: CoreToolCallStatus.Scheduled, + request: { + callId: 'call-short-full', + name: SHELL_TOOL_NAME, + args: { command: 'echo short' }, + isClientInitiated: false, + prompt_id: 'prompt-short-full', + }, + tool: mockTool, + invocation: invocation as unknown as AnyToolInvocation, + startTime: Date.now(), + }; + + // 3. Execute + const result = await executor.execute({ + call: scheduledCall, + signal: new AbortController().signal, + onUpdateToolCall: vi.fn(), + }); + + // 4. Verify file deletion + expect(unlinkSpy).toHaveBeenCalledWith('/tmp/temp_full_output_short.txt'); + expect(fileUtils.formatTruncatedToolOutput).not.toHaveBeenCalled(); + + // We should not save it since it was not truncated + expect(result.status).toBe(CoreToolCallStatus.Success); + if (result.status === CoreToolCallStatus.Success) { + const response = result.response.responseParts[0]?.functionResponse + ?.response as Record; + expect(response).toEqual({ + output: 'Short', + }); + expect(result.response.outputFile).toBeUndefined(); + } + + unlinkSpy.mockRestore(); + }); + + it('should delete temporary file on error if fullOutputFilePath is provided', async () => { + const unlinkSpy = vi + .spyOn(fsPromises, 'unlink') + .mockResolvedValue(undefined); + const mockTool = new MockTool({ name: 'failTool' }); + const invocation = mockTool.build({}); + + vi.mocked(coreToolHookTriggers.executeToolWithHooks).mockResolvedValue({ + llmContent: 'partial', + returnDisplay: 'partial', + fullOutputFilePath: '/tmp/temp_error.txt', + error: { message: 'Tool Failed' }, + }); + + const scheduledCall: ScheduledToolCall = { + status: CoreToolCallStatus.Scheduled, + request: { + callId: 'call-err', + name: 'failTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-err', + }, + tool: mockTool, + invocation: invocation as unknown as AnyToolInvocation, + startTime: Date.now(), + }; + + const result = await executor.execute({ + call: scheduledCall, + signal: new AbortController().signal, + onUpdateToolCall: vi.fn(), + }); + + expect(unlinkSpy).toHaveBeenCalledWith('/tmp/temp_error.txt'); + expect(result.status).toBe(CoreToolCallStatus.Error); + unlinkSpy.mockRestore(); + }); + + it('should delete temporary file on abort if fullOutputFilePath is provided', async () => { + const unlinkSpy = vi + .spyOn(fsPromises, 'unlink') + .mockResolvedValue(undefined); + const mockTool = new MockTool({ name: 'slowTool' }); + const invocation = mockTool.build({}); + + const controller = new AbortController(); + + vi.mocked(coreToolHookTriggers.executeToolWithHooks).mockImplementation( + async () => { + controller.abort(); + return { + llmContent: 'partial', + returnDisplay: 'partial', + fullOutputFilePath: '/tmp/temp_abort.txt', + }; + }, + ); + + const scheduledCall: ScheduledToolCall = { + status: CoreToolCallStatus.Scheduled, + request: { + callId: 'call-abort', + name: 'slowTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-abort', + }, + tool: mockTool, + invocation: invocation as unknown as AnyToolInvocation, + startTime: Date.now(), + }; + + const result = await executor.execute({ + call: scheduledCall, + signal: controller.signal, + onUpdateToolCall: vi.fn(), + }); + + expect(unlinkSpy).toHaveBeenCalledWith('/tmp/temp_abort.txt'); + expect(result.status).toBe(CoreToolCallStatus.Cancelled); + unlinkSpy.mockRestore(); + }); + it('should report PID updates for shell tools', async () => { // 1. Setup ShellToolInvocation const messageBus = createMockMessageBus(); diff --git a/packages/core/src/scheduler/tool-executor.ts b/packages/core/src/scheduler/tool-executor.ts index 9ae00b24a7..52272f004d 100644 --- a/packages/core/src/scheduler/tool-executor.ts +++ b/packages/core/src/scheduler/tool-executor.ts @@ -4,6 +4,8 @@ * SPDX-License-Identifier: Apache-2.0 */ +import fsPromises from 'node:fs/promises'; +import { debugLogger } from '../utils/debugLogger.js'; import type { ToolCallRequestInfo, ToolCallResponseInfo, @@ -23,6 +25,7 @@ import { executeToolWithHooks } from '../core/coreToolHookTriggers.js'; import { saveTruncatedToolOutput, formatTruncatedToolOutput, + moveToolOutputToFile, } from '../utils/fileUtils.js'; import { convertToFunctionResponse } from '../utils/generateContentResponseUtilities.js'; import type { @@ -119,6 +122,16 @@ export class ToolExecutor { spanMetadata.output = toolResult; if (signal.aborted) { + if (toolResult.fullOutputFilePath) { + await fsPromises + .unlink(toolResult.fullOutputFilePath) + .catch((error) => { + debugLogger.warn( + `Failed to delete temporary tool output file on abort: ${toolResult.fullOutputFilePath}`, + error, + ); + }); + } return this.createCancelledResult( call, 'User cancelled tool execution.', @@ -126,6 +139,16 @@ export class ToolExecutor { } else if (toolResult.error === undefined) { return await this.createSuccessResult(call, toolResult); } else { + if (toolResult.fullOutputFilePath) { + await fsPromises + .unlink(toolResult.fullOutputFilePath) + .catch((error) => { + debugLogger.warn( + `Failed to delete temporary tool output file on error: ${toolResult.fullOutputFilePath}`, + error, + ); + }); + } const displayText = typeof toolResult.returnDisplay === 'string' ? toolResult.returnDisplay @@ -210,7 +233,34 @@ export class ToolExecutor { const toolName = call.request.originalRequestName || call.request.name; const callId = call.request.callId; - if (typeof content === 'string' && toolName === SHELL_TOOL_NAME) { + if (toolResult.fullOutputFilePath) { + const threshold = this.config.getTruncateToolOutputThreshold(); + if ( + threshold > 0 && + typeof content === 'string' && + content.length > threshold + ) { + const { outputFile: savedPath } = await moveToolOutputToFile( + toolResult.fullOutputFilePath, + toolName, + callId, + this.config.storage.getProjectTempDir(), + this.config.getSessionId(), + ); + outputFile = savedPath; + content = formatTruncatedToolOutput(content, outputFile, threshold); + } else { + // If the content is not truncated, we don't need the temporary file. + try { + await fsPromises.unlink(toolResult.fullOutputFilePath); + } catch (error) { + debugLogger.warn( + `Failed to delete temporary tool output file: ${toolResult.fullOutputFilePath}`, + error, + ); + } + } + } else if (typeof content === 'string' && toolName === SHELL_TOOL_NAME) { const threshold = this.config.getTruncateToolOutputThreshold(); if (threshold > 0 && content.length > threshold) { @@ -242,6 +292,7 @@ export class ToolExecutor { callId, content, this.config.getActiveModel(), + outputFile, ); const successResponse: ToolCallResponseInfo = { diff --git a/packages/core/src/services/shellExecutionService.test.ts b/packages/core/src/services/shellExecutionService.test.ts index 61186c9eb2..1285f2ac94 100644 --- a/packages/core/src/services/shellExecutionService.test.ts +++ b/packages/core/src/services/shellExecutionService.test.ts @@ -438,6 +438,7 @@ describe('ShellExecutionService', () => { ptyProcess: mockPtyProcess as any, // eslint-disable-next-line @typescript-eslint/no-explicit-any headlessTerminal: mockHeadlessTerminal as any, + lastCommittedLine: -1, }); }); @@ -679,9 +680,7 @@ describe('ShellExecutionService', () => { pty.onExit.mock.calls[0][0]({ exitCode: 0, signal: null }); }); - expect(result.rawOutput).toEqual( - Buffer.concat([binaryChunk1, binaryChunk2]), - ); + expect(result.rawOutput).toEqual(Buffer.concat([binaryChunk1])); expect(onOutputEventMock).toHaveBeenCalledTimes(4); expect(onOutputEventMock.mock.calls[0][0]).toEqual({ type: 'binary_detected', @@ -1197,9 +1196,7 @@ describe('ShellExecutionService child_process fallback', () => { cp.emit('exit', 0, null); }); - expect(result.rawOutput).toEqual( - Buffer.concat([binaryChunk1, binaryChunk2]), - ); + expect(result.rawOutput).toEqual(Buffer.concat([binaryChunk1])); expect(onOutputEventMock).toHaveBeenCalledTimes(4); expect(onOutputEventMock.mock.calls[0][0]).toEqual({ type: 'binary_detected', diff --git a/packages/core/src/services/shellExecutionService.ts b/packages/core/src/services/shellExecutionService.ts index c21eeb1136..768b4abb41 100644 --- a/packages/core/src/services/shellExecutionService.ts +++ b/packages/core/src/services/shellExecutionService.ts @@ -113,12 +113,24 @@ export interface ShellExecutionConfig { * Describes a structured event emitted during shell command execution. */ export type ShellOutputEvent = + | { + /** The event contains a chunk of raw output string before PTY render. */ + type: 'raw_data'; + /** The raw string chunk. */ + chunk: string; + } | { /** The event contains a chunk of output data. */ type: 'data'; /** The decoded string chunk. */ chunk: string | AnsiOutput; } + | { + /** The event contains a chunk of processed text data suitable for file logs. */ + type: 'file_data'; + /** The processed text chunk. */ + chunk: string; + } | { /** Signals that the output stream has been identified as binary. */ type: 'binary_detected'; @@ -142,6 +154,8 @@ interface ActivePty { ptyProcess: IPty; headlessTerminal: pkg.Terminal; maxSerializedLines?: number; + lastSerializedOutput?: AnsiOutput; + lastCommittedLine: number; } interface ActiveChildProcess { @@ -149,7 +163,6 @@ interface ActiveChildProcess { state: { output: string; truncated: boolean; - outputChunks: Buffer[]; }; } @@ -190,6 +203,54 @@ const getFullBufferText = (terminal: pkg.Terminal): string => { return lines.join('\n'); }; +const emitPendingLines = ( + activePty: ActivePty, + pid: number, + onOutputEvent: (event: ShellOutputEvent) => void, + forceAll = false, +) => { + const buffer = activePty.headlessTerminal.buffer.active; + // If forceAll is true, we emit everything up to the end of the buffer. + // Otherwise, we only emit lines that have scrolled into the backbuffer (baseY). + // This naturally protects against cursor modifications to visible lines. + const limit = forceAll ? buffer.length : buffer.baseY; + + let chunks = ''; + // We start from the line after the last one we committed. + for (let i = activePty.lastCommittedLine + 1; i < limit; i++) { + const line = buffer.getLine(i); + if (!line) continue; + + let trimRight = true; + // Check if the next line is wrapped. If so, this line continues on the next physical line. + // We should not append a newline character, and we should be careful about trimming. + let isNextLineWrapped = false; + if (i + 1 < buffer.length) { + const nextLine = buffer.getLine(i + 1); + if (nextLine?.isWrapped) { + isNextLineWrapped = true; + trimRight = false; + } + } + + const lineContent = line.translateToString(trimRight); + chunks += lineContent; + if (!isNextLineWrapped) { + chunks += '\n'; + } + } + + if (chunks.length > 0) { + const event: ShellOutputEvent = { + type: 'file_data', + chunk: chunks, + }; + onOutputEvent(event); + ShellExecutionService['emitEvent'](pid, event); + activePty.lastCommittedLine = limit - 1; + } +}; + /** * A centralized service for executing shell commands with robust process * management, cross-platform compatibility, and streaming output capabilities. @@ -325,7 +386,6 @@ export class ShellExecutionService { const state = { output: '', truncated: false, - outputChunks: [] as Buffer[], }; if (child.pid) { @@ -348,6 +408,8 @@ export class ShellExecutionService { let isStreamingRawContent = true; const MAX_SNIFF_SIZE = 4096; let sniffedBytes = 0; + const sniffChunks: Buffer[] = []; + let totalBytes = 0; const handleOutput = (data: Buffer, stream: 'stdout' | 'stderr') => { if (!stdoutDecoder || !stderrDecoder) { @@ -361,10 +423,11 @@ export class ShellExecutionService { } } - state.outputChunks.push(data); + totalBytes += data.length; if (isStreamingRawContent && sniffedBytes < MAX_SNIFF_SIZE) { - const sniffBuffer = Buffer.concat(state.outputChunks.slice(0, 20)); + sniffChunks.push(data); + const sniffBuffer = Buffer.concat(sniffChunks); sniffedBytes = sniffBuffer.length; if (isBinary(sniffBuffer)) { @@ -390,18 +453,30 @@ export class ShellExecutionService { } if (decodedChunk) { + const rawEvent: ShellOutputEvent = { + type: 'raw_data', + chunk: decodedChunk, + }; + onOutputEvent(rawEvent); + if (child.pid) + ShellExecutionService.emitEvent(child.pid, rawEvent); + const event: ShellOutputEvent = { type: 'data', chunk: decodedChunk, }; onOutputEvent(event); if (child.pid) ShellExecutionService.emitEvent(child.pid, event); + + const fileEvent: ShellOutputEvent = { + type: 'file_data', + chunk: stripAnsi(decodedChunk), + }; + onOutputEvent(fileEvent); + if (child.pid) + ShellExecutionService.emitEvent(child.pid, fileEvent); } } else { - const totalBytes = state.outputChunks.reduce( - (sum, chunk) => sum + chunk.length, - 0, - ); const event: ShellOutputEvent = { type: 'binary_progress', bytesReceived: totalBytes, @@ -489,6 +564,14 @@ export class ShellExecutionService { // If there's remaining output, we should technically emit it too, // but it's rare to have partial utf8 chars at the very end of stream. if (isStreamingRawContent && remaining) { + const rawEvent: ShellOutputEvent = { + type: 'raw_data', + chunk: remaining, + }; + onOutputEvent(rawEvent); + if (child.pid) + ShellExecutionService.emitEvent(child.pid, rawEvent); + const event: ShellOutputEvent = { type: 'data', chunk: remaining, @@ -496,6 +579,14 @@ export class ShellExecutionService { onOutputEvent(event); if (child.pid) ShellExecutionService.emitEvent(child.pid, event); + + const fileEvent: ShellOutputEvent = { + type: 'file_data', + chunk: stripAnsi(remaining), + }; + onOutputEvent(fileEvent); + if (child.pid) + ShellExecutionService.emitEvent(child.pid, fileEvent); } } } @@ -504,6 +595,14 @@ export class ShellExecutionService { if (remaining) { state.output += remaining; if (isStreamingRawContent && remaining) { + const rawEvent: ShellOutputEvent = { + type: 'raw_data', + chunk: remaining, + }; + onOutputEvent(rawEvent); + if (child.pid) + ShellExecutionService.emitEvent(child.pid, rawEvent); + const event: ShellOutputEvent = { type: 'data', chunk: remaining, @@ -511,11 +610,19 @@ export class ShellExecutionService { onOutputEvent(event); if (child.pid) ShellExecutionService.emitEvent(child.pid, event); + + const fileEvent: ShellOutputEvent = { + type: 'file_data', + chunk: stripAnsi(remaining), + }; + onOutputEvent(fileEvent); + if (child.pid) + ShellExecutionService.emitEvent(child.pid, fileEvent); } } } - const finalBuffer = Buffer.concat(state.outputChunks); + const finalBuffer = Buffer.concat(sniffChunks); return { finalBuffer }; } @@ -603,12 +710,13 @@ export class ShellExecutionService { ptyProcess, headlessTerminal, maxSerializedLines: shellExecutionConfig.maxSerializedLines, + lastCommittedLine: -1, }); - let processingChain = Promise.resolve(); let decoder: TextDecoder | null = null; let output: string | AnsiOutput | null = null; - const outputChunks: Buffer[] = []; + const sniffChunks: Buffer[] = []; + let totalBytes = 0; const error: Error | null = null; let exited = false; @@ -622,6 +730,11 @@ export class ShellExecutionService { const renderFn = () => { renderTimeout = null; + const activePty = this.activePtys.get(ptyProcess.pid); + if (activePty) { + emitPendingLines(activePty, ptyProcess.pid, onOutputEvent); + } + if (!isStreamingRawContent) { return; } @@ -663,6 +776,10 @@ export class ShellExecutionService { ); } + if (activePty) { + activePty.lastSerializedOutput = newOutput; + } + let lastNonEmptyLine = -1; for (let i = newOutput.length - 1; i >= 0; i--) { const line = newOutput[i]; @@ -720,66 +837,110 @@ export class ShellExecutionService { }, 68); }; - headlessTerminal.onScroll(() => { + let lastYdisp = 0; + let hasReachedMax = false; + const scrollbackLimit = + shellExecutionConfig.scrollback ?? SCROLLBACK_LIMIT; + + headlessTerminal.onScroll((ydisp) => { if (!isWriting) { render(); } + + if ( + ydisp === scrollbackLimit && + lastYdisp === scrollbackLimit && + hasReachedMax + ) { + const activePty = this.activePtys.get(ptyProcess.pid); + if (activePty) { + activePty.lastCommittedLine--; + } + } + if ( + ydisp === scrollbackLimit && + headlessTerminal.buffer.active.length === scrollbackLimit + rows + ) { + hasReachedMax = true; + } + lastYdisp = ydisp; + + // Emit pending lines immediately on scroll so we never lose lines + // that are about to fall off the top of the scrollback limit. + const activePtyForEmit = this.activePtys.get(ptyProcess.pid); + if (activePtyForEmit) { + emitPendingLines(activePtyForEmit, ptyProcess.pid, onOutputEvent); + } }); + let pendingWrites = 0; + let exitTrigger: (() => void) | null = null; + const handleOutput = (data: Buffer) => { - processingChain = processingChain.then( - () => - new Promise((resolve) => { - if (!decoder) { - const encoding = getCachedEncodingForBuffer(data); - try { - decoder = new TextDecoder(encoding); - } catch { - decoder = new TextDecoder('utf-8'); - } + if (!decoder) { + const encoding = getCachedEncodingForBuffer(data); + try { + decoder = new TextDecoder(encoding); + } catch { + decoder = new TextDecoder('utf-8'); + } + } + + totalBytes += data.length; + + if (isStreamingRawContent && sniffedBytes < MAX_SNIFF_SIZE) { + sniffChunks.push(data); + const sniffBuffer = Buffer.concat(sniffChunks); + sniffedBytes = sniffBuffer.length; + + if (isBinary(sniffBuffer)) { + isStreamingRawContent = false; + const event: ShellOutputEvent = { type: 'binary_detected' }; + onOutputEvent(event); + ShellExecutionService.emitEvent(ptyProcess.pid, event); + } + } + + if (isStreamingRawContent) { + const decodedChunk = decoder.decode(data, { stream: true }); + if (decodedChunk.length === 0) { + return; + } + isWriting = true; + pendingWrites++; + + // Emit raw_data for file streaming before pty render + const rawEvent: ShellOutputEvent = { + type: 'raw_data', + chunk: decodedChunk, + }; + onOutputEvent(rawEvent); + ShellExecutionService.emitEvent(ptyProcess.pid, rawEvent); + + headlessTerminal.write(decodedChunk, () => { + pendingWrites--; + + const activePty = this.activePtys.get(ptyProcess.pid); + if (activePty) { + emitPendingLines(activePty, ptyProcess.pid, onOutputEvent); + } + + render(); + if (pendingWrites === 0) { + isWriting = false; + if (exited && exitTrigger) { + exitTrigger(); } - - outputChunks.push(data); - - if (isStreamingRawContent && sniffedBytes < MAX_SNIFF_SIZE) { - const sniffBuffer = Buffer.concat(outputChunks.slice(0, 20)); - sniffedBytes = sniffBuffer.length; - - if (isBinary(sniffBuffer)) { - isStreamingRawContent = false; - const event: ShellOutputEvent = { type: 'binary_detected' }; - onOutputEvent(event); - ShellExecutionService.emitEvent(ptyProcess.pid, event); - } - } - - if (isStreamingRawContent) { - const decodedChunk = decoder.decode(data, { stream: true }); - if (decodedChunk.length === 0) { - resolve(); - return; - } - isWriting = true; - headlessTerminal.write(decodedChunk, () => { - render(); - isWriting = false; - resolve(); - }); - } else { - const totalBytes = outputChunks.reduce( - (sum, chunk) => sum + chunk.length, - 0, - ); - const event: ShellOutputEvent = { - type: 'binary_progress', - bytesReceived: totalBytes, - }; - onOutputEvent(event); - ShellExecutionService.emitEvent(ptyProcess.pid, event); - resolve(); - } - }), - ); + } + }); + } else { + const event: ShellOutputEvent = { + type: 'binary_progress', + bytesReceived: totalBytes, + }; + onOutputEvent(event); + ShellExecutionService.emitEvent(ptyProcess.pid, event); + } }; ptyProcess.onData((data: string) => { @@ -791,7 +952,6 @@ export class ShellExecutionService { ({ exitCode, signal }: { exitCode: number; signal?: number }) => { exited = true; abortSignal.removeEventListener('abort', abortHandler); - this.activePtys.delete(ptyProcess.pid); // Attempt to destroy the PTY to ensure FD is closed try { // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion @@ -803,6 +963,16 @@ export class ShellExecutionService { const finalize = () => { render(true); + const activePty = this.activePtys.get(ptyProcess.pid); + if (activePty) { + emitPendingLines( + activePty, + ptyProcess.pid, + onOutputEvent, + true, + ); + } + // Store exit info for late subscribers (e.g. backgrounding race condition) this.exitedPtyInfo.set(ptyProcess.pid, { exitCode, signal }); setTimeout( @@ -824,7 +994,7 @@ export class ShellExecutionService { ShellExecutionService.emitEvent(ptyProcess.pid, event); this.activeListeners.delete(ptyProcess.pid); - const finalBuffer = Buffer.concat(outputChunks); + const finalBuffer = Buffer.concat(sniffChunks); resolve({ rawOutput: finalBuffer, @@ -837,6 +1007,8 @@ export class ShellExecutionService { pid: ptyProcess.pid, executionMethod: ptyInfo?.name ?? 'node-pty', }); + + headlessTerminal.dispose(); }; if (abortSignal.aborted) { @@ -844,21 +1016,26 @@ export class ShellExecutionService { return; } - const processingComplete = processingChain.then(() => 'processed'); - const abortFired = new Promise<'aborted'>((res) => { - if (abortSignal.aborted) { - res('aborted'); - return; + let abortListener: (() => void) | undefined; + const finalizeWhenReady = () => { + if (abortListener) { + abortSignal.removeEventListener('abort', abortListener); } - abortSignal.addEventListener('abort', () => res('aborted'), { + finalize(); + }; + + if (pendingWrites === 0) { + finalizeWhenReady(); + } else { + exitTrigger = finalizeWhenReady; + abortListener = () => { + exitTrigger = null; + finalizeWhenReady(); + }; + abortSignal.addEventListener('abort', abortListener, { once: true, }); - }); - - // eslint-disable-next-line @typescript-eslint/no-floating-promises - Promise.race([processingComplete, abortFired]).then(() => { - finalize(); - }); + } }, ); @@ -1082,13 +1259,18 @@ export class ShellExecutionService { const endLine = activePty.headlessTerminal.buffer.active.length; const startLine = Math.max( 0, - endLine - (activePty.maxSerializedLines ?? 2000), + endLine - + (activePty.maxSerializedLines ?? + activePty.headlessTerminal.rows + 1000), ); const bufferData = serializeTerminalToObject( activePty.headlessTerminal, startLine, endLine, ); + + activePty.lastSerializedOutput = bufferData; + if (bufferData && bufferData.length > 0) { listener({ type: 'data', chunk: bufferData }); } @@ -1149,13 +1331,18 @@ export class ShellExecutionService { const endLine = activePty.headlessTerminal.buffer.active.length; const startLine = Math.max( 0, - endLine - (activePty.maxSerializedLines ?? 2000), + endLine - + (activePty.maxSerializedLines ?? + activePty.headlessTerminal.rows + 1000), ); const bufferData = serializeTerminalToObject( activePty.headlessTerminal, startLine, endLine, ); + + activePty.lastSerializedOutput = bufferData; + const event: ShellOutputEvent = { type: 'data', chunk: bufferData }; const listeners = ShellExecutionService.activeListeners.get(pid); if (listeners) { diff --git a/packages/core/src/services/toolOutputMaskingService.test.ts b/packages/core/src/services/toolOutputMaskingService.test.ts index 1187a28ae1..a06d689896 100644 --- a/packages/core/src/services/toolOutputMaskingService.test.ts +++ b/packages/core/src/services/toolOutputMaskingService.test.ts @@ -662,4 +662,82 @@ describe('ToolOutputMaskingService', () => { )['output'], ).toContain(MASKING_INDICATOR_TAG); }); + + it('should use existing outputFile if available in the tool response', async () => { + // Setup: Create a large history to trigger masking + const largeContent = 'a'.repeat(60000); + const existingOutputFile = path.join(testTempDir, 'truly_full_output.txt'); + await fs.promises.writeFile(existingOutputFile, 'truly full content'); + + const history: Content[] = [ + { + role: 'user', + parts: [{ text: 'Old turn' }], + }, + { + role: 'model', + parts: [ + { + functionResponse: { + name: 'shell', + id: 'call-1', + response: { + output: largeContent, + outputFile: existingOutputFile, + }, + }, + }, + ], + }, + // Protection buffer + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'padding', + response: { output: 'B'.repeat(60000) }, + }, + }, + ], + }, + { + role: 'user', + parts: [{ text: 'Newest turn' }], + }, + ]; + + mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => { + const resp = parts[0].functionResponse?.response as Record< + string, + unknown + >; + const content = (resp?.['output'] as string) ?? JSON.stringify(resp); + if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100; + + const name = parts[0].functionResponse?.name; + if (name === 'shell') return 60000; + if (name === 'padding') return 60000; + return 10; + }); + + // Trigger masking + const result = await service.mask(history, mockConfig); + + expect(result.maskedCount).toBe(2); + const maskedPart = result.newHistory[1].parts![0]; + const maskedResponse = maskedPart.functionResponse?.response as Record< + string, + unknown + >; + const maskedOutput = maskedResponse['output'] as string; + + // Verify the masked snippet points to the existing file + expect(maskedOutput).toContain( + `Full output available at: ${existingOutputFile}`, + ); + + // Verify the path in maskedOutput is exactly the one we provided + expect(maskedOutput).toContain(existingOutputFile); + }); }); diff --git a/packages/core/src/services/toolOutputMaskingService.ts b/packages/core/src/services/toolOutputMaskingService.ts index 8a7ae0090d..e275983db5 100644 --- a/packages/core/src/services/toolOutputMaskingService.ts +++ b/packages/core/src/services/toolOutputMaskingService.ts @@ -179,25 +179,47 @@ export class ToolOutputMaskingService { const toolName = part.functionResponse.name || 'unknown_tool'; const callId = part.functionResponse.id || Date.now().toString(); - const safeToolName = sanitizeFilenamePart(toolName).toLowerCase(); - const safeCallId = sanitizeFilenamePart(callId).toLowerCase(); - const fileName = `${safeToolName}_${safeCallId}_${Math.random() - .toString(36) - .substring(7)}.txt`; - const filePath = path.join(toolOutputsDir, fileName); - - await fsPromises.writeFile(filePath, content, 'utf-8'); const originalResponse = // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion (part.functionResponse.response as Record) || {}; - const totalLines = content.split('\n').length; - const fileSizeMB = ( - Buffer.byteLength(content, 'utf8') / - 1024 / - 1024 - ).toFixed(2); + let filePath = ''; + let fileSizeMB = '0.00'; + let totalLines = 0; + + if ( + typeof originalResponse['outputFile'] === 'string' && + originalResponse['outputFile'] + ) { + filePath = originalResponse['outputFile']; + try { + const stats = await fsPromises.stat(filePath); + fileSizeMB = (stats.size / 1024 / 1024).toFixed(2); + // For truly full files, we don't count lines as it's too slow. + // We just indicate it's the full file. + totalLines = -1; + } catch { + // Fallback if file is gone + filePath = ''; + } + } + + if (!filePath) { + const safeToolName = sanitizeFilenamePart(toolName).toLowerCase(); + const safeCallId = sanitizeFilenamePart(callId).toLowerCase(); + const fileName = `${safeToolName}_${safeCallId}_${Math.random() + .toString(36) + .substring(7)}.txt`; + filePath = path.join(toolOutputsDir, fileName); + + await fsPromises.writeFile(filePath, content, 'utf-8'); + + totalLines = content.split('\n').length; + fileSizeMB = (Buffer.byteLength(content, 'utf8') / 1024 / 1024).toFixed( + 2, + ); + } let preview = ''; if (toolName === SHELL_TOOL_NAME) { diff --git a/packages/core/src/tools/shell.test.ts b/packages/core/src/tools/shell.test.ts index 907d117439..7c47053afb 100644 --- a/packages/core/src/tools/shell.test.ts +++ b/packages/core/src/tools/shell.test.ts @@ -102,6 +102,7 @@ describe('ShellTool', () => { getDebugMode: vi.fn().mockReturnValue(false), getTargetDir: vi.fn().mockReturnValue(tempRootDir), getSummarizeToolOutputConfig: vi.fn().mockReturnValue(undefined), + getTruncateToolOutputThreshold: vi.fn().mockReturnValue(40000), getWorkspaceContext: vi .fn() .mockReturnValue(new WorkspaceContext(tempRootDir)), diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts index 741272f555..fbab896261 100644 --- a/packages/core/src/tools/shell.ts +++ b/packages/core/src/tools/shell.ts @@ -5,24 +5,23 @@ */ import fsPromises from 'node:fs/promises'; +import fs from 'node:fs'; import path from 'node:path'; import os, { EOL } from 'node:os'; import crypto from 'node:crypto'; import type { Config } from '../config/config.js'; -import { debugLogger } from '../index.js'; +import { debugLogger } from '../utils/debugLogger.js'; import { ToolErrorType } from './tool-error.js'; -import type { - ToolInvocation, - ToolResult, - ToolCallConfirmationDetails, - ToolExecuteConfirmationDetails, - PolicyUpdateOptions, -} from './tools.js'; import { BaseDeclarativeTool, BaseToolInvocation, ToolConfirmationOutcome, Kind, + type ToolInvocation, + type ToolResult, + type ToolCallConfirmationDetails, + type ToolExecuteConfirmationDetails, + type PolicyUpdateOptions, } from './tools.js'; import { getErrorMessage } from '../utils/errors.js'; @@ -177,6 +176,12 @@ export class ShellToolInvocation extends BaseToolInvocation< const onAbort = () => combinedController.abort(); + const outputFileName = `gemini_shell_output_${crypto.randomBytes(6).toString('hex')}.log`; + const outputFilePath = path.join(os.tmpdir(), outputFileName); + const outputStream = fs.createWriteStream(outputFilePath); + + let fullOutputReturned = false; + try { // pgrep is not available on Windows, so we can't get background PIDs const commandToExecute = isWindows @@ -206,6 +211,7 @@ export class ShellToolInvocation extends BaseToolInvocation< let cumulativeOutput: string | AnsiOutput = ''; let lastUpdateTime = Date.now(); let isBinaryStream = false; + let totalBytesWritten = 0; const resetTimeout = () => { if (timeoutMs <= 0) { @@ -231,31 +237,46 @@ export class ShellToolInvocation extends BaseToolInvocation< cwd, (event: ShellOutputEvent) => { resetTimeout(); // Reset timeout on any event - if (!updateOutput) { - return; - } - - let shouldUpdate = false; switch (event.type) { + case 'raw_data': + // We do not write raw data to the file to avoid spurious escape codes. + // We rely on 'file_data' for the clean output stream. + break; + case 'file_data': + if (!isBinaryStream) { + totalBytesWritten += Buffer.byteLength(event.chunk); + outputStream.write(event.chunk); + } + break; case 'data': if (isBinaryStream) break; cumulativeOutput = event.chunk; - shouldUpdate = true; + if (updateOutput && !this.params.is_background) { + updateOutput(cumulativeOutput); + lastUpdateTime = Date.now(); + } break; case 'binary_detected': isBinaryStream = true; cumulativeOutput = '[Binary output detected. Halting stream...]'; - shouldUpdate = true; + if (updateOutput && !this.params.is_background) { + updateOutput(cumulativeOutput); + } break; case 'binary_progress': isBinaryStream = true; cumulativeOutput = `[Receiving binary output... ${formatBytes( event.bytesReceived, )} received]`; - if (Date.now() - lastUpdateTime > OUTPUT_UPDATE_INTERVAL_MS) { - shouldUpdate = true; + if ( + updateOutput && + !this.params.is_background && + Date.now() - lastUpdateTime > OUTPUT_UPDATE_INTERVAL_MS + ) { + updateOutput(cumulativeOutput); + lastUpdateTime = Date.now(); } break; case 'exit': @@ -264,11 +285,6 @@ export class ShellToolInvocation extends BaseToolInvocation< throw new Error('An unhandled ShellOutputEvent was found.'); } } - - if (shouldUpdate && !this.params.is_background) { - updateOutput(cumulativeOutput); - lastUpdateTime = Date.now(); - } }, combinedController.signal, this.config.getEnableInteractiveShell(), @@ -295,6 +311,9 @@ export class ShellToolInvocation extends BaseToolInvocation< } const result = await resultPromise; + await new Promise((resolve) => { + outputStream.end(resolve); + }); const backgroundPIDs: number[] = []; if (os.platform() !== 'win32') { @@ -427,21 +446,46 @@ export class ShellToolInvocation extends BaseToolInvocation< this.config.getGeminiClient(), signal, ); - return { + const threshold = this.config.getTruncateToolOutputThreshold(); + const fullOutputFilePath = + threshold > 0 && totalBytesWritten >= threshold + ? outputFilePath + : undefined; + + const toolResult: ToolResult = { llmContent: summary, returnDisplay: returnDisplayMessage, + fullOutputFilePath, ...executionError, }; + if (toolResult.fullOutputFilePath) { + fullOutputReturned = true; + } + return toolResult; } - return { + const threshold = this.config.getTruncateToolOutputThreshold(); + const fullOutputFilePath = + threshold > 0 && totalBytesWritten >= threshold + ? outputFilePath + : undefined; + + const toolResult: ToolResult = { llmContent, returnDisplay: returnDisplayMessage, data, + fullOutputFilePath, ...executionError, }; + if (toolResult.fullOutputFilePath) { + fullOutputReturned = true; + } + return toolResult; } finally { if (timeoutTimer) clearTimeout(timeoutTimer); + if (!outputStream.closed) { + outputStream.destroy(); + } signal.removeEventListener('abort', onAbort); timeoutController.signal.removeEventListener('abort', onAbort); try { @@ -449,6 +493,13 @@ export class ShellToolInvocation extends BaseToolInvocation< } catch { // Ignore errors during unlink } + if (!fullOutputReturned) { + try { + await fsPromises.unlink(outputFilePath); + } catch { + // Ignore errors during unlink + } + } } } } diff --git a/packages/core/src/tools/tools.ts b/packages/core/src/tools/tools.ts index d847b596e0..bbaaadb1dc 100644 --- a/packages/core/src/tools/tools.ts +++ b/packages/core/src/tools/tools.ts @@ -594,6 +594,13 @@ export interface ToolResult { name: string; args: Record; }; + + /** + * Optional path to a file containing the full, non-truncated output of the tool. + * If provided, the scheduler may use this file for long-term storage and + * reference it in the conversation history if the output is truncated. + */ + fullOutputFilePath?: string; } /** diff --git a/packages/core/src/utils/fileUtils.ts b/packages/core/src/utils/fileUtils.ts index 42119c3f18..85306e28ae 100644 --- a/packages/core/src/utils/fileUtils.ts +++ b/packages/core/src/utils/fileUtils.ts @@ -615,6 +615,48 @@ ${head} ${tail}`; } +/** + * Moves tool output from a source path to a temporary file for later retrieval. + */ +export async function moveToolOutputToFile( + sourcePath: string, + toolName: string, + id: string | number, // Accept string (callId) or number (truncationId) + projectTempDir: string, + sessionId?: string, +): Promise<{ outputFile: string }> { + const safeToolName = sanitizeFilenamePart(toolName).toLowerCase(); + const safeId = sanitizeFilenamePart(id.toString()).toLowerCase(); + const fileName = safeId.startsWith(safeToolName) + ? `${safeId}.txt` + : `${safeToolName}_${safeId}.txt`; + + let toolOutputDir = path.join(projectTempDir, TOOL_OUTPUTS_DIR); + if (sessionId) { + const safeSessionId = sanitizeFilenamePart(sessionId); + toolOutputDir = path.join(toolOutputDir, `session-${safeSessionId}`); + } + const outputFile = path.join(toolOutputDir, fileName); + + await fsPromises.mkdir(toolOutputDir, { recursive: true }); + + try { + // Attempt rename (efficient if on the same filesystem) + await fsPromises.rename(sourcePath, outputFile); + } catch (error: unknown) { + // If rename fails (e.g. cross-filesystem), copy and then delete + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + if ((error as { code?: string }).code === 'EXDEV') { + await fsPromises.copyFile(sourcePath, outputFile); + await fsPromises.unlink(sourcePath); + } else { + throw error; + } + } + + return { outputFile }; +} + /** * Saves tool output to a temporary file for later retrieval. */ diff --git a/packages/core/src/utils/generateContentResponseUtilities.ts b/packages/core/src/utils/generateContentResponseUtilities.ts index fdd5dff81a..d558213c9c 100644 --- a/packages/core/src/utils/generateContentResponseUtilities.ts +++ b/packages/core/src/utils/generateContentResponseUtilities.ts @@ -21,12 +21,13 @@ function createFunctionResponsePart( callId: string, toolName: string, output: string, + outputFile?: string, ): Part { return { functionResponse: { id: callId, name: toolName, - response: { output }, + response: { output, outputFile }, }, }; } @@ -48,9 +49,12 @@ export function convertToFunctionResponse( callId: string, llmContent: PartListUnion, model: string, + outputFile?: string, ): Part[] { if (typeof llmContent === 'string') { - return [createFunctionResponsePart(callId, toolName, llmContent)]; + return [ + createFunctionResponsePart(callId, toolName, llmContent, outputFile), + ]; } const parts = toParts(llmContent); @@ -92,7 +96,10 @@ export function convertToFunctionResponse( functionResponse: { id: callId, name: toolName, - response: textParts.length > 0 ? { output: textParts.join('\n') } : {}, + response: { + ...(textParts.length > 0 ? { output: textParts.join('\n') } : {}), + outputFile, + }, }, };