diff --git a/integration-tests/run_shell_command_file_stream.test.ts b/integration-tests/run_shell_command_file_stream.test.ts new file mode 100644 index 0000000000..9fbea9499d --- /dev/null +++ b/integration-tests/run_shell_command_file_stream.test.ts @@ -0,0 +1,230 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { TestRig } from './test-helper.js'; +import * as fs from 'node:fs'; +import * as path from 'node:path'; + +describe('run_shell_command streaming to file regression', () => { + let rig: TestRig; + + beforeEach(() => { + rig = new TestRig(); + }); + + afterEach(async () => await rig.cleanup()); + + it('should stream large outputs to a file and verify full content presence', async () => { + await rig.setup( + 'should stream large outputs to a file and verify full content presence', + { + settings: { tools: { core: ['run_shell_command'] } }, + }, + ); + + const numLines = 20000; + const testFileName = 'large_output_test.txt'; + const testFilePath = path.join(rig.testDir!, testFileName); + + // Create a ~20MB file with unique content at start and end + const startMarker = 'START_OF_FILE_MARKER'; + const endMarker = 'END_OF_FILE_MARKER'; + + const stream = fs.createWriteStream(testFilePath); + stream.write(startMarker + '\n'); + for (let i = 0; i < numLines; i++) { + stream.write(`Line ${i + 1}: ` + 'A'.repeat(1000) + '\n'); + } + stream.write(endMarker + '\n'); + await new Promise((resolve) => stream.end(resolve)); + + const fileSize = fs.statSync(testFilePath).size; + expect(fileSize).toBeGreaterThan(20000000); + + const prompt = `Use run_shell_command to cat ${testFileName} and say 'Done.'`; + await rig.run({ args: prompt }); + + let savedFilePath = ''; + const tmpdir = path.join(rig.homeDir!, '.gemini', 'tmp'); + if (fs.existsSync(tmpdir)) { + const files = fs.readdirSync(tmpdir, { + recursive: true, + withFileTypes: true, + }); + for (const file of files) { + if (file.isFile() && file.name.endsWith('.txt')) { + // In Node 20+, recursive readdir returns Dirent objects where `parentPath` is the directory path, + // but sometimes `path` is used in older Node. fallback: + const parentDir = + (file as { parentPath?: string }).parentPath ?? + (file as { path?: string }).path ?? + tmpdir; + const p = path.join(parentDir, file.name); + const stat = fs.statSync(p); + if (Date.now() - stat.mtimeMs < 60000 && stat.size >= 20000000) { + savedFilePath = p; + break; + } + } + } + } + + expect( + savedFilePath, + `Expected to find a saved output file >= 20MB in ${tmpdir}`, + ).toBeTruthy(); + const savedContent = fs.readFileSync(savedFilePath, 'utf8'); + expect(savedContent).toContain(startMarker); + expect(savedContent).toContain(endMarker); + expect(savedContent.length).toBeGreaterThanOrEqual(fileSize); + + fs.unlinkSync(savedFilePath); + }, 120000); + + it('should stream very large (50MB) outputs to a file and verify full content presence', async () => { + await rig.setup( + 'should stream very large (50MB) outputs to a file and verify full content presence', + { + settings: { tools: { core: ['run_shell_command'] } }, + }, + ); + + const numLines = 1000000; + const testFileName = 'very_large_output_test.txt'; + const testFilePath = path.join(rig.testDir!, testFileName); + + // Create a ~50MB file with unique content at start and end + const startMarker = 'START_OF_FILE_MARKER'; + const endMarker = 'END_OF_FILE_MARKER'; + + const stream = fs.createWriteStream(testFilePath); + stream.write(startMarker + '\n'); + for (let i = 0; i < numLines; i++) { + stream.write(`Line ${i + 1}: ` + 'A'.repeat(40) + '\n'); + } + stream.write(endMarker + '\n'); + await new Promise((resolve) => stream.end(resolve)); + + const fileSize = fs.statSync(testFilePath).size; + expect(fileSize).toBeGreaterThan(45000000); + + const prompt = `Use run_shell_command to cat ${testFileName} and say 'Done.'`; + await rig.run({ args: prompt }); + + let savedFilePath = ''; + const tmpdir = path.join(rig.homeDir!, '.gemini', 'tmp'); + if (fs.existsSync(tmpdir)) { + const files = fs.readdirSync(tmpdir, { + recursive: true, + withFileTypes: true, + }); + for (const file of files) { + if (file.isFile() && file.name.endsWith('.txt')) { + const parentDir = + (file as { parentPath?: string }).parentPath ?? + (file as { path?: string }).path ?? + tmpdir; + const p = path.join(parentDir, file.name); + const stat = fs.statSync(p); + // Look for file >= 20MB (since we expect 50MB, but allowing margin for the bug) + if (Date.now() - stat.mtimeMs < 60000 && stat.size >= 20000000) { + savedFilePath = p; + break; + } + } + } + } + + expect( + savedFilePath, + `Expected to find a saved output file >= 20MB in ${tmpdir}`, + ).toBeTruthy(); + const savedContent = fs.readFileSync(savedFilePath, 'utf8'); + expect(savedContent).toContain(startMarker); + expect(savedContent).toContain(endMarker); + expect(savedContent.length).toBeGreaterThanOrEqual(fileSize); + + fs.unlinkSync(savedFilePath); + }, 120000); + + it('should produce clean output resolving carriage returns and backspaces', async () => { + await rig.setup( + 'should produce clean output resolving carriage returns and backspaces', + { + settings: { + tools: { core: ['run_shell_command'] }, + }, + }, + ); + + const script = ` +import sys +import time + +# Fill buffer to force file streaming/truncation +# 45000 chars to be safe (default threshold is 40000) +print('A' * 45000) +sys.stdout.flush() + +# Test sequence +print('XXXXX', end='', flush=True) +time.sleep(0.5) +print('\\rYYYYY', end='', flush=True) +time.sleep(0.5) +print('\\nNext Line', end='', flush=True) +`; + const scriptPath = path.join(rig.testDir!, 'test_script.py'); + fs.writeFileSync(scriptPath, script); + + const prompt = `run_shell_command python3 "${scriptPath}"`; + await rig.run({ args: prompt }); + + let savedFilePath = ''; + const tmpdir = path.join(rig.homeDir!, '.gemini', 'tmp'); + if (fs.existsSync(tmpdir)) { + const findFiles = (dir: string): string[] => { + let results: string[] = []; + const list = fs.readdirSync(dir, { withFileTypes: true }); + for (const file of list) { + const fullPath = path.join(dir, file.name); + if (file.isDirectory()) { + results = results.concat(findFiles(fullPath)); + } else if (file.isFile() && file.name.endsWith('.txt')) { + results.push(fullPath); + } + } + return results; + }; + + const files = findFiles(tmpdir); + files.sort((a, b) => fs.statSync(b).mtimeMs - fs.statSync(a).mtimeMs); + + if (files.length > 0) { + savedFilePath = files[0]; + } + } + + expect(savedFilePath, 'Output file should exist').toBeTruthy(); + const content = fs.readFileSync(savedFilePath, 'utf8'); + + // Verify it contains the large chunk + expect(content).toContain('AAAA'); + + // Verify cleanup logic: + // 1. The final text "YYYYY" should be present. + expect(content).toContain('YYYYY'); + // 2. The next line should be present. + expect(content).toContain('Next Line'); + + // 3. Verify overwrite happened. + // In raw output, we would have "XXXXX...YYYYY". + // In processed output, "YYYYY" overwrites "XXXXX". + // We confirm that escape codes are stripped (processed text). + + // 4. Check for ANSI escape codes (like \\x1b) just in case + expect(content).not.toContain('\x1b'); + }, 60000); +}); diff --git a/packages/cli/src/ui/components/AnsiOutput.tsx b/packages/cli/src/ui/components/AnsiOutput.tsx index 617740d4ad..c10cfae69e 100644 --- a/packages/cli/src/ui/components/AnsiOutput.tsx +++ b/packages/cli/src/ui/components/AnsiOutput.tsx @@ -4,7 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type React from 'react'; +import React from 'react'; import { Box, Text } from 'ink'; import type { AnsiLine, AnsiOutput, AnsiToken } from '@google/gemini-cli-core'; @@ -53,23 +53,26 @@ export const AnsiOutputText: React.FC = ({ ); }; -export const AnsiLineText: React.FC<{ line: AnsiLine }> = ({ line }) => ( - - {line.length > 0 - ? line.map((token: AnsiToken, tokenIndex: number) => ( - - {token.text} - - )) - : null} - +export const AnsiLineText = React.memo<{ line: AnsiLine }>( + ({ line }: { line: AnsiLine }) => ( + + {line.length > 0 + ? line.map((token: AnsiToken, tokenIndex: number) => ( + + {token.text} + + )) + : null} + + ), ); +AnsiLineText.displayName = 'AnsiLineText'; diff --git a/packages/cli/src/ui/hooks/useExecutionLifecycle.test.tsx b/packages/cli/src/ui/hooks/useExecutionLifecycle.test.tsx index f802fe849b..7a0715e864 100644 --- a/packages/cli/src/ui/hooks/useExecutionLifecycle.test.tsx +++ b/packages/cli/src/ui/hooks/useExecutionLifecycle.test.tsx @@ -144,6 +144,11 @@ describe('useExecutionLifecycle', () => { enableEnvironmentVariableRedaction: false, }, }), + getTruncateToolOutputThreshold: () => 40000, + storage: { + getProjectTempDir: () => '/tmp/project', + }, + getSessionId: () => 'test-session', } as unknown as Config; mockGeminiClient = { addHistory: vi.fn() } as unknown as GeminiClient; @@ -155,6 +160,16 @@ describe('useExecutionLifecycle', () => { mockIsBinary.mockReturnValue(false); vi.mocked(fs.existsSync).mockReturnValue(false); + vi.mocked(fs.createWriteStream).mockReturnValue({ + write: vi.fn(), + end: vi.fn().mockImplementation((cb: () => void) => { + if (cb) cb(); + }), + destroy: vi.fn(), + bytesWritten: 0, + closed: false, + } as unknown as fs.WriteStream); + mockShellExecutionService.mockImplementation((_cmd, _cwd, callback) => { mockShellOutputCallback = callback; return Promise.resolve({ diff --git a/packages/cli/src/ui/hooks/useExecutionLifecycle.ts b/packages/cli/src/ui/hooks/useExecutionLifecycle.ts index d1fab89df8..95cfac041b 100644 --- a/packages/cli/src/ui/hooks/useExecutionLifecycle.ts +++ b/packages/cli/src/ui/hooks/useExecutionLifecycle.ts @@ -20,6 +20,7 @@ import { ShellExecutionService, ExecutionLifecycleService, CoreToolCallStatus, + moveToolOutputToFile, } from '@google/gemini-cli-core'; import { type PartListUnion } from '@google/genai'; import type { UseHistoryManagerReturn } from './useHistoryManager.js'; @@ -39,16 +40,15 @@ export { type BackgroundTask }; export const OUTPUT_UPDATE_INTERVAL_MS = 1000; const RESTORE_VISIBILITY_DELAY_MS = 300; -const MAX_OUTPUT_LENGTH = 10000; - function addShellCommandToGeminiHistory( geminiClient: GeminiClient, rawQuery: string, resultText: string, + maxOutputLength: number, ) { const modelContent = - resultText.length > MAX_OUTPUT_LENGTH - ? resultText.substring(0, MAX_OUTPUT_LENGTH) + '\n... (truncated)' + maxOutputLength > 0 && resultText.length > maxOutputLength + ? resultText.substring(0, maxOutputLength) + '\n... (truncated)' : resultText; // Escape backticks to prevent prompt injection breakouts @@ -378,6 +378,11 @@ export const useExecutionLifecycle = ( let cumulativeStdout: string | AnsiOutput = ''; let isBinaryStream = false; let binaryBytesReceived = 0; + let totalBytesWritten = 0; + + const outputFileName = `gemini_shell_output_${crypto.randomBytes(6).toString('hex')}.log`; + const outputFilePath = path.join(os.tmpdir(), outputFileName); + const outputStream = fs.createWriteStream(outputFilePath); const initialToolDisplay: IndividualToolCallDisplay = { callId, @@ -395,6 +400,7 @@ export const useExecutionLifecycle = ( }); let executionPid: number | undefined; + let fullOutputReturned = false; const abortHandler = () => { onDebugMessage( @@ -424,11 +430,23 @@ export const useExecutionLifecycle = ( let shouldUpdate = false; switch (event.type) { + case 'raw_data': + if (!isBinaryStream) { + outputStream.write(event.chunk); + totalBytesWritten += Buffer.byteLength(event.chunk); + } + break; case 'data': if (isBinaryStream) break; if (typeof event.chunk === 'string') { if (typeof cumulativeStdout === 'string') { cumulativeStdout += event.chunk; + // Keep a small buffer for the UI to prevent memory spikes and Ink lagging + const MAX_UI_LENGTH = 100000; // 100KB + if (cumulativeStdout.length > MAX_UI_LENGTH) { + cumulativeStdout = + cumulativeStdout.slice(-MAX_UI_LENGTH); + } } else { cumulativeStdout = event.chunk; } @@ -514,6 +532,9 @@ export const useExecutionLifecycle = ( } const result = await resultPromise; + await new Promise((resolve) => { + outputStream.end(resolve); + }); setPendingHistoryItem(null); if (result.backgrounded && result.pid) { @@ -533,6 +554,26 @@ export const useExecutionLifecycle = ( } else { mainContent = result.output.trim() || '(Command produced no output)'; + const threshold = config.getTruncateToolOutputThreshold(); + if (threshold > 0 && totalBytesWritten >= threshold) { + const { outputFile: savedPath } = await moveToolOutputToFile( + outputFilePath, + SHELL_COMMAND_NAME, + callId, + config.storage.getProjectTempDir(), + config.getSessionId(), + ); + const warning = `[Full command output saved to: ${savedPath}]`; + mainContent = mainContent.includes( + '[GEMINI_CLI_WARNING: Output truncated.', + ) + ? mainContent.replace( + /\[GEMINI_CLI_WARNING: Output truncated\..*?\]/, + warning, + ) + : `${mainContent}\n\n${warning}`; + fullOutputReturned = true; + } } let finalOutput: string | AnsiOutput = @@ -617,7 +658,12 @@ export const useExecutionLifecycle = ( ); } - addShellCommandToGeminiHistory(geminiClient, rawQuery, mainContent); + addShellCommandToGeminiHistory( + geminiClient, + rawQuery, + mainContent, + config.getTruncateToolOutputThreshold(), + ); } catch (err) { setPendingHistoryItem(null); const errorMessage = err instanceof Error ? err.message : String(err); @@ -630,12 +676,23 @@ export const useExecutionLifecycle = ( ); } finally { abortSignal.removeEventListener('abort', abortHandler); + if (!outputStream.closed) { + outputStream.destroy(); + } if (pwdFilePath && fs.existsSync(pwdFilePath)) { fs.unlinkSync(pwdFilePath); } dispatch({ type: 'SET_ACTIVE_PTY', pid: null }); setShellInputFocused(false); + + if (!fullOutputReturned && fs.existsSync(outputFilePath)) { + try { + fs.unlinkSync(outputFilePath); + } catch { + // Ignore errors during unlink + } + } } }; diff --git a/packages/core/src/context/toolOutputMaskingService.test.ts b/packages/core/src/context/toolOutputMaskingService.test.ts index 037890b443..fdcfa9f6df 100644 --- a/packages/core/src/context/toolOutputMaskingService.test.ts +++ b/packages/core/src/context/toolOutputMaskingService.test.ts @@ -661,4 +661,82 @@ describe('ToolOutputMaskingService', () => { )['output'], ).toContain(MASKING_INDICATOR_TAG); }); + + it('should use existing outputFile if available in the tool response', async () => { + // Setup: Create a large history to trigger masking + const largeContent = 'a'.repeat(60000); + const existingOutputFile = path.join(testTempDir, 'truly_full_output.txt'); + await fs.promises.writeFile(existingOutputFile, 'truly full content'); + + const history: Content[] = [ + { + role: 'user', + parts: [{ text: 'Old turn' }], + }, + { + role: 'model', + parts: [ + { + functionResponse: { + name: 'shell', + id: 'call-1', + response: { + output: largeContent, + outputFile: existingOutputFile, + }, + }, + }, + ], + }, + // Protection buffer + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'padding', + response: { output: 'B'.repeat(60000) }, + }, + }, + ], + }, + { + role: 'user', + parts: [{ text: 'Newest turn' }], + }, + ]; + + mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => { + const resp = parts[0].functionResponse?.response as Record< + string, + unknown + >; + const content = (resp?.['output'] as string) ?? JSON.stringify(resp); + if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100; + + const name = parts[0].functionResponse?.name; + if (name === 'shell') return 60000; + if (name === 'padding') return 60000; + return 10; + }); + + // Trigger masking + const result = await service.mask(history, mockConfig); + + expect(result.maskedCount).toBe(2); + const maskedPart = result.newHistory[1].parts![0]; + const maskedResponse = maskedPart.functionResponse?.response as Record< + string, + unknown + >; + const maskedOutput = maskedResponse['output'] as string; + + // Verify the masked snippet points to the existing file + expect(maskedOutput).toContain( + `Full output available at: ${existingOutputFile}`, + ); + + // Verify the path in maskedOutput is exactly the one we provided + expect(maskedOutput).toContain(existingOutputFile); + }); }); diff --git a/packages/core/src/context/toolOutputMaskingService.ts b/packages/core/src/context/toolOutputMaskingService.ts index 77158040ca..44b8fcf897 100644 --- a/packages/core/src/context/toolOutputMaskingService.ts +++ b/packages/core/src/context/toolOutputMaskingService.ts @@ -182,25 +182,47 @@ export class ToolOutputMaskingService { const toolName = part.functionResponse.name || 'unknown_tool'; const callId = part.functionResponse.id || Date.now().toString(); - const safeToolName = sanitizeFilenamePart(toolName).toLowerCase(); - const safeCallId = sanitizeFilenamePart(callId).toLowerCase(); - const fileName = `${safeToolName}_${safeCallId}_${Math.random() - .toString(36) - .substring(7)}.txt`; - const filePath = path.join(toolOutputsDir, fileName); - - await fsPromises.writeFile(filePath, content, 'utf-8'); const originalResponse = // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion (part.functionResponse.response as Record) || {}; - const totalLines = content.split('\n').length; - const fileSizeMB = ( - Buffer.byteLength(content, 'utf8') / - 1024 / - 1024 - ).toFixed(2); + let filePath = ''; + let fileSizeMB = '0.00'; + let totalLines = 0; + + if ( + typeof originalResponse['outputFile'] === 'string' && + originalResponse['outputFile'] + ) { + filePath = originalResponse['outputFile']; + try { + const stats = await fsPromises.stat(filePath); + fileSizeMB = (stats.size / 1024 / 1024).toFixed(2); + // For truly full files, we don't count lines as it's too slow. + // We just indicate it's the full file. + totalLines = -1; + } catch { + // Fallback if file is gone + filePath = ''; + } + } + + if (!filePath) { + const safeToolName = sanitizeFilenamePart(toolName).toLowerCase(); + const safeCallId = sanitizeFilenamePart(callId).toLowerCase(); + const fileName = `${safeToolName}_${safeCallId}_${Math.random() + .toString(36) + .substring(7)}.txt`; + filePath = path.join(toolOutputsDir, fileName); + + await fsPromises.writeFile(filePath, content, 'utf-8'); + + totalLines = content.split('\n').length; + fileSizeMB = (Buffer.byteLength(content, 'utf8') / 1024 / 1024).toFixed( + 2, + ); + } let preview = ''; if (toolName === SHELL_TOOL_NAME) { diff --git a/packages/core/src/scheduler/tool-executor.test.ts b/packages/core/src/scheduler/tool-executor.test.ts index 40ca78e4f3..2bf3507889 100644 --- a/packages/core/src/scheduler/tool-executor.test.ts +++ b/packages/core/src/scheduler/tool-executor.test.ts @@ -5,6 +5,7 @@ */ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import fsPromises from 'node:fs/promises'; import { ToolExecutor } from './tool-executor.js'; import { type Config, @@ -597,6 +598,215 @@ describe('ToolExecutor', () => { expect(result.status).toBe(CoreToolCallStatus.Success); }); + it('should truncate large output and move file when fullOutputFilePath is provided', async () => { + // 1. Setup Config for Truncation + vi.spyOn(config, 'getTruncateToolOutputThreshold').mockReturnValue(10); + vi.spyOn(config.storage, 'getProjectTempDir').mockReturnValue('/tmp'); + vi.spyOn(fileUtils, 'moveToolOutputToFile').mockResolvedValue({ + outputFile: '/tmp/moved_output.txt', + }); + + const mockTool = new MockTool({ name: SHELL_TOOL_NAME }); + const invocation = mockTool.build({}); + const longOutput = 'This is a very long output that should be truncated.'; + + // 2. Mock execution returning long content AND fullOutputFilePath + vi.mocked(coreToolHookTriggers.executeToolWithHooks).mockResolvedValue({ + llmContent: longOutput, + returnDisplay: longOutput, + fullOutputFilePath: '/tmp/temp_full_output.txt', + }); + + const scheduledCall: ScheduledToolCall = { + status: CoreToolCallStatus.Scheduled, + request: { + callId: 'call-trunc-full', + name: SHELL_TOOL_NAME, + args: { command: 'echo long' }, + isClientInitiated: false, + prompt_id: 'prompt-trunc-full', + }, + tool: mockTool, + invocation: invocation as unknown as AnyToolInvocation, + startTime: Date.now(), + }; + + // 3. Execute + const result = await executor.execute({ + call: scheduledCall, + signal: new AbortController().signal, + onUpdateToolCall: vi.fn(), + }); + + // 4. Verify Truncation Logic + expect(fileUtils.moveToolOutputToFile).toHaveBeenCalledWith( + '/tmp/temp_full_output.txt', + SHELL_TOOL_NAME, + 'call-trunc-full', + expect.any(String), // temp dir + 'test-session-id', // session id from makeFakeConfig + ); + + expect(fileUtils.formatTruncatedToolOutput).toHaveBeenCalledWith( + longOutput, + '/tmp/moved_output.txt', + 10, // threshold (maxChars) + ); + + expect(result.status).toBe(CoreToolCallStatus.Success); + if (result.status === CoreToolCallStatus.Success) { + const response = result.response.responseParts[0]?.functionResponse + ?.response as Record; + // The content should be the *truncated* version returned by the mock formatTruncatedToolOutput + expect(response).toEqual({ + output: 'TruncatedContent...', + outputFile: '/tmp/moved_output.txt', + }); + expect(result.response.outputFile).toBe('/tmp/moved_output.txt'); + } + }); + + it('should delete temporary file when fullOutputFilePath is provided but output is not truncated', async () => { + // 1. Setup Config for Truncation + vi.spyOn(config, 'getTruncateToolOutputThreshold').mockReturnValue(100); + const unlinkSpy = vi + .spyOn(fsPromises, 'unlink') + .mockResolvedValue(undefined); + + const mockTool = new MockTool({ name: SHELL_TOOL_NAME }); + const invocation = mockTool.build({}); + const shortOutput = 'Short'; + + // 2. Mock execution returning short content AND fullOutputFilePath + vi.mocked(coreToolHookTriggers.executeToolWithHooks).mockResolvedValue({ + llmContent: shortOutput, + returnDisplay: shortOutput, + fullOutputFilePath: '/tmp/temp_full_output_short.txt', + }); + + const scheduledCall: ScheduledToolCall = { + status: CoreToolCallStatus.Scheduled, + request: { + callId: 'call-short-full', + name: SHELL_TOOL_NAME, + args: { command: 'echo short' }, + isClientInitiated: false, + prompt_id: 'prompt-short-full', + }, + tool: mockTool, + invocation: invocation as unknown as AnyToolInvocation, + startTime: Date.now(), + }; + + // 3. Execute + const result = await executor.execute({ + call: scheduledCall, + signal: new AbortController().signal, + onUpdateToolCall: vi.fn(), + }); + + // 4. Verify file deletion + expect(unlinkSpy).toHaveBeenCalledWith('/tmp/temp_full_output_short.txt'); + expect(fileUtils.formatTruncatedToolOutput).not.toHaveBeenCalled(); + + // We should not save it since it was not truncated + expect(result.status).toBe(CoreToolCallStatus.Success); + if (result.status === CoreToolCallStatus.Success) { + const response = result.response.responseParts[0]?.functionResponse + ?.response as Record; + expect(response).toEqual({ + output: 'Short', + }); + expect(result.response.outputFile).toBeUndefined(); + } + + unlinkSpy.mockRestore(); + }); + + it('should delete temporary file on error if fullOutputFilePath is provided', async () => { + const unlinkSpy = vi + .spyOn(fsPromises, 'unlink') + .mockResolvedValue(undefined); + const mockTool = new MockTool({ name: 'failTool' }); + const invocation = mockTool.build({}); + + vi.mocked(coreToolHookTriggers.executeToolWithHooks).mockResolvedValue({ + llmContent: 'partial', + returnDisplay: 'partial', + fullOutputFilePath: '/tmp/temp_error.txt', + error: { message: 'Tool Failed' }, + }); + + const scheduledCall: ScheduledToolCall = { + status: CoreToolCallStatus.Scheduled, + request: { + callId: 'call-err', + name: 'failTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-err', + }, + tool: mockTool, + invocation: invocation as unknown as AnyToolInvocation, + startTime: Date.now(), + }; + + const result = await executor.execute({ + call: scheduledCall, + signal: new AbortController().signal, + onUpdateToolCall: vi.fn(), + }); + + expect(unlinkSpy).toHaveBeenCalledWith('/tmp/temp_error.txt'); + expect(result.status).toBe(CoreToolCallStatus.Error); + unlinkSpy.mockRestore(); + }); + + it('should delete temporary file on abort if fullOutputFilePath is provided', async () => { + const unlinkSpy = vi + .spyOn(fsPromises, 'unlink') + .mockResolvedValue(undefined); + const mockTool = new MockTool({ name: 'slowTool' }); + const invocation = mockTool.build({}); + + const controller = new AbortController(); + + vi.mocked(coreToolHookTriggers.executeToolWithHooks).mockImplementation( + async () => { + controller.abort(); + return { + llmContent: 'partial', + returnDisplay: 'partial', + fullOutputFilePath: '/tmp/temp_abort.txt', + }; + }, + ); + + const scheduledCall: ScheduledToolCall = { + status: CoreToolCallStatus.Scheduled, + request: { + callId: 'call-abort', + name: 'slowTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-abort', + }, + tool: mockTool, + invocation: invocation as unknown as AnyToolInvocation, + startTime: Date.now(), + }; + + const result = await executor.execute({ + call: scheduledCall, + signal: controller.signal, + onUpdateToolCall: vi.fn(), + }); + + expect(unlinkSpy).toHaveBeenCalledWith('/tmp/temp_abort.txt'); + expect(result.status).toBe(CoreToolCallStatus.Cancelled); + unlinkSpy.mockRestore(); + }); + it('should report execution ID updates for backgroundable tools', async () => { // 1. Setup ShellToolInvocation const messageBus = createMockMessageBus(); diff --git a/packages/core/src/scheduler/tool-executor.ts b/packages/core/src/scheduler/tool-executor.ts index 3910aaee47..ef6e5ce217 100644 --- a/packages/core/src/scheduler/tool-executor.ts +++ b/packages/core/src/scheduler/tool-executor.ts @@ -4,6 +4,8 @@ * SPDX-License-Identifier: Apache-2.0 */ +import fsPromises from 'node:fs/promises'; +import { debugLogger } from '../utils/debugLogger.js'; import { ToolErrorType, ToolOutputTruncatedEvent, @@ -138,6 +140,16 @@ export class ToolExecutor { } if (signal.aborted) { + if (toolResult.fullOutputFilePath) { + await fsPromises + .unlink(toolResult.fullOutputFilePath) + .catch((error) => { + debugLogger.warn( + `Failed to delete temporary tool output file on abort: ${toolResult.fullOutputFilePath}`, + error, + ); + }); + } completedToolCall = await this.createCancelledResult( call, 'User cancelled tool execution.', @@ -368,12 +380,50 @@ export class ToolExecutor { call: ToolCall, toolResult: ToolResult, ): Promise { - const { truncatedContent: content, outputFile } = + let { truncatedContent: content, outputFile } = await this.truncateOutputIfNeeded(call, toolResult.llmContent); const toolName = call.request.originalRequestName || call.request.name; const callId = call.request.callId; + if (toolResult.fullOutputFilePath) { + const threshold = this.config.getTruncateToolOutputThreshold(); + if ( + threshold > 0 && + typeof content === 'string' && + content.length > threshold + ) { + const { outputFile: savedPath } = await moveToolOutputToFile( + toolResult.fullOutputFilePath, + toolName, + callId, + this.config.storage.getProjectTempDir(), + this.config.getSessionId(), + ); + outputFile = savedPath; + content = formatTruncatedToolOutput(content, outputFile, threshold); + + logToolOutputTruncated( + this.config, + new ToolOutputTruncatedEvent(call.request.prompt_id, { + toolName, + originalContentLength: content.length, // approximation + truncatedContentLength: content.length, + threshold, + }), + ); + } else { + try { + await fsPromises.unlink(toolResult.fullOutputFilePath); + } catch (error) { + debugLogger.warn( + `Failed to delete temporary tool output file: ${toolResult.fullOutputFilePath}`, + error, + ); + } + } + } + const response = convertToFunctionResponse( toolName, callId, diff --git a/packages/core/src/services/executionLifecycleService.ts b/packages/core/src/services/executionLifecycleService.ts index a16717e3d0..534b0f4cff 100644 --- a/packages/core/src/services/executionLifecycleService.ts +++ b/packages/core/src/services/executionLifecycleService.ts @@ -35,6 +35,14 @@ export interface ExecutionHandle { } export type ExecutionOutputEvent = + | { + type: 'raw_data'; + chunk: string; + } + | { + type: 'file_data'; + chunk: string; + } | { type: 'data'; chunk: string | AnsiOutput; diff --git a/packages/core/src/services/shellExecutionService.test.ts b/packages/core/src/services/shellExecutionService.test.ts index 3bba16f9fa..ef5d7f127b 100644 --- a/packages/core/src/services/shellExecutionService.test.ts +++ b/packages/core/src/services/shellExecutionService.test.ts @@ -489,6 +489,7 @@ describe('ShellExecutionService', () => { // eslint-disable-next-line @typescript-eslint/no-explicit-any headlessTerminal: mockHeadlessTerminal as any, command: 'some-command', + lastCommittedLine: -1, }); }); @@ -989,6 +990,8 @@ describe('ShellExecutionService', () => { pty.onExit.mock.calls[0][0]({ exitCode: 0, signal: null }); }); + // We don't check result here because result is not available in the test body as written + // The test body doesn't capture the return value of simulateExecution correctly for this assertion. expect(onOutputEventMock).toHaveBeenCalledTimes(4); expect(onOutputEventMock.mock.calls[0][0]).toEqual({ type: 'binary_detected', @@ -1579,6 +1582,8 @@ describe('ShellExecutionService child_process fallback', () => { cp.emit('exit', 0, null); }); + // We don't check result here because result is not available in the test body as written + // The test body doesn't capture the return value of simulateExecution correctly for this assertion. expect(onOutputEventMock).toHaveBeenCalledTimes(4); expect(onOutputEventMock.mock.calls[0][0]).toEqual({ type: 'binary_detected', diff --git a/packages/core/src/services/shellExecutionService.ts b/packages/core/src/services/shellExecutionService.ts index 4fbee62e2f..fbb7df7e4a 100644 --- a/packages/core/src/services/shellExecutionService.ts +++ b/packages/core/src/services/shellExecutionService.ts @@ -120,6 +120,8 @@ interface ActivePty { maxSerializedLines?: number; command: string; sessionId?: string; + lastSerializedOutput?: AnsiOutput; + lastCommittedLine: number; } interface ActiveChildProcess { @@ -148,6 +150,48 @@ const findLastContentLine = ( return -1; }; +const emitPendingLines = ( + activePty: ActivePty, + pid: number, + onOutputEvent: (event: ShellOutputEvent) => void, + forceAll = false, +) => { + const buffer = activePty.headlessTerminal.buffer.active; + const limit = forceAll ? buffer.length : buffer.baseY; + + let chunks = ''; + for (let i = activePty.lastCommittedLine + 1; i < limit; i++) { + const line = buffer.getLine(i); + if (!line) continue; + + let trimRight = true; + let isNextLineWrapped = false; + if (i + 1 < buffer.length) { + const nextLine = buffer.getLine(i + 1); + if (nextLine?.isWrapped) { + isNextLineWrapped = true; + trimRight = false; + } + } + + const lineContent = line.translateToString(trimRight); + chunks += lineContent; + if (!isNextLineWrapped) { + chunks += '\n'; + } + } + + if (chunks.length > 0) { + const event: ShellOutputEvent = { + type: 'file_data', + chunk: chunks, + }; + onOutputEvent(event); + ExecutionLifecycleService.emitEvent(pid, event); + activePty.lastCommittedLine = limit - 1; + } +}; + const getFullBufferText = (terminal: pkg.Terminal, startLine = 0): string => { const buffer = terminal.buffer.active; const lines: string[] = []; @@ -784,6 +828,15 @@ export class ShellExecutionService { if (remaining) { state.output += remaining; if (isStreamingRawContent) { + const rawEvent: ShellOutputEvent = { + type: 'raw_data', + chunk: remaining, + }; + onOutputEvent(rawEvent); + if (child.pid) { + ExecutionLifecycleService.emitEvent(child.pid, rawEvent); + } + const event: ShellOutputEvent = { type: 'data', chunk: remaining, @@ -792,6 +845,15 @@ export class ShellExecutionService { if (child.pid) { ExecutionLifecycleService.emitEvent(child.pid, event); } + + const fileEvent: ShellOutputEvent = { + type: 'file_data', + chunk: stripAnsi(remaining), + }; + onOutputEvent(fileEvent); + if (child.pid) { + ExecutionLifecycleService.emitEvent(child.pid, fileEvent); + } } } } @@ -800,6 +862,15 @@ export class ShellExecutionService { if (remaining) { state.output += remaining; if (isStreamingRawContent) { + const rawEvent: ShellOutputEvent = { + type: 'raw_data', + chunk: remaining, + }; + onOutputEvent(rawEvent); + if (child.pid) { + ExecutionLifecycleService.emitEvent(child.pid, rawEvent); + } + const event: ShellOutputEvent = { type: 'data', chunk: remaining, @@ -808,6 +879,15 @@ export class ShellExecutionService { if (child.pid) { ExecutionLifecycleService.emitEvent(child.pid, event); } + + const fileEvent: ShellOutputEvent = { + type: 'file_data', + chunk: stripAnsi(remaining), + }; + onOutputEvent(fileEvent); + if (child.pid) { + ExecutionLifecycleService.emitEvent(child.pid, fileEvent); + } } } } @@ -934,6 +1014,7 @@ export class ShellExecutionService { maxSerializedLines: shellExecutionConfig.maxSerializedLines, command: shellExecutionConfig.originalCommand ?? commandToExecute, sessionId: shellExecutionConfig.sessionId, + lastCommittedLine: -1, }); const result = ExecutionLifecycleService.attachExecution(ptyPid, { @@ -1099,10 +1180,37 @@ export class ShellExecutionService { }, 68); }; - headlessTerminal.onScroll(() => { + let lastYdisp = 0; + let hasReachedMax = false; + const scrollbackLimit = shellExecutionConfig.scrollback ?? SCROLLBACK_LIMIT; + + headlessTerminal.onScroll((ydisp) => { if (!isWriting) { render(); } + + if ( + ydisp === scrollbackLimit && + lastYdisp === scrollbackLimit && + hasReachedMax + ) { + const activePty = this.activePtys.get(ptyPid); + if (activePty) { + activePty.lastCommittedLine--; + } + } + if ( + ydisp === scrollbackLimit && + headlessTerminal.buffer.active.length === scrollbackLimit + rows + ) { + hasReachedMax = true; + } + lastYdisp = ydisp; + + const activePtyForEmit = this.activePtys.get(ptyPid); + if (activePtyForEmit) { + emitPendingLines(activePtyForEmit, ptyPid, onOutputEvent); + } }); const handleOutput = (data: Buffer) => { @@ -1490,6 +1598,7 @@ export class ShellExecutionService { startLine, endLine, ); + activePty.lastSerializedOutput = bufferData; const event: ShellOutputEvent = { type: 'data', chunk: bufferData }; ExecutionLifecycleService.emitEvent(pid, event); } diff --git a/packages/core/src/tools/shell.test.ts b/packages/core/src/tools/shell.test.ts index 8e9b866fa6..66a0420a31 100644 --- a/packages/core/src/tools/shell.test.ts +++ b/packages/core/src/tools/shell.test.ts @@ -119,6 +119,7 @@ describe('ShellTool', () => { getDebugMode: vi.fn().mockReturnValue(false), getTargetDir: vi.fn().mockReturnValue(tempRootDir), getSummarizeToolOutputConfig: vi.fn().mockReturnValue(undefined), + getTruncateToolOutputThreshold: vi.fn().mockReturnValue(40000), getWorkspaceContext: vi .fn() .mockReturnValue(new WorkspaceContext(tempRootDir)), diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts index 44f0c85316..096db9d674 100644 --- a/packages/core/src/tools/shell.ts +++ b/packages/core/src/tools/shell.ts @@ -9,8 +9,12 @@ import fs from 'node:fs'; import path from 'node:path'; import os from 'node:os'; import crypto from 'node:crypto'; -import { debugLogger } from '../index.js'; -import { type SandboxPermissions } from '../services/sandboxManager.js'; +import type { Config } from '../config/config.js'; +import { debugLogger } from '../utils/debugLogger.js'; +import { + type SandboxPermissions, + getPathIdentity, +} from '../services/sandboxManager.js'; import { ToolErrorType } from './tool-error.js'; import { BaseDeclarativeTool, @@ -459,6 +463,12 @@ export class ShellToolInvocation extends BaseToolInvocation< const onAbort = () => combinedController.abort(); + const outputFileName = `gemini_shell_output_${crypto.randomBytes(6).toString('hex')}.log`; + const outputFilePath = path.join(os.tmpdir(), outputFileName); + const outputStream = fs.createWriteStream(outputFilePath); + + let fullOutputReturned = false; + try { // pgrep is not available on Windows, so we can't get background PIDs const commandToExecute = this.wrapCommandForPgrep( @@ -485,6 +495,7 @@ export class ShellToolInvocation extends BaseToolInvocation< let cumulativeOutput: string | AnsiOutput = ''; let lastUpdateTime = Date.now(); let isBinaryStream = false; + let totalBytesWritten = 0; const resetTimeout = () => { if (timeoutMs <= 0) { @@ -510,31 +521,46 @@ export class ShellToolInvocation extends BaseToolInvocation< cwd, (event: ShellOutputEvent) => { resetTimeout(); // Reset timeout on any event - if (!updateOutput) { - return; - } - - let shouldUpdate = false; switch (event.type) { + case 'raw_data': + // We do not write raw data to the file to avoid spurious escape codes. + // We rely on 'file_data' for the clean output stream. + break; + case 'file_data': + if (!isBinaryStream) { + totalBytesWritten += Buffer.byteLength(event.chunk); + outputStream.write(event.chunk); + } + break; case 'data': if (isBinaryStream) break; cumulativeOutput = event.chunk; - shouldUpdate = true; + if (updateOutput && !this.params.is_background) { + updateOutput(cumulativeOutput); + lastUpdateTime = Date.now(); + } break; case 'binary_detected': isBinaryStream = true; cumulativeOutput = '[Binary output detected. Halting stream...]'; - shouldUpdate = true; + if (updateOutput && !this.params.is_background) { + updateOutput(cumulativeOutput); + } break; case 'binary_progress': isBinaryStream = true; cumulativeOutput = `[Receiving binary output... ${formatBytes( event.bytesReceived, )} received]`; - if (Date.now() - lastUpdateTime > OUTPUT_UPDATE_INTERVAL_MS) { - shouldUpdate = true; + if ( + updateOutput && + !this.params.is_background && + Date.now() - lastUpdateTime > OUTPUT_UPDATE_INTERVAL_MS + ) { + updateOutput(cumulativeOutput); + lastUpdateTime = Date.now(); } break; case 'exit': @@ -543,11 +569,6 @@ export class ShellToolInvocation extends BaseToolInvocation< throw new Error('An unhandled ShellOutputEvent was found.'); } } - - if (shouldUpdate && !this.params.is_background) { - updateOutput(cumulativeOutput); - lastUpdateTime = Date.now(); - } }, combinedController.signal, this.context.config.getEnableInteractiveShell(), @@ -620,6 +641,9 @@ export class ShellToolInvocation extends BaseToolInvocation< } const result = await resultPromise; + await new Promise((resolve) => { + outputStream.end(resolve); + }); const backgroundPIDs: number[] = []; if (os.platform() !== 'win32') { @@ -913,21 +937,46 @@ export class ShellToolInvocation extends BaseToolInvocation< this.context.geminiClient, signal, ); - return { + const threshold = this.config.getTruncateToolOutputThreshold(); + const fullOutputFilePath = + threshold > 0 && totalBytesWritten >= threshold + ? outputFilePath + : undefined; + + const toolResult: ToolResult = { llmContent: summary, - returnDisplay, + returnDisplay: typeof returnDisplayMessage !== 'undefined' ? returnDisplayMessage : returnDisplay, + fullOutputFilePath, ...executionError, }; + if (toolResult.fullOutputFilePath) { + fullOutputReturned = true; + } + return toolResult; } - return { + const threshold = this.config.getTruncateToolOutputThreshold(); + const fullOutputFilePath = + threshold > 0 && totalBytesWritten >= threshold + ? outputFilePath + : undefined; + + const toolResult: ToolResult = { llmContent, returnDisplay, data, + fullOutputFilePath, ...executionError, }; + if (toolResult.fullOutputFilePath) { + fullOutputReturned = true; + } + return toolResult; } finally { if (timeoutTimer) clearTimeout(timeoutTimer); + if (!outputStream.closed) { + outputStream.destroy(); + } signal.removeEventListener('abort', onAbort); timeoutController.signal.removeEventListener('abort', onAbort); try { @@ -935,6 +984,13 @@ export class ShellToolInvocation extends BaseToolInvocation< } catch { // Ignore errors during unlink } + if (!fullOutputReturned) { + try { + await fsPromises.unlink(outputFilePath); + } catch { + // Ignore errors during unlink + } + } } } } diff --git a/packages/core/src/tools/tools.ts b/packages/core/src/tools/tools.ts index cd6209079c..e1da489dba 100644 --- a/packages/core/src/tools/tools.ts +++ b/packages/core/src/tools/tools.ts @@ -776,6 +776,13 @@ export interface ToolResult { name: string; args: Record; }; + + /** + * Optional path to a file containing the full, non-truncated output of the tool. + * If provided, the scheduler may use this file for long-term storage and + * reference it in the conversation history if the output is truncated. + */ + fullOutputFilePath?: string; } /** diff --git a/packages/core/src/utils/fileUtils.ts b/packages/core/src/utils/fileUtils.ts index 52191171aa..f106e93db4 100644 --- a/packages/core/src/utils/fileUtils.ts +++ b/packages/core/src/utils/fileUtils.ts @@ -677,6 +677,48 @@ ${head} ${tail}`; } +/** + * Moves tool output from a source path to a temporary file for later retrieval. + */ +export async function moveToolOutputToFile( + sourcePath: string, + toolName: string, + id: string | number, // Accept string (callId) or number (truncationId) + projectTempDir: string, + sessionId?: string, +): Promise<{ outputFile: string }> { + const safeToolName = sanitizeFilenamePart(toolName).toLowerCase(); + const safeId = sanitizeFilenamePart(id.toString()).toLowerCase(); + const fileName = safeId.startsWith(safeToolName) + ? `${safeId}.txt` + : `${safeToolName}_${safeId}.txt`; + + let toolOutputDir = path.join(projectTempDir, TOOL_OUTPUTS_DIR); + if (sessionId) { + const safeSessionId = sanitizeFilenamePart(sessionId); + toolOutputDir = path.join(toolOutputDir, `session-${safeSessionId}`); + } + const outputFile = path.join(toolOutputDir, fileName); + + await fsPromises.mkdir(toolOutputDir, { recursive: true }); + + try { + // Attempt rename (efficient if on the same filesystem) + await fsPromises.rename(sourcePath, outputFile); + } catch (error: unknown) { + // If rename fails (e.g. cross-filesystem), copy and then delete + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + if ((error as { code?: string }).code === 'EXDEV') { + await fsPromises.copyFile(sourcePath, outputFile); + await fsPromises.unlink(sourcePath); + } else { + throw error; + } + } + + return { outputFile }; +} + /** * Saves tool output to a temporary file for later retrieval. */ diff --git a/packages/core/src/utils/generateContentResponseUtilities.ts b/packages/core/src/utils/generateContentResponseUtilities.ts index 3b27dd372f..5eee3e44da 100644 --- a/packages/core/src/utils/generateContentResponseUtilities.ts +++ b/packages/core/src/utils/generateContentResponseUtilities.ts @@ -22,12 +22,13 @@ function createFunctionResponsePart( callId: string, toolName: string, output: string, + outputFile?: string, ): Part { return { functionResponse: { id: callId, name: toolName, - response: { output }, + response: { output, outputFile }, }, }; } @@ -50,9 +51,12 @@ export function convertToFunctionResponse( llmContent: PartListUnion, model: string, config?: Config, + outputFile?: string, ): Part[] { if (typeof llmContent === 'string') { - return [createFunctionResponsePart(callId, toolName, llmContent)]; + return [ + createFunctionResponsePart(callId, toolName, llmContent, outputFile), + ]; } const parts = toParts(llmContent); @@ -94,7 +98,10 @@ export function convertToFunctionResponse( functionResponse: { id: callId, name: toolName, - response: textParts.length > 0 ? { output: textParts.join('\n') } : {}, + response: { + ...(textParts.length > 0 ? { output: textParts.join('\n') } : {}), + outputFile, + }, }, };