Checkpoint of shell optimization

fix(cli): Write shell command output to a file and limit memory buffered in UI

Fixes.

Checkpoint.

fix(core, cli): await outputStream.end() to prevent race conditions

This commit fixes a critical race condition where
was called synchronously without being awaited. This led to potential file
truncation or EBUSY errors on Windows when attempting to manipulate the file
immediately after the  call.

Additionally, this change removes fixed wait times (`setTimeout`) that
were previously used in test files as a band-aid.

fix(core): stream processed xterm output to file to remove spurious escape codes

test(core): update shell regression tests to use file_data events
This commit is contained in:
jacob314
2026-02-19 11:12:13 -08:00
committed by Spencer
parent 986293bd38
commit 859c7c3a70
16 changed files with 959 additions and 62 deletions
@@ -5,6 +5,7 @@
*/
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import fsPromises from 'node:fs/promises';
import { ToolExecutor } from './tool-executor.js';
import {
type Config,
@@ -597,6 +598,215 @@ describe('ToolExecutor', () => {
expect(result.status).toBe(CoreToolCallStatus.Success);
});
it('should truncate large output and move file when fullOutputFilePath is provided', async () => {
// 1. Setup Config for Truncation
vi.spyOn(config, 'getTruncateToolOutputThreshold').mockReturnValue(10);
vi.spyOn(config.storage, 'getProjectTempDir').mockReturnValue('/tmp');
vi.spyOn(fileUtils, 'moveToolOutputToFile').mockResolvedValue({
outputFile: '/tmp/moved_output.txt',
});
const mockTool = new MockTool({ name: SHELL_TOOL_NAME });
const invocation = mockTool.build({});
const longOutput = 'This is a very long output that should be truncated.';
// 2. Mock execution returning long content AND fullOutputFilePath
vi.mocked(coreToolHookTriggers.executeToolWithHooks).mockResolvedValue({
llmContent: longOutput,
returnDisplay: longOutput,
fullOutputFilePath: '/tmp/temp_full_output.txt',
});
const scheduledCall: ScheduledToolCall = {
status: CoreToolCallStatus.Scheduled,
request: {
callId: 'call-trunc-full',
name: SHELL_TOOL_NAME,
args: { command: 'echo long' },
isClientInitiated: false,
prompt_id: 'prompt-trunc-full',
},
tool: mockTool,
invocation: invocation as unknown as AnyToolInvocation,
startTime: Date.now(),
};
// 3. Execute
const result = await executor.execute({
call: scheduledCall,
signal: new AbortController().signal,
onUpdateToolCall: vi.fn(),
});
// 4. Verify Truncation Logic
expect(fileUtils.moveToolOutputToFile).toHaveBeenCalledWith(
'/tmp/temp_full_output.txt',
SHELL_TOOL_NAME,
'call-trunc-full',
expect.any(String), // temp dir
'test-session-id', // session id from makeFakeConfig
);
expect(fileUtils.formatTruncatedToolOutput).toHaveBeenCalledWith(
longOutput,
'/tmp/moved_output.txt',
10, // threshold (maxChars)
);
expect(result.status).toBe(CoreToolCallStatus.Success);
if (result.status === CoreToolCallStatus.Success) {
const response = result.response.responseParts[0]?.functionResponse
?.response as Record<string, unknown>;
// The content should be the *truncated* version returned by the mock formatTruncatedToolOutput
expect(response).toEqual({
output: 'TruncatedContent...',
outputFile: '/tmp/moved_output.txt',
});
expect(result.response.outputFile).toBe('/tmp/moved_output.txt');
}
});
it('should delete temporary file when fullOutputFilePath is provided but output is not truncated', async () => {
// 1. Setup Config for Truncation
vi.spyOn(config, 'getTruncateToolOutputThreshold').mockReturnValue(100);
const unlinkSpy = vi
.spyOn(fsPromises, 'unlink')
.mockResolvedValue(undefined);
const mockTool = new MockTool({ name: SHELL_TOOL_NAME });
const invocation = mockTool.build({});
const shortOutput = 'Short';
// 2. Mock execution returning short content AND fullOutputFilePath
vi.mocked(coreToolHookTriggers.executeToolWithHooks).mockResolvedValue({
llmContent: shortOutput,
returnDisplay: shortOutput,
fullOutputFilePath: '/tmp/temp_full_output_short.txt',
});
const scheduledCall: ScheduledToolCall = {
status: CoreToolCallStatus.Scheduled,
request: {
callId: 'call-short-full',
name: SHELL_TOOL_NAME,
args: { command: 'echo short' },
isClientInitiated: false,
prompt_id: 'prompt-short-full',
},
tool: mockTool,
invocation: invocation as unknown as AnyToolInvocation,
startTime: Date.now(),
};
// 3. Execute
const result = await executor.execute({
call: scheduledCall,
signal: new AbortController().signal,
onUpdateToolCall: vi.fn(),
});
// 4. Verify file deletion
expect(unlinkSpy).toHaveBeenCalledWith('/tmp/temp_full_output_short.txt');
expect(fileUtils.formatTruncatedToolOutput).not.toHaveBeenCalled();
// We should not save it since it was not truncated
expect(result.status).toBe(CoreToolCallStatus.Success);
if (result.status === CoreToolCallStatus.Success) {
const response = result.response.responseParts[0]?.functionResponse
?.response as Record<string, unknown>;
expect(response).toEqual({
output: 'Short',
});
expect(result.response.outputFile).toBeUndefined();
}
unlinkSpy.mockRestore();
});
it('should delete temporary file on error if fullOutputFilePath is provided', async () => {
const unlinkSpy = vi
.spyOn(fsPromises, 'unlink')
.mockResolvedValue(undefined);
const mockTool = new MockTool({ name: 'failTool' });
const invocation = mockTool.build({});
vi.mocked(coreToolHookTriggers.executeToolWithHooks).mockResolvedValue({
llmContent: 'partial',
returnDisplay: 'partial',
fullOutputFilePath: '/tmp/temp_error.txt',
error: { message: 'Tool Failed' },
});
const scheduledCall: ScheduledToolCall = {
status: CoreToolCallStatus.Scheduled,
request: {
callId: 'call-err',
name: 'failTool',
args: {},
isClientInitiated: false,
prompt_id: 'prompt-err',
},
tool: mockTool,
invocation: invocation as unknown as AnyToolInvocation,
startTime: Date.now(),
};
const result = await executor.execute({
call: scheduledCall,
signal: new AbortController().signal,
onUpdateToolCall: vi.fn(),
});
expect(unlinkSpy).toHaveBeenCalledWith('/tmp/temp_error.txt');
expect(result.status).toBe(CoreToolCallStatus.Error);
unlinkSpy.mockRestore();
});
it('should delete temporary file on abort if fullOutputFilePath is provided', async () => {
const unlinkSpy = vi
.spyOn(fsPromises, 'unlink')
.mockResolvedValue(undefined);
const mockTool = new MockTool({ name: 'slowTool' });
const invocation = mockTool.build({});
const controller = new AbortController();
vi.mocked(coreToolHookTriggers.executeToolWithHooks).mockImplementation(
async () => {
controller.abort();
return {
llmContent: 'partial',
returnDisplay: 'partial',
fullOutputFilePath: '/tmp/temp_abort.txt',
};
},
);
const scheduledCall: ScheduledToolCall = {
status: CoreToolCallStatus.Scheduled,
request: {
callId: 'call-abort',
name: 'slowTool',
args: {},
isClientInitiated: false,
prompt_id: 'prompt-abort',
},
tool: mockTool,
invocation: invocation as unknown as AnyToolInvocation,
startTime: Date.now(),
};
const result = await executor.execute({
call: scheduledCall,
signal: controller.signal,
onUpdateToolCall: vi.fn(),
});
expect(unlinkSpy).toHaveBeenCalledWith('/tmp/temp_abort.txt');
expect(result.status).toBe(CoreToolCallStatus.Cancelled);
unlinkSpy.mockRestore();
});
it('should report execution ID updates for backgroundable tools', async () => {
// 1. Setup ShellToolInvocation
const messageBus = createMockMessageBus();
+51 -1
View File
@@ -4,6 +4,8 @@
* SPDX-License-Identifier: Apache-2.0
*/
import fsPromises from 'node:fs/promises';
import { debugLogger } from '../utils/debugLogger.js';
import {
ToolErrorType,
ToolOutputTruncatedEvent,
@@ -137,6 +139,16 @@ export class ToolExecutor {
}
if (signal.aborted) {
if (toolResult.fullOutputFilePath) {
await fsPromises
.unlink(toolResult.fullOutputFilePath)
.catch((error) => {
debugLogger.warn(
`Failed to delete temporary tool output file on abort: ${toolResult.fullOutputFilePath}`,
error,
);
});
}
completedToolCall = await this.createCancelledResult(
call,
'User cancelled tool execution.',
@@ -367,12 +379,50 @@ export class ToolExecutor {
call: ToolCall,
toolResult: ToolResult,
): Promise<SuccessfulToolCall> {
const { truncatedContent: content, outputFile } =
let { truncatedContent: content, outputFile } =
await this.truncateOutputIfNeeded(call, toolResult.llmContent);
const toolName = call.request.originalRequestName || call.request.name;
const callId = call.request.callId;
if (toolResult.fullOutputFilePath) {
const threshold = this.config.getTruncateToolOutputThreshold();
if (
threshold > 0 &&
typeof content === 'string' &&
content.length > threshold
) {
const { outputFile: savedPath } = await moveToolOutputToFile(
toolResult.fullOutputFilePath,
toolName,
callId,
this.config.storage.getProjectTempDir(),
this.config.getSessionId(),
);
outputFile = savedPath;
content = formatTruncatedToolOutput(content, outputFile, threshold);
logToolOutputTruncated(
this.config,
new ToolOutputTruncatedEvent(call.request.prompt_id, {
toolName,
originalContentLength: content.length, // approximation
truncatedContentLength: content.length,
threshold,
}),
);
} else {
try {
await fsPromises.unlink(toolResult.fullOutputFilePath);
} catch (error) {
debugLogger.warn(
`Failed to delete temporary tool output file: ${toolResult.fullOutputFilePath}`,
error,
);
}
}
}
const response = convertToFunctionResponse(
toolName,
callId,