feat(core): improve shell redirection transparency and security (#16486)

This commit is contained in:
N. Taylor Mullen
2026-01-19 20:07:28 -08:00
committed by GitHub
parent 451e0b49cb
commit ec7413456e
16 changed files with 497 additions and 137 deletions
+17 -16
View File
@@ -84,7 +84,7 @@ describe('GlobTool', () => {
expect(result.llmContent).toContain(path.join(tempRootDir, 'fileA.txt'));
expect(result.llmContent).toContain(path.join(tempRootDir, 'FileB.TXT'));
expect(result.returnDisplay).toBe('Found 2 matching file(s)');
});
}, 30000);
it('should find files case-sensitively when case_sensitive is true', async () => {
const params: GlobToolParams = { pattern: '*.txt', case_sensitive: true };
@@ -95,16 +95,17 @@ describe('GlobTool', () => {
expect(result.llmContent).not.toContain(
path.join(tempRootDir, 'FileB.TXT'),
);
});
}, 30000);
it('should find files case-insensitively by default (pattern: *.TXT)', async () => {
const params: GlobToolParams = { pattern: '*.TXT' };
const invocation = globTool.build(params);
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain('Found 2 file(s)');
expect(result.llmContent).toContain(path.join(tempRootDir, 'fileA.txt'));
expect(result.llmContent).toContain(path.join(tempRootDir, 'FileB.TXT'));
});
expect(result.llmContent).toContain('fileA.txt');
expect(result.llmContent).toContain('FileB.TXT');
}, 30000);
it('should find files case-insensitively when case_sensitive is false (pattern: *.TXT)', async () => {
const params: GlobToolParams = {
@@ -116,7 +117,7 @@ describe('GlobTool', () => {
expect(result.llmContent).toContain('Found 2 file(s)');
expect(result.llmContent).toContain(path.join(tempRootDir, 'fileA.txt'));
expect(result.llmContent).toContain(path.join(tempRootDir, 'FileB.TXT'));
});
}, 30000);
it('should find files using a pattern that includes a subdirectory', async () => {
const params: GlobToolParams = { pattern: 'sub/*.md' };
@@ -129,7 +130,7 @@ describe('GlobTool', () => {
expect(result.llmContent).toContain(
path.join(tempRootDir, 'sub', 'FileD.MD'),
);
});
}, 30000);
it('should find files in a specified relative path (relative to rootDir)', async () => {
const params: GlobToolParams = { pattern: '*.md', dir_path: 'sub' };
@@ -142,7 +143,7 @@ describe('GlobTool', () => {
expect(result.llmContent).toContain(
path.join(tempRootDir, 'sub', 'FileD.MD'),
);
});
}, 30000);
it('should find files using a deep globstar pattern (e.g., **/*.log)', async () => {
const params: GlobToolParams = { pattern: '**/*.log' };
@@ -152,7 +153,7 @@ describe('GlobTool', () => {
expect(result.llmContent).toContain(
path.join(tempRootDir, 'sub', 'deep', 'fileE.log'),
);
});
}, 30000);
it('should return "No files found" message when pattern matches nothing', async () => {
const params: GlobToolParams = { pattern: '*.nonexistent' };
@@ -162,7 +163,7 @@ describe('GlobTool', () => {
'No files found matching pattern "*.nonexistent"',
);
expect(result.returnDisplay).toBe('No files found');
});
}, 30000);
it('should find files with special characters in the name', async () => {
await fs.writeFile(path.join(tempRootDir, 'file[1].txt'), 'content');
@@ -173,7 +174,7 @@ describe('GlobTool', () => {
expect(result.llmContent).toContain(
path.join(tempRootDir, 'file[1].txt'),
);
});
}, 30000);
it('should find files with special characters like [] and () in the path', async () => {
const filePath = path.join(
@@ -190,7 +191,7 @@ describe('GlobTool', () => {
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain('Found 1 file(s)');
expect(result.llmContent).toContain(filePath);
});
}, 30000);
it('should correctly sort files by modification time (newest first)', async () => {
const params: GlobToolParams = { pattern: '*.sortme' };
@@ -216,7 +217,7 @@ describe('GlobTool', () => {
expect(path.resolve(filesListed[1])).toBe(
path.resolve(tempRootDir, 'older.sortme'),
);
});
}, 30000);
it('should return a PATH_NOT_IN_WORKSPACE error if path is outside workspace', async () => {
// Bypassing validation to test execute method directly
@@ -226,7 +227,7 @@ describe('GlobTool', () => {
const result = await invocation.execute(abortSignal);
expect(result.error?.type).toBe(ToolErrorType.PATH_NOT_IN_WORKSPACE);
expect(result.returnDisplay).toBe('Path is not within workspace');
});
}, 30000);
it('should return a GLOB_EXECUTION_ERROR on glob failure', async () => {
vi.mocked(glob.glob).mockRejectedValue(new Error('Glob failed'));
@@ -239,7 +240,7 @@ describe('GlobTool', () => {
);
// Reset glob.
vi.mocked(glob.glob).mockReset();
});
}, 30000);
});
describe('validateToolParams', () => {
+9 -9
View File
@@ -145,7 +145,7 @@ describe('GrepTool', () => {
);
expect(result.llmContent).toContain('L1: another world in sub dir');
expect(result.returnDisplay).toBe('Found 3 matches');
});
}, 30000);
it('should find matches in a specific path', async () => {
const params: GrepToolParams = { pattern: 'world', dir_path: 'sub' };
@@ -157,7 +157,7 @@ describe('GrepTool', () => {
expect(result.llmContent).toContain('File: fileC.txt'); // Path relative to 'sub'
expect(result.llmContent).toContain('L1: another world in sub dir');
expect(result.returnDisplay).toBe('Found 1 match');
});
}, 30000);
it('should find matches with an include glob', async () => {
const params: GrepToolParams = { pattern: 'hello', include: '*.js' };
@@ -171,7 +171,7 @@ describe('GrepTool', () => {
'L2: function baz() { return "hello"; }',
);
expect(result.returnDisplay).toBe('Found 1 match');
});
}, 30000);
it('should find matches with an include glob and path', async () => {
await fs.writeFile(
@@ -191,7 +191,7 @@ describe('GrepTool', () => {
expect(result.llmContent).toContain('File: another.js');
expect(result.llmContent).toContain('L1: const greeting = "hello";');
expect(result.returnDisplay).toBe('Found 1 match');
});
}, 30000);
it('should return "No matches found" when pattern does not exist', async () => {
const params: GrepToolParams = { pattern: 'nonexistentpattern' };
@@ -201,7 +201,7 @@ describe('GrepTool', () => {
'No matches found for pattern "nonexistentpattern" in the workspace directory.',
);
expect(result.returnDisplay).toBe('No matches found');
});
}, 30000);
it('should handle regex special characters correctly', async () => {
const params: GrepToolParams = { pattern: 'foo.*bar' }; // Matches 'const foo = "bar";'
@@ -212,7 +212,7 @@ describe('GrepTool', () => {
);
expect(result.llmContent).toContain('File: fileB.js');
expect(result.llmContent).toContain('L1: const foo = "bar";');
});
}, 30000);
it('should be case-insensitive by default (JS fallback)', async () => {
const params: GrepToolParams = { pattern: 'HELLO' };
@@ -227,14 +227,14 @@ describe('GrepTool', () => {
expect(result.llmContent).toContain(
'L2: function baz() { return "hello"; }',
);
});
}, 30000);
it('should throw an error if params are invalid', async () => {
const params = { dir_path: '.' } as unknown as GrepToolParams; // Invalid: pattern missing
expect(() => grepTool.build(params)).toThrow(
/params must have required property 'pattern'/,
);
});
}, 30000);
it('should return a GREP_EXECUTION_ERROR on failure', async () => {
vi.mocked(glob.globStream).mockRejectedValue(new Error('Glob failed'));
@@ -243,7 +243,7 @@ describe('GrepTool', () => {
const result = await invocation.execute(abortSignal);
expect(result.error?.type).toBe(ToolErrorType.GREP_EXECUTION_ERROR);
vi.mocked(glob.globStream).mockReset();
});
}, 30000);
});
describe('multi-directory workspace', () => {
+52
View File
@@ -538,4 +538,56 @@ describe('ShellTool', () => {
expect(shellTool.description).toMatchSnapshot();
});
});
describe('getConfirmationDetails', () => {
it('should annotate sub-commands with redirection correctly', async () => {
const shellTool = new ShellTool(mockConfig, createMockMessageBus());
const command = 'mkdir -p baz && echo "hello" > baz/test.md && ls';
const invocation = shellTool.build({ command });
// @ts-expect-error - getConfirmationDetails is protected
const details = await invocation.getConfirmationDetails(
new AbortController().signal,
);
expect(details).not.toBe(false);
if (details && details.type === 'exec') {
expect(details.rootCommand).toBe('mkdir, echo, redirection (>), ls');
}
});
it('should annotate all redirected sub-commands', async () => {
const shellTool = new ShellTool(mockConfig, createMockMessageBus());
const command = 'cat < input.txt && grep "foo" > output.txt';
const invocation = shellTool.build({ command });
// @ts-expect-error - getConfirmationDetails is protected
const details = await invocation.getConfirmationDetails(
new AbortController().signal,
);
expect(details).not.toBe(false);
if (details && details.type === 'exec') {
expect(details.rootCommand).toBe(
'cat, redirection (<), grep, redirection (>)',
);
}
});
it('should annotate sub-commands with pipes correctly', async () => {
const shellTool = new ShellTool(mockConfig, createMockMessageBus());
const command = 'ls | grep "baz"';
const invocation = shellTool.build({ command });
// @ts-expect-error - getConfirmationDetails is protected
const details = await invocation.getConfirmationDetails(
new AbortController().signal,
);
expect(details).not.toBe(false);
if (details && details.type === 'exec') {
expect(details.rootCommand).toBe('ls, grep');
}
});
});
});
+18 -8
View File
@@ -38,6 +38,8 @@ import {
getCommandRoots,
initializeShellParsers,
stripShellWrapper,
parseCommandDetails,
hasRedirection,
} from '../utils/shell-utils.js';
import { SHELL_TOOL_NAME } from './tool-names.js';
import type { MessageBus } from '../confirmation-bus/message-bus.js';
@@ -101,17 +103,25 @@ export class ShellToolInvocation extends BaseToolInvocation<
_abortSignal: AbortSignal,
): Promise<ToolCallConfirmationDetails | false> {
const command = stripShellWrapper(this.params.command);
let rootCommands = [...new Set(getCommandRoots(command))];
// Fallback for UI display if parser fails or returns no commands (e.g.
// variable assignments only)
if (rootCommands.length === 0 && command.trim()) {
const parsed = parseCommandDetails(command);
let rootCommandDisplay = '';
if (!parsed || parsed.hasError || parsed.details.length === 0) {
// Fallback if parser fails
const fallback = command.trim().split(/\s+/)[0];
if (fallback) {
rootCommands = [fallback];
rootCommandDisplay = fallback || 'shell command';
if (hasRedirection(command)) {
rootCommandDisplay += ', redirection';
}
} else {
rootCommandDisplay = parsed.details
.map((detail) => detail.name)
.join(', ');
}
const rootCommands = [...new Set(getCommandRoots(command))];
// Rely entirely on PolicyEngine for interactive confirmation.
// If we are here, it means PolicyEngine returned ASK_USER (or no message bus),
// so we must provide confirmation details.
@@ -119,7 +129,7 @@ export class ShellToolInvocation extends BaseToolInvocation<
type: 'exec',
title: 'Confirm Shell Command',
command: this.params.command,
rootCommand: rootCommands.join(', '),
rootCommand: rootCommandDisplay,
rootCommands,
onConfirm: async (outcome: ToolConfirmationOutcome) => {
await this.publishPolicyUpdate(outcome);
@@ -306,7 +316,7 @@ export class ShellToolInvocation extends BaseToolInvocation<
`Command: ${this.params.command}`,
`Directory: ${this.params.dir_path || '(root)'}`,
`Output: ${result.output || '(empty)'}`,
`Error: ${finalError}`, // Use the cleaned error string.
`Error: ${finalError}`,
`Exit Code: ${result.exitCode ?? '(none)'}`,
`Signal: ${result.signal ?? '(none)'}`,
`Background PIDs: ${
+27 -21
View File
@@ -45,8 +45,10 @@ export interface ToolInvocation<
toolLocations(): ToolLocation[];
/**
* Determines if the tool should prompt for confirmation before execution.
* @returns Confirmation details or false if no confirmation is needed.
* Checks if the tool call should be confirmed by the user before execution.
*
* @param abortSignal An AbortSignal that can be used to cancel the confirmation request.
* @returns A ToolCallConfirmationDetails object if confirmation is required, or false if not.
*/
shouldConfirmExecute(
abortSignal: AbortSignal,
@@ -143,7 +145,7 @@ export abstract class BaseToolInvocation<
) {
if (this._toolName) {
const options = this.getPolicyUpdateOptions(outcome);
await this.messageBus.publish({
void this.messageBus.publish({
type: MessageBusType.UPDATE_POLICY,
toolName: this._toolName,
persist: outcome === ToolConfirmationOutcome.ProceedAlwaysAndSave,
@@ -179,16 +181,21 @@ export abstract class BaseToolInvocation<
protected getMessageBusDecision(
abortSignal: AbortSignal,
): Promise<'ALLOW' | 'DENY' | 'ASK_USER'> {
if (!this.messageBus) {
if (!this.messageBus || !this._toolName) {
// If there's no message bus, we can't make a decision, so we allow.
// The legacy confirmation flow will still apply if the tool needs it.
return Promise.resolve('ALLOW');
}
const correlationId = randomUUID();
const toolCall = {
name: this._toolName || this.constructor.name,
args: this.params as Record<string, unknown>,
const request: ToolConfirmationRequest = {
type: MessageBusType.TOOL_CONFIRMATION_REQUEST,
correlationId,
toolCall: {
name: this._toolName,
args: this.params as Record<string, unknown>,
},
serverName: this._serverName,
};
return new Promise<'ALLOW' | 'DENY' | 'ASK_USER'>((resolve) => {
@@ -197,18 +204,19 @@ export abstract class BaseToolInvocation<
return;
}
let timeoutId: NodeJS.Timeout | undefined;
let timeoutId: NodeJS.Timeout | null = null;
let unsubscribe: (() => void) | null = null;
const cleanup = () => {
if (timeoutId) {
clearTimeout(timeoutId);
timeoutId = undefined;
timeoutId = null;
}
if (unsubscribe) {
unsubscribe();
unsubscribe = null;
}
abortSignal.removeEventListener('abort', abortHandler);
this.messageBus.unsubscribe(
MessageBusType.TOOL_CONFIRMATION_RESPONSE,
responseHandler,
);
};
const abortHandler = () => {
@@ -245,17 +253,15 @@ export abstract class BaseToolInvocation<
MessageBusType.TOOL_CONFIRMATION_RESPONSE,
responseHandler,
);
const request: ToolConfirmationRequest = {
type: MessageBusType.TOOL_CONFIRMATION_REQUEST,
toolCall,
correlationId,
serverName: this._serverName,
unsubscribe = () => {
this.messageBus?.unsubscribe(
MessageBusType.TOOL_CONFIRMATION_RESPONSE,
responseHandler,
);
};
try {
// eslint-disable-next-line @typescript-eslint/no-floating-promises
this.messageBus.publish(request);
void this.messageBus.publish(request);
} catch (_error) {
cleanup();
resolve('ALLOW');