fix(core): improve shell command with redirection detection (#15683)

This commit is contained in:
Gal Zahavi
2026-01-02 11:36:59 -08:00
committed by GitHub
parent 958284dc24
commit 18fef0db31
8 changed files with 432 additions and 97 deletions

View File

@@ -864,6 +864,116 @@ describe('PolicyEngine', () => {
(await engine.check({ name: 'test', args }, undefined)).decision,
).toBe(PolicyDecision.ALLOW);
});
it('should downgrade ALLOW to ASK_USER for redirected shell commands', async () => {
const rules: PolicyRule[] = [
{
toolName: 'run_shell_command',
// Matches "echo" prefix
argsPattern: /"command":"echo/,
decision: PolicyDecision.ALLOW,
},
];
engine = new PolicyEngine({ rules });
// Safe command should be allowed
expect(
(
await engine.check(
{ name: 'run_shell_command', args: { command: 'echo "hello"' } },
undefined,
)
).decision,
).toBe(PolicyDecision.ALLOW);
// Redirected command should be downgraded to ASK_USER
expect(
(
await engine.check(
{
name: 'run_shell_command',
args: { command: 'echo "hello" > file.txt' },
},
undefined,
)
).decision,
).toBe(PolicyDecision.ASK_USER);
});
it('should allow redirected shell commands when allowRedirection is true', async () => {
const rules: PolicyRule[] = [
{
toolName: 'run_shell_command',
// Matches "echo" prefix
argsPattern: /"command":"echo/,
decision: PolicyDecision.ALLOW,
allowRedirection: true,
},
];
engine = new PolicyEngine({ rules });
// Redirected command should stay ALLOW
expect(
(
await engine.check(
{
name: 'run_shell_command',
args: { command: 'echo "hello" > file.txt' },
},
undefined,
)
).decision,
).toBe(PolicyDecision.ALLOW);
});
it('should NOT downgrade ALLOW to ASK_USER for quoted redirection chars', async () => {
const rules: PolicyRule[] = [
{
toolName: 'run_shell_command',
argsPattern: /"command":"echo/,
decision: PolicyDecision.ALLOW,
},
];
engine = new PolicyEngine({ rules });
// Should remain ALLOW because it's not a real redirection
expect(
(
await engine.check(
{
name: 'run_shell_command',
args: { command: 'echo "-> arrow"' },
},
undefined,
)
).decision,
).toBe(PolicyDecision.ALLOW);
});
it('should avoid infinite recursion for commands with substitution', async () => {
const rules: PolicyRule[] = [
{
toolName: 'run_shell_command',
decision: PolicyDecision.ALLOW,
},
];
engine = new PolicyEngine({ rules });
// Command with substitution triggers splitCommands returning the same command as its first element.
// This verifies the fix for the infinite recursion bug.
const result = await engine.check(
{
name: 'run_shell_command',
args: { command: 'echo $(ls)' },
},
undefined,
);
expect(result.decision).toBe(PolicyDecision.ALLOW);
});
});
describe('safety checker integration', () => {

View File

@@ -24,6 +24,7 @@ import {
SHELL_TOOL_NAMES,
initializeShellParsers,
splitCommands,
hasRedirection,
} from '../utils/shell-utils.js';
function ruleMatches(
@@ -140,6 +141,92 @@ export class PolicyEngine {
return this.approvalMode;
}
/**
* Check if a shell command is allowed.
*/
private async checkShellCommand(
toolName: string,
command: string | undefined,
ruleDecision: PolicyDecision,
serverName: string | undefined,
dir_path: string | undefined,
allowRedirection?: boolean,
): Promise<PolicyDecision> {
if (!command) {
return this.applyNonInteractiveMode(ruleDecision);
}
await initializeShellParsers();
const subCommands = splitCommands(command);
if (subCommands.length === 0) {
debugLogger.debug(
`[PolicyEngine.check] Command parsing failed for: ${command}. Falling back to ASK_USER.`,
);
return this.applyNonInteractiveMode(PolicyDecision.ASK_USER);
}
// If there are multiple parts, or if we just want to validate the single part against DENY rules
if (subCommands.length > 0) {
debugLogger.debug(
`[PolicyEngine.check] Validating shell command: ${subCommands.length} parts`,
);
// Start with the decision for the full command string
let aggregateDecision = ruleDecision;
for (const subCmd of subCommands) {
// Prevent infinite recursion for the root command
if (subCmd === command) {
if (!allowRedirection && hasRedirection(subCmd)) {
debugLogger.debug(
`[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${subCmd}`,
);
// Redirection always downgrades ALLOW to ASK_USER
if (aggregateDecision === PolicyDecision.ALLOW) {
aggregateDecision = PolicyDecision.ASK_USER;
}
}
continue;
}
const subResult = await this.check(
{ name: toolName, args: { command: subCmd, dir_path } },
serverName,
);
// If any part is DENIED, the whole command is DENIED
if (subResult.decision === PolicyDecision.DENY) {
return PolicyDecision.DENY;
}
// If any part requires ASK_USER, the whole command requires ASK_USER (unless already DENY)
if (subResult.decision === PolicyDecision.ASK_USER) {
if (aggregateDecision === PolicyDecision.ALLOW) {
aggregateDecision = PolicyDecision.ASK_USER;
}
}
// Check for redirection in allowed sub-commands
if (
subResult.decision === PolicyDecision.ALLOW &&
!allowRedirection &&
hasRedirection(subCmd)
) {
debugLogger.debug(
`[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${subCmd}`,
);
if (aggregateDecision === PolicyDecision.ALLOW) {
aggregateDecision = PolicyDecision.ASK_USER;
}
}
}
return this.applyNonInteractiveMode(aggregateDecision);
}
return this.applyNonInteractiveMode(ruleDecision);
}
/**
* Check if a tool call is allowed based on the configured policies.
* Returns the decision and the matching rule (if any).
@@ -183,62 +270,16 @@ export class PolicyEngine {
`[PolicyEngine.check] MATCHED rule: toolName=${rule.toolName}, decision=${rule.decision}, priority=${rule.priority}, argsPattern=${rule.argsPattern?.source || 'none'}`,
);
// Special handling for shell commands: check sub-commands if present
if (
toolCall.name &&
SHELL_TOOL_NAMES.includes(toolCall.name) &&
rule.decision === PolicyDecision.ALLOW
) {
const command = (toolCall.args as { command?: string })?.command;
if (command) {
await initializeShellParsers();
const subCommands = splitCommands(command);
// If there are multiple sub-commands, we must verify EACH of them matches an ALLOW rule.
// If any sub-command results in DENY -> the whole thing is DENY.
// If any sub-command results in ASK_USER -> the whole thing is ASK_USER (unless one is DENY).
// Only if ALL sub-commands are ALLOW do we proceed with ALLOW.
if (subCommands.length === 0) {
// This case occurs if the command is non-empty but parsing fails.
// An ALLOW rule for a prefix might have matched, but since the rest of
// the command is un-parseable, it's unsafe to proceed.
// Fall back to a safe decision.
debugLogger.debug(
`[PolicyEngine.check] Command parsing failed for: ${command}. Falling back to safe decision because implicit ALLOW is unsafe.`,
);
decision = this.applyNonInteractiveMode(PolicyDecision.ASK_USER);
} else if (subCommands.length > 1) {
debugLogger.debug(
`[PolicyEngine.check] Compound command detected: ${subCommands.length} parts`,
);
let aggregateDecision = PolicyDecision.ALLOW;
for (const subCmd of subCommands) {
// Recursively check each sub-command
const subCall = {
name: toolCall.name,
args: { command: subCmd },
};
const subResult = await this.check(subCall, serverName);
if (subResult.decision === PolicyDecision.DENY) {
aggregateDecision = PolicyDecision.DENY;
break; // Fail fast
} else if (subResult.decision === PolicyDecision.ASK_USER) {
aggregateDecision = PolicyDecision.ASK_USER;
// efficient: we can only strictly downgrade from ALLOW to ASK_USER,
// but we must continue looking for DENY.
}
}
decision = aggregateDecision;
} else {
// Single command, rule match is valid
decision = this.applyNonInteractiveMode(rule.decision);
}
} else {
decision = this.applyNonInteractiveMode(rule.decision);
}
if (toolCall.name && SHELL_TOOL_NAMES.includes(toolCall.name)) {
const args = toolCall.args as { command?: string; dir_path?: string };
decision = await this.checkShellCommand(
toolCall.name,
args?.command,
rule.decision,
serverName,
args?.dir_path,
rule.allowRedirection,
);
} else {
decision = this.applyNonInteractiveMode(rule.decision);
}

View File

@@ -152,7 +152,6 @@ describe('ShellToolInvocation Policy Update', () => {
const invocation = new ShellToolInvocation(
mockConfig,
{ command: 'git status && npm test' },
new Set(),
mockMessageBus,
'run_shell_command',
'Shell',
@@ -174,7 +173,6 @@ describe('ShellToolInvocation Policy Update', () => {
const invocation = new ShellToolInvocation(
mockConfig,
{ command: 'ls -la /tmp' },
new Set(),
mockMessageBus,
'run_shell_command',
'Shell',

View File

@@ -123,6 +123,13 @@ export interface PolicyRule {
* If undefined or empty, it applies to all modes.
*/
modes?: ApprovalMode[];
/**
* If true, allows command redirection even if the policy engine would normally
* downgrade ALLOW to ASK_USER for redirected commands.
* Only applies when decision is ALLOW.
*/
allowRedirection?: boolean;
}
export interface SafetyCheckerRule {

View File

@@ -51,7 +51,6 @@ import * as path from 'node:path';
import * as crypto from 'node:crypto';
import * as summarizer from '../utils/summarizer.js';
import { ToolErrorType } from './tool-error.js';
import { ToolConfirmationOutcome } from './tools.js';
import { OUTPUT_UPDATE_INTERVAL_MS } from './shell.js';
import { SHELL_TOOL_NAME } from './tool-names.js';
import { WorkspaceContext } from '../utils/workspaceContext.js';
@@ -472,7 +471,7 @@ describe('ShellTool', () => {
});
describe('shouldConfirmExecute', () => {
it('should request confirmation for a new command and allowlist it on "Always"', async () => {
it('should return confirmation details when PolicyEngine delegates', async () => {
const params = { command: 'npm install' };
const invocation = shellTool.build(params);
const confirmation = await invocation.shouldConfirmExecute(
@@ -481,18 +480,6 @@ describe('ShellTool', () => {
expect(confirmation).not.toBe(false);
expect(confirmation && confirmation.type).toBe('exec');
// eslint-disable-next-line @typescript-eslint/no-explicit-any
await (confirmation as any).onConfirm(
ToolConfirmationOutcome.ProceedAlways,
);
// Should now be allowlisted
const secondInvocation = shellTool.build({ command: 'npm test' });
const secondConfirmation = await secondInvocation.shouldConfirmExecute(
new AbortController().signal,
);
expect(secondConfirmation).toBe(false);
});
it('should throw an error if validation fails', () => {

View File

@@ -62,7 +62,6 @@ export class ShellToolInvocation extends BaseToolInvocation<
constructor(
private readonly config: Config,
params: ShellToolParams,
private readonly allowlist: Set<string>,
messageBus?: MessageBus,
_toolName?: string,
_toolDisplayName?: string,
@@ -127,23 +126,15 @@ export class ShellToolInvocation extends BaseToolInvocation<
);
}
const commandsToConfirm = rootCommands.filter(
(command) => !this.allowlist.has(command),
);
if (commandsToConfirm.length === 0) {
return false; // already approved and allowlisted
}
// Rely entirely on PolicyEngine for interactive confirmation.
// If we are here, it means PolicyEngine returned ASK_USER (or no message bus),
// so we must provide confirmation details.
const confirmationDetails: ToolExecuteConfirmationDetails = {
type: 'exec',
title: 'Confirm Shell Command',
command: this.params.command,
rootCommand: commandsToConfirm.join(', '),
rootCommand: rootCommands.join(', '),
onConfirm: async (outcome: ToolConfirmationOutcome) => {
if (outcome === ToolConfirmationOutcome.ProceedAlways) {
commandsToConfirm.forEach((command) => this.allowlist.add(command));
}
await this.publishPolicyUpdate(outcome);
},
};
@@ -451,8 +442,6 @@ export class ShellTool extends BaseDeclarativeTool<
> {
static readonly Name = SHELL_TOOL_NAME;
private allowlist: Set<string> = new Set();
constructor(
private readonly config: Config,
messageBus?: MessageBus,
@@ -533,7 +522,6 @@ export class ShellTool extends BaseDeclarativeTool<
return new ShellToolInvocation(
this.config,
params,
this.allowlist,
messageBus,
_toolName,
_toolDisplayName,

View File

@@ -19,6 +19,7 @@ import {
getShellConfiguration,
initializeShellParsers,
stripShellWrapper,
hasRedirection,
} from './shell-utils.js';
const mockPlatform = vi.hoisted(() => vi.fn());
@@ -32,6 +33,12 @@ vi.mock('os', () => ({
homedir: mockHomedir,
}));
const mockSpawnSync = vi.hoisted(() => vi.fn());
vi.mock('node:child_process', () => ({
spawnSync: mockSpawnSync,
spawn: vi.fn(),
}));
const mockQuote = vi.hoisted(() => vi.fn());
vi.mock('shell-quote', () => ({
quote: mockQuote,
@@ -50,6 +57,12 @@ beforeEach(() => {
mockQuote.mockImplementation((args: string[]) =>
args.map((arg) => `'${arg}'`).join(' '),
);
mockSpawnSync.mockReturnValue({
stdout: Buffer.from(''),
stderr: Buffer.from(''),
status: 0,
error: undefined,
});
});
afterEach(() => {
@@ -105,6 +118,64 @@ describe('getCommandRoots', () => {
const roots = getCommandRoots('echo ${foo@P}');
expect(roots).toEqual([]);
});
it('should include nested command substitutions in redirected statements', () => {
const result = getCommandRoots('echo $(cat secret) > output.txt');
expect(result).toEqual(['echo', 'cat']);
});
it('should handle parser initialization failures gracefully', async () => {
// Reset modules to clear singleton state
vi.resetModules();
// Mock fileUtils to fail Wasm loading
vi.doMock('./fileUtils.js', () => ({
loadWasmBinary: vi.fn().mockRejectedValue(new Error('Wasm load failed')),
}));
// Re-import shell-utils with mocked dependencies
const shellUtils = await import('./shell-utils.js');
// Should catch the error and not throw
await expect(shellUtils.initializeShellParsers()).resolves.not.toThrow();
// Fallback: splitting commands depends on parser, so if parser fails, it returns empty
const roots = shellUtils.getCommandRoots('ls -la');
expect(roots).toEqual([]);
});
});
describe('hasRedirection', () => {
it('should detect output redirection', () => {
expect(hasRedirection('echo hello > world')).toBe(true);
});
it('should detect input redirection', () => {
expect(hasRedirection('cat < input')).toBe(true);
});
it('should detect append redirection', () => {
expect(hasRedirection('echo hello >> world')).toBe(true);
});
it('should detect heredoc', () => {
expect(hasRedirection('cat <<EOF\nhello\nEOF')).toBe(true);
});
it('should detect herestring', () => {
expect(hasRedirection('cat <<< "hello"')).toBe(true);
});
it('should return false for simple commands', () => {
expect(hasRedirection('ls -la')).toBe(false);
});
it('should return false for pipes (pipes are not redirections in this context)', () => {
// Note: pipes are often handled separately by splitCommands, but checking here confirms they don't trigger "redirection" flag if we don't want them to.
// However, the current implementation checks for 'redirected_statement' nodes.
// A pipe is a 'pipeline' node.
expect(hasRedirection('echo hello | cat')).toBe(false);
});
});
describeWindowsOnly('PowerShell integration', () => {
@@ -300,3 +371,55 @@ describe('getShellConfiguration', () => {
});
});
});
describe('hasRedirection (PowerShell via mock)', () => {
beforeEach(() => {
mockPlatform.mockReturnValue('win32');
process.env['ComSpec'] = 'powershell.exe';
});
const mockPowerShellResult = (
commands: Array<{ name: string; text: string }>,
hasRedirection: boolean,
) => {
mockSpawnSync.mockReturnValue({
stdout: Buffer.from(
JSON.stringify({
success: true,
commands,
hasRedirection,
}),
),
stderr: Buffer.from(''),
status: 0,
error: undefined,
});
};
it('should return true when PowerShell parser detects redirection', () => {
mockPowerShellResult([{ name: 'echo', text: 'echo hello' }], true);
expect(hasRedirection('echo hello > file.txt')).toBe(true);
});
it('should return false when PowerShell parser does not detect redirection', () => {
mockPowerShellResult([{ name: 'echo', text: 'echo hello' }], false);
expect(hasRedirection('echo hello')).toBe(false);
});
it('should return false when quoted redirection chars are used but not actual redirection', () => {
mockPowerShellResult(
[{ name: 'echo', text: 'echo "-> arrow"' }],
false, // Parser says NO redirection
);
expect(hasRedirection('echo "-> arrow"')).toBe(false);
});
it('should fallback to regex if parsing fails (simulating safety)', () => {
mockSpawnSync.mockReturnValue({
stdout: Buffer.from('invalid json'),
status: 0,
});
// Fallback regex sees '>' in arrow
expect(hasRedirection('echo "-> arrow"')).toBe(true);
});
});

View File

@@ -98,7 +98,9 @@ export async function initializeShellParsers(): Promise<void> {
if (!treeSitterInitialization) {
treeSitterInitialization = loadBashLanguage().catch((error) => {
treeSitterInitialization = null;
throw error;
// Log the error but don't throw, allowing the application to fall back to safe defaults (ASK_USER)
// or regex checks where appropriate.
debugLogger.debug('Failed to initialize shell parsers:', error);
});
}
@@ -113,6 +115,7 @@ export interface ParsedCommandDetail {
interface CommandParseResult {
details: ParsedCommandDetail[];
hasError: boolean;
hasRedirection?: boolean;
}
const POWERSHELL_COMMAND_ENV = '__GCLI_POWERSHELL_COMMAND__';
@@ -136,7 +139,11 @@ if ($errors -and $errors.Count -gt 0) {
}
$commandAsts = $ast.FindAll({ param($node) $node -is [System.Management.Automation.Language.CommandAst] }, $true)
$commandObjects = @()
$hasRedirection = $false
foreach ($commandAst in $commandAsts) {
if ($commandAst.Redirections.Count -gt 0) {
$hasRedirection = $true
}
$name = $commandAst.GetCommandName()
if ([string]::IsNullOrWhiteSpace($name)) {
continue
@@ -149,6 +156,7 @@ foreach ($commandAst in $commandAsts) {
[PSCustomObject]@{
success = $true
commands = $commandObjects
hasRedirection = $hasRedirection
} | ConvertTo-Json -Compress
`,
'utf16le',
@@ -230,22 +238,45 @@ function collectCommandDetails(
const details: ParsedCommandDetail[] = [];
while (stack.length > 0) {
const current = stack.pop();
if (!current) {
continue;
const current = stack.pop()!;
let name: string | null = null;
let ignoreChildId: number | undefined;
if (current.type === 'redirected_statement') {
const body = current.childForFieldName('body');
if (body) {
const bodyName = extractNameFromNode(body);
if (bodyName) {
name = bodyName;
ignoreChildId = body.id;
// If we ignore the body node (because we used it to name the redirected_statement),
// we must still traverse its children to find nested commands (e.g. command substitution).
for (let i = body.namedChildCount - 1; i >= 0; i -= 1) {
const grandChild = body.namedChild(i);
if (grandChild) {
stack.push(grandChild);
}
}
}
}
}
const commandName = extractNameFromNode(current);
if (commandName) {
if (!name) {
name = extractNameFromNode(current);
}
if (name) {
details.push({
name: commandName,
name,
text: source.slice(current.startIndex, current.endIndex).trim(),
});
}
for (let i = current.namedChildCount - 1; i >= 0; i -= 1) {
const child = current.namedChild(i);
if (child) {
if (child && child.id !== ignoreChildId) {
stack.push(child);
}
}
@@ -290,7 +321,11 @@ function hasPromptCommandTransform(root: Node): boolean {
function parseBashCommandDetails(command: string): CommandParseResult | null {
if (treeSitterInitializationError) {
throw treeSitterInitializationError;
debugLogger.debug(
'Bash parser not initialized:',
treeSitterInitializationError,
);
return null;
}
if (!bashLanguage) {
@@ -384,6 +419,7 @@ function parsePowerShellCommandDetails(
let parsed: {
success?: boolean;
commands?: Array<{ name?: string; text?: string }>;
hasRedirection?: boolean;
} | null = null;
try {
parsed = JSON.parse(output);
@@ -417,6 +453,7 @@ function parsePowerShellCommandDetails(
return {
details,
hasError: details.length === 0,
hasRedirection: parsed.hasRedirection,
};
} catch {
return null;
@@ -514,6 +551,50 @@ export function escapeShellArg(arg: string, shell: ShellType): string {
* @param command The shell command string to parse
* @returns An array of individual command strings
*/
/**
* Checks if a command contains redirection operators.
* Uses shell-specific parsers where possible, falling back to a broad regex check.
*/
export function hasRedirection(command: string): boolean {
const fallbackCheck = () => /[><]/.test(command);
const configuration = getShellConfiguration();
if (configuration.shell === 'powershell') {
const parsed = parsePowerShellCommandDetails(
command,
configuration.executable,
);
return parsed && !parsed.hasError
? !!parsed.hasRedirection
: fallbackCheck();
}
if (configuration.shell === 'bash' && bashLanguage) {
const tree = parseCommandTree(command);
if (!tree) return fallbackCheck();
const stack: Node[] = [tree.rootNode];
while (stack.length > 0) {
const current = stack.pop()!;
if (
current.type === 'redirected_statement' ||
current.type === 'file_redirect' ||
current.type === 'heredoc_redirect' ||
current.type === 'herestring_redirect'
) {
return true;
}
for (let i = current.childCount - 1; i >= 0; i -= 1) {
const child = current.child(i);
if (child) stack.push(child);
}
}
return false;
}
return fallbackCheck();
}
export function splitCommands(command: string): string[] {
const parsed = parseCommandDetails(command);
if (!parsed || parsed.hasError) {