/** * @license * Copyright 2025 Google LLC * SPDX-License-Identifier: Apache-2.0 */ import { describe, it, expect, beforeEach, vi } from 'vitest'; // Mock shell-utils to avoid relying on tree-sitter WASM which is flaky in CI on Windows vi.mock('../utils/shell-utils.js', async (importOriginal) => { const actual = await importOriginal(); // Static map of test commands to their expected subcommands // This mirrors what the real parser would output for these specific strings const commandMap: Record = { 'git log': ['git log'], 'git log --oneline': ['git log --oneline'], 'git logout': ['git logout'], 'git log && rm -rf /': ['git log', 'rm -rf /'], 'git log; rm -rf /': ['git log', 'rm -rf /'], 'git log || rm -rf /': ['git log', 'rm -rf /'], 'git log &&& rm -rf /': [], // Simulates parse failure 'echo $(rm -rf /)': ['echo $(rm -rf /)', 'rm -rf /'], 'echo $(git log)': ['echo $(git log)', 'git log'], 'echo `rm -rf /`': ['echo `rm -rf /`', 'rm -rf /'], 'diff <(git log) <(rm -rf /)': [ 'diff <(git log) <(rm -rf /)', 'git log', 'rm -rf /', ], 'tee >(rm -rf /)': ['tee >(rm -rf /)', 'rm -rf /'], 'git log | rm -rf /': ['git log', 'rm -rf /'], 'git log --format=$(rm -rf /)': [ 'git log --format=$(rm -rf /)', 'rm -rf /', ], 'git log && echo $(git log | rm -rf /)': [ 'git log', 'echo $(git log | rm -rf /)', 'git log', 'rm -rf /', ], 'git log && echo $(git log)': ['git log', 'echo $(git log)', 'git log'], 'git log > /tmp/test': ['git log > /tmp/test'], 'git log @(Get-Process)': [], // Simulates parse failure (Bash parser vs PowerShell syntax) 'git commit -m "msg" && git push': ['git commit -m "msg"', 'git push'], 'git status && unknown_command': ['git status', 'unknown_command'], 'unknown_command_1 && another_unknown_command': [ 'unknown_command_1', 'another_unknown_command', ], 'known_ask_command_1 && known_ask_command_2': [ 'known_ask_command_1', 'known_ask_command_2', ], }; return { ...actual, initializeShellParsers: vi.fn(), parseCommandDetails: (command: string) => { if (Object.prototype.hasOwnProperty.call(commandMap, command)) { const subcommands = commandMap[command]; return { details: subcommands.map((text) => ({ name: text.split(' ')[0], text, startIndex: command.indexOf(text), })), hasError: subcommands.length === 0 && command.includes('&&&'), }; } return { details: [ { name: command.split(' ')[0], text: command, startIndex: 0, }, ], hasError: false, }; }, stripShellWrapper: (command: string) => command, splitCommands: (command: string) => { if (Object.prototype.hasOwnProperty.call(commandMap, command)) { return commandMap[command]; } const known = commandMap[command]; if (known) return known; // Default fallback for unmatched simple cases in development, but explicit map is better return [command]; }, hasRedirection: (command: string) => // Simple regex check sufficient for testing the policy engine's handling of the *result* of hasRedirection /[><]/.test(command), }; }); import { PolicyEngine } from './policy-engine.js'; import { PolicyDecision, ApprovalMode } from './types.js'; import type { FunctionCall } from '@google/genai'; import { buildArgsPatterns } from './utils.js'; describe('Shell Safety Policy', () => { let policyEngine: PolicyEngine; // Helper to create a policy engine with a simple command prefix rule function createPolicyEngineWithPrefix(prefix: string) { const argsPatterns = buildArgsPatterns(undefined, prefix, undefined); // Since buildArgsPatterns returns array of patterns (strings), we pick the first one // and compile it. const argsPattern = new RegExp(argsPatterns[0]!); return new PolicyEngine({ rules: [ { toolName: 'run_shell_command', argsPattern, decision: PolicyDecision.ALLOW, priority: 1.01, }, ], defaultDecision: PolicyDecision.ASK_USER, approvalMode: ApprovalMode.DEFAULT, }); } beforeEach(() => { policyEngine = createPolicyEngineWithPrefix('git log'); }); it('SHOULD match "git log" exactly', async () => { const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'git log' }, }; const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ALLOW); }); it('SHOULD match "git log" with arguments', async () => { const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'git log --oneline' }, }; const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ALLOW); }); it('SHOULD NOT match "git logout" when prefix is "git log" (strict word boundary)', async () => { const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'git logout' }, }; // Desired behavior: Should NOT match "git log" prefix. // If it doesn't match, it should fall back to default decision (ASK_USER). const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); }); it('SHOULD NOT allow "git log && rm -rf /" completely when prefix is "git log" (compound command safety)', async () => { const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'git log && rm -rf /' }, }; // Desired behavior: Should inspect all parts. "rm -rf /" is not allowed. // The "git log" part is ALLOW, but "rm -rf /" is ASK_USER (default). // Aggregate should be ASK_USER. const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); }); it('SHOULD NOT allow "git log; rm -rf /" (semicolon separator)', async () => { const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'git log; rm -rf /' }, }; const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); }); it('SHOULD NOT allow "git log || rm -rf /" (OR separator)', async () => { const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'git log || rm -rf /' }, }; const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); }); it('SHOULD NOT allow "git log &&& rm -rf /" when prefix is "git log" (parse failure)', async () => { const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'git log &&& rm -rf /' }, }; // Desired behavior: Should fail safe (ASK_USER or DENY) because parsing failed. const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); }); it('SHOULD NOT allow command substitution $(rm -rf /)', async () => { const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'echo $(rm -rf /)' }, }; // `splitCommands` recursively finds nested commands (e.g., `rm` inside `echo $()`). // The policy engine requires ALL extracted commands to be allowed. // Since `rm` does not match the allowed prefix, this should result in ASK_USER. const echoPolicy = createPolicyEngineWithPrefix('echo'); const result = await echoPolicy.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); }); it('SHOULD allow command substitution if inner command is ALSO allowed', async () => { // Both `echo` and `git` allowed. const argsPatternsEcho = buildArgsPatterns(undefined, 'echo', undefined); const argsPatternsGit = buildArgsPatterns(undefined, 'git', undefined); // Allow all git const policyEngineWithBoth = new PolicyEngine({ rules: [ { toolName: 'run_shell_command', argsPattern: new RegExp(argsPatternsEcho[0]!), decision: PolicyDecision.ALLOW, priority: 2, }, { toolName: 'run_shell_command', argsPattern: new RegExp(argsPatternsGit[0]!), decision: PolicyDecision.ALLOW, priority: 2, }, ], defaultDecision: PolicyDecision.ASK_USER, }); const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'echo $(git log)' }, }; const result = await policyEngineWithBoth.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ALLOW); }); it('SHOULD NOT allow command substitution with backticks `rm -rf /`', async () => { const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'echo `rm -rf /`' }, }; const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); }); it('SHOULD NOT allow process substitution <(rm -rf /)', async () => { const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'diff <(git log) <(rm -rf /)' }, }; const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); }); it('SHOULD NOT allow process substitution >(rm -rf /)', async () => { // Note: >(...) is output substitution, but syntax is similar. const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'tee >(rm -rf /)' }, }; const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); }); it('SHOULD NOT allow piped commands "git log | rm -rf /"', async () => { const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'git log | rm -rf /' }, }; const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); }); it('SHOULD NOT allow argument injection via --arg=$(rm -rf /)', async () => { const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'git log --format=$(rm -rf /)' }, }; const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); }); it('SHOULD NOT allow complex nested commands "git log && echo $(git log | rm -rf /)"', async () => { const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'git log && echo $(git log | rm -rf /)' }, }; const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); }); it('SHOULD allow complex allowed commands "git log && echo $(git log)"', async () => { // Both `echo` and `git` allowed. const argsPatternsEcho = buildArgsPatterns(undefined, 'echo', undefined); const argsPatternsGit = buildArgsPatterns(undefined, 'git', undefined); const policyEngineWithBoth = new PolicyEngine({ rules: [ { toolName: 'run_shell_command', argsPattern: new RegExp(argsPatternsEcho[0]!), decision: PolicyDecision.ALLOW, priority: 2, }, { toolName: 'run_shell_command', // Matches "git" at start of *subcommand* argsPattern: new RegExp(argsPatternsGit[0]!), decision: PolicyDecision.ALLOW, priority: 2, }, ], defaultDecision: PolicyDecision.ASK_USER, }); const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'git log && echo $(git log)' }, }; const result = await policyEngineWithBoth.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ALLOW); }); it('SHOULD NOT allow generic redirection > /tmp/test', async () => { // Current logic downgrades ALLOW to ASK_USER for redirections if redirection is not explicitly allowed. const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'git log > /tmp/test' }, }; const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); }); it('SHOULD allow generic redirection > /tmp/test if allowRedirection is true', async () => { // If PolicyRule has allowRedirection: true, it should stay ALLOW const argsPatternsGitLog = buildArgsPatterns( undefined, 'git log', undefined, ); const policyWithRedirection = new PolicyEngine({ rules: [ { toolName: 'run_shell_command', argsPattern: new RegExp(argsPatternsGitLog[0]!), decision: PolicyDecision.ALLOW, priority: 2, allowRedirection: true, }, ], defaultDecision: PolicyDecision.ASK_USER, }); const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'git log > /tmp/test' }, }; const result = await policyWithRedirection.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ALLOW); }); it('SHOULD NOT allow PowerShell @(...) usage if it implies code execution', async () => { // Bash parser fails on PowerShell syntax @(...) (returns empty subcommands). // The policy engine correctly identifies this as unparseable and falls back to ASK_USER. const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'git log @(Get-Process)' }, }; const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); }); it('SHOULD match DENY rule even if nested/chained with unknown command', async () => { // Scenario: // git commit -m "..." (Unknown/No Rule -> ASK_USER) // git push (DENY -> DENY) // Overall should be DENY. const argsPatternsPush = buildArgsPatterns( undefined, 'git push', undefined, ); const denyPushPolicy = new PolicyEngine({ rules: [ { toolName: 'run_shell_command', argsPattern: new RegExp(argsPatternsPush[0]!), decision: PolicyDecision.DENY, priority: 2, }, ], defaultDecision: PolicyDecision.ASK_USER, }); const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'git commit -m "msg" && git push' }, }; const result = await denyPushPolicy.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.DENY); }); it('SHOULD aggregate ALLOW + ASK_USER to ASK_USER and blame the ASK_USER part', async () => { // Scenario: // `git status` (ALLOW) && `unknown_command` (ASK_USER by default) // Expected: ASK_USER, and the matched rule should be related to the unknown_command const argsPatternsGitStatus = buildArgsPatterns( undefined, 'git status', undefined, ); const policyEngine = new PolicyEngine({ rules: [ { toolName: 'run_shell_command', argsPattern: new RegExp(argsPatternsGitStatus[0]!), decision: PolicyDecision.ALLOW, priority: 2, name: 'allow_git_status_rule', // Give a name to easily identify }, ], defaultDecision: PolicyDecision.ASK_USER, }); const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'git status && unknown_command' }, }; const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); // Expect the matched rule to be null/undefined since it's the default decision for 'unknown_command' // or the rule that led to the ASK_USER decision. In this case, it should be the rule for 'unknown_command', which is the default decision. // The policy engine's `matchedRule` will be the rule that caused the final decision. // If it's a default ASK_USER, then `result.rule` should be undefined. expect(result.rule).toBeUndefined(); }); it('SHOULD aggregate ASK_USER (default) + ASK_USER (rule) to ASK_USER and blame the specific ASK_USER rule', async () => { // Scenario: // `unknown_command_1` (ASK_USER by default) && `another_unknown_command` (ASK_USER by explicit rule) // Expected: ASK_USER, and the matched rule should be the explicit ASK_USER rule const argsPatternsAnotherUnknown = buildArgsPatterns( undefined, 'another_unknown_command', undefined, ); const policyEngine = new PolicyEngine({ rules: [ { toolName: 'run_shell_command', argsPattern: new RegExp(argsPatternsAnotherUnknown[0]!), decision: PolicyDecision.ASK_USER, priority: 2, name: 'ask_another_unknown_command_rule', }, ], defaultDecision: PolicyDecision.ASK_USER, }); const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'unknown_command_1 && another_unknown_command' }, }; const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); // The first command triggers default ASK_USER (undefined rule). // The second triggers explicit ASK_USER rule. // We attribute to the first cause => undefined. expect(result.rule).toBeUndefined(); }); it('SHOULD aggregate ASK_USER (rule) + ASK_USER (rule) to ASK_USER and blame the first specific ASK_USER rule in subcommands', async () => { // Scenario: // `known_ask_command_1` (ASK_USER by explicit rule 1) && `known_ask_command_2` (ASK_USER by explicit rule 2) // Expected: ASK_USER, and the matched rule should be explicit ASK_USER rule 1. // The current implementation prioritizes the rule that changes the decision to ASK_USER, if any. // If multiple rules lead to ASK_USER, it takes the first one. const argsPatternsAsk1 = buildArgsPatterns( undefined, 'known_ask_command_1', undefined, ); const argsPatternsAsk2 = buildArgsPatterns( undefined, 'known_ask_command_2', undefined, ); const policyEngine = new PolicyEngine({ rules: [ { toolName: 'run_shell_command', argsPattern: new RegExp(argsPatternsAsk1[0]!), decision: PolicyDecision.ASK_USER, priority: 2, name: 'ask_rule_1', }, { toolName: 'run_shell_command', argsPattern: new RegExp(argsPatternsAsk2[0]!), decision: PolicyDecision.ASK_USER, priority: 2, name: 'ask_rule_2', }, ], defaultDecision: PolicyDecision.ALLOW, // Set default to ALLOW to ensure rules are hit }); const toolCall: FunctionCall = { name: 'run_shell_command', args: { command: 'known_ask_command_1 && known_ask_command_2' }, }; const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); // Expect the rule that first caused ASK_USER to be blamed expect(result.rule?.name).toBe('ask_rule_1'); }); });