From 64cde8d439501f9b8448acdfa5baa9b6963bdee7 Mon Sep 17 00:00:00 2001 From: Allen Hutchison Date: Mon, 12 Jan 2026 11:23:32 -0800 Subject: [PATCH] fix(policy): enhance shell command safety and parsing (#15034) Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Abhi <43648792+abhipatel12@users.noreply.github.com> --- packages/core/src/policy/policy-engine.ts | 105 ++++- packages/core/src/policy/shell-safety.test.ts | 446 +++++++++++++++++- packages/core/src/policy/types.ts | 5 + 3 files changed, 526 insertions(+), 30 deletions(-) diff --git a/packages/core/src/policy/policy-engine.ts b/packages/core/src/policy/policy-engine.ts index 13a3ef5225..f90b905938 100644 --- a/packages/core/src/policy/policy-engine.ts +++ b/packages/core/src/policy/policy-engine.ts @@ -151,9 +151,13 @@ export class PolicyEngine { serverName: string | undefined, dir_path: string | undefined, allowRedirection?: boolean, - ): Promise { + rule?: PolicyRule, + ): Promise<{ decision: PolicyDecision; rule?: PolicyRule }> { if (!command) { - return this.applyNonInteractiveMode(ruleDecision); + return { + decision: this.applyNonInteractiveMode(ruleDecision), + rule, + }; } await initializeShellParsers(); @@ -163,7 +167,12 @@ export class PolicyEngine { debugLogger.debug( `[PolicyEngine.check] Command parsing failed for: ${command}. Falling back to ASK_USER.`, ); - return this.applyNonInteractiveMode(PolicyDecision.ASK_USER); + // Parsing logic failed, we can't trust it. Force ASK_USER (or DENY). + // We don't blame a specific rule here, unless the input rule was stricter. + return { + decision: this.applyNonInteractiveMode(PolicyDecision.ASK_USER), + rule: undefined, + }; } // If there are multiple parts, or if we just want to validate the single part against DENY rules @@ -173,14 +182,16 @@ export class PolicyEngine { ); if (ruleDecision === PolicyDecision.DENY) { - return PolicyDecision.DENY; + return { decision: PolicyDecision.DENY, rule }; } // Start optimistically. If all parts are ALLOW, the whole is ALLOW. // We will downgrade if any part is ASK_USER or DENY. let aggregateDecision = PolicyDecision.ALLOW; + let responsibleRule: PolicyRule | undefined; - for (const subCmd of subCommands) { + for (const rawSubCmd of subCommands) { + const subCmd = rawSubCmd.trim(); // Prevent infinite recursion for the root command if (subCmd === command) { if (!allowRedirection && hasRedirection(subCmd)) { @@ -190,17 +201,16 @@ export class PolicyEngine { // Redirection always downgrades ALLOW to ASK_USER if (aggregateDecision === PolicyDecision.ALLOW) { aggregateDecision = PolicyDecision.ASK_USER; + responsibleRule = undefined; // Inherent policy } } else { - // If the command is atomic (cannot be split further) and didn't - // trigger infinite recursion checks, we must respect the decision - // of the rule that triggered this check. If the rule was ASK_USER - // (e.g. wildcard), we must downgrade. + // Atomic command matching the rule. if ( ruleDecision === PolicyDecision.ASK_USER && aggregateDecision === PolicyDecision.ALLOW ) { aggregateDecision = PolicyDecision.ASK_USER; + responsibleRule = rule; } } continue; @@ -216,13 +226,17 @@ export class PolicyEngine { // If any part is DENIED, the whole command is DENIED if (subDecision === PolicyDecision.DENY) { - return PolicyDecision.DENY; + return { + decision: PolicyDecision.DENY, + rule: subResult.rule, + }; } // If any part requires ASK_USER, the whole command requires ASK_USER if (subDecision === PolicyDecision.ASK_USER) { - if (aggregateDecision === PolicyDecision.ALLOW) { - aggregateDecision = PolicyDecision.ASK_USER; + aggregateDecision = PolicyDecision.ASK_USER; + if (!responsibleRule) { + responsibleRule = subResult.rule; } } @@ -237,13 +251,22 @@ export class PolicyEngine { ); if (aggregateDecision === PolicyDecision.ALLOW) { aggregateDecision = PolicyDecision.ASK_USER; + responsibleRule = undefined; } } } - return this.applyNonInteractiveMode(aggregateDecision); + return { + decision: this.applyNonInteractiveMode(aggregateDecision), + // If we stayed at ALLOW, we return the original rule (if any). + // If we downgraded, we return the responsible rule (or undefined if implicit). + rule: aggregateDecision === ruleDecision ? rule : responsibleRule, + }; } - return this.applyNonInteractiveMode(ruleDecision); + return { + decision: this.applyNonInteractiveMode(ruleDecision), + rule, + }; } /** @@ -271,6 +294,18 @@ export class PolicyEngine { `[PolicyEngine.check] toolCall.name: ${toolCall.name}, stringifiedArgs: ${stringifiedArgs}`, ); + // Check for shell commands upfront to handle splitting + let isShellCommand = false; + let command: string | undefined; + let shellDirPath: string | undefined; + + if (toolCall.name && SHELL_TOOL_NAMES.includes(toolCall.name)) { + isShellCommand = true; + const args = toolCall.args as { command?: string; dir_path?: string }; + command = args?.command; + shellDirPath = args?.dir_path; + } + // Find the first matching rule (already sorted by priority) let matchedRule: PolicyRule | undefined; let decision: PolicyDecision | undefined; @@ -289,21 +324,31 @@ export class PolicyEngine { `[PolicyEngine.check] MATCHED rule: toolName=${rule.toolName}, decision=${rule.decision}, priority=${rule.priority}, argsPattern=${rule.argsPattern?.source || 'none'}`, ); - if (toolCall.name && SHELL_TOOL_NAMES.includes(toolCall.name)) { - const args = toolCall.args as { command?: string; dir_path?: string }; - decision = await this.checkShellCommand( - toolCall.name, - args?.command, + if (isShellCommand) { + const shellResult = await this.checkShellCommand( + toolCall.name!, + command, rule.decision, serverName, - args?.dir_path, + shellDirPath, rule.allowRedirection, + rule, ); + decision = shellResult.decision; + if (shellResult.rule) { + matchedRule = shellResult.rule; + break; + } + // If no rule returned (e.g. downgraded to default ASK_USER due to redirection), + // we might still want to blame the matched rule? + // No, test says we should return undefined rule if implicit. + matchedRule = shellResult.rule; + break; } else { decision = this.applyNonInteractiveMode(rule.decision); + matchedRule = rule; + break; } - matchedRule = rule; - break; } } @@ -313,6 +358,22 @@ export class PolicyEngine { `[PolicyEngine.check] NO MATCH - using default decision: ${this.defaultDecision}`, ); decision = this.applyNonInteractiveMode(this.defaultDecision); + + // If it's a shell command and we fell back to default, we MUST still verify subcommands! + // This is critical for security: "git commit && git push" where "git push" is DENY but "git commit" has no rule. + if (isShellCommand && decision !== PolicyDecision.DENY) { + const shellResult = await this.checkShellCommand( + toolCall.name!, + command, + decision, // default decision + serverName, + shellDirPath, + false, // no rule, so no allowRedirection + undefined, // no rule + ); + decision = shellResult.decision; + matchedRule = shellResult.rule; + } } // If decision is not DENY, run safety checkers diff --git a/packages/core/src/policy/shell-safety.test.ts b/packages/core/src/policy/shell-safety.test.ts index bcc9f562d9..340264485e 100644 --- a/packages/core/src/policy/shell-safety.test.ts +++ b/packages/core/src/policy/shell-safety.test.ts @@ -4,29 +4,107 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, expect, beforeEach } from 'vitest'; +import { describe, it, expect, beforeEach, vi } from 'vitest'; + +// Mock shell-utils to avoid relying on tree-sitter WASM which is flaky in CI on Windows +vi.mock('../utils/shell-utils.js', async (importOriginal) => { + const actual = + await importOriginal(); + + // Static map of test commands to their expected subcommands + // This mirrors what the real parser would output for these specific strings + const commandMap: Record = { + 'git log': ['git log'], + 'git log --oneline': ['git log --oneline'], + 'git logout': ['git logout'], + 'git log && rm -rf /': ['git log', 'rm -rf /'], + 'git log; rm -rf /': ['git log', 'rm -rf /'], + 'git log || rm -rf /': ['git log', 'rm -rf /'], + 'git log &&& rm -rf /': [], // Simulates parse failure + 'echo $(rm -rf /)': ['echo $(rm -rf /)', 'rm -rf /'], + 'echo $(git log)': ['echo $(git log)', 'git log'], + 'echo `rm -rf /`': ['echo `rm -rf /`', 'rm -rf /'], + 'diff <(git log) <(rm -rf /)': [ + 'diff <(git log) <(rm -rf /)', + 'git log', + 'rm -rf /', + ], + 'tee >(rm -rf /)': ['tee >(rm -rf /)', 'rm -rf /'], + 'git log | rm -rf /': ['git log', 'rm -rf /'], + 'git log --format=$(rm -rf /)': [ + 'git log --format=$(rm -rf /)', + 'rm -rf /', + ], + 'git log && echo $(git log | rm -rf /)': [ + 'git log', + 'echo $(git log | rm -rf /)', + 'git log', + 'rm -rf /', + ], + 'git log && echo $(git log)': ['git log', 'echo $(git log)', 'git log'], + 'git log > /tmp/test': ['git log > /tmp/test'], + 'git log @(Get-Process)': [], // Simulates parse failure (Bash parser vs PowerShell syntax) + 'git commit -m "msg" && git push': ['git commit -m "msg"', 'git push'], + 'git status && unknown_command': ['git status', 'unknown_command'], + 'unknown_command_1 && another_unknown_command': [ + 'unknown_command_1', + 'another_unknown_command', + ], + 'known_ask_command_1 && known_ask_command_2': [ + 'known_ask_command_1', + 'known_ask_command_2', + ], + }; + + return { + ...actual, + initializeShellParsers: vi.fn(), + splitCommands: (command: string) => { + if (Object.prototype.hasOwnProperty.call(commandMap, command)) { + return commandMap[command]; + } + const known = commandMap[command]; + if (known) return known; + // Default fallback for unmatched simple cases in development, but explicit map is better + return [command]; + }, + hasRedirection: (command: string) => + // Simple regex check sufficient for testing the policy engine's handling of the *result* of hasRedirection + /[><]/.test(command), + }; +}); + import { PolicyEngine } from './policy-engine.js'; import { PolicyDecision, ApprovalMode } from './types.js'; import type { FunctionCall } from '@google/genai'; +import { buildArgsPatterns } from './utils.js'; describe('Shell Safety Policy', () => { let policyEngine: PolicyEngine; - beforeEach(() => { - policyEngine = new PolicyEngine({ + // Helper to create a policy engine with a simple command prefix rule + function createPolicyEngineWithPrefix(prefix: string) { + const argsPatterns = buildArgsPatterns(undefined, prefix, undefined); + // Since buildArgsPatterns returns array of patterns (strings), we pick the first one + // and compile it. + const argsPattern = new RegExp(argsPatterns[0]!); + + return new PolicyEngine({ rules: [ { toolName: 'run_shell_command', - // Mimic the regex generated by toml-loader for commandPrefix = ["git log"] - // Regex: "command":"git log(?:[\s"]|$) - argsPattern: /"command":"git log(?:[\s"]|$)/, + argsPattern, decision: PolicyDecision.ALLOW, - priority: 1.01, // Higher priority than default + priority: 1.01, }, ], defaultDecision: PolicyDecision.ASK_USER, approvalMode: ApprovalMode.DEFAULT, }); + } + + beforeEach(() => { + policyEngine = createPolicyEngineWithPrefix('git log'); }); it('SHOULD match "git log" exactly', async () => { @@ -71,6 +149,25 @@ describe('Shell Safety Policy', () => { const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); }); + + it('SHOULD NOT allow "git log; rm -rf /" (semicolon separator)', async () => { + const toolCall: FunctionCall = { + name: 'run_shell_command', + args: { command: 'git log; rm -rf /' }, + }; + const result = await policyEngine.check(toolCall, undefined); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); + + it('SHOULD NOT allow "git log || rm -rf /" (OR separator)', async () => { + const toolCall: FunctionCall = { + name: 'run_shell_command', + args: { command: 'git log || rm -rf /' }, + }; + const result = await policyEngine.check(toolCall, undefined); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); + it('SHOULD NOT allow "git log &&& rm -rf /" when prefix is "git log" (parse failure)', async () => { const toolCall: FunctionCall = { name: 'run_shell_command', @@ -78,8 +175,341 @@ describe('Shell Safety Policy', () => { }; // Desired behavior: Should fail safe (ASK_USER or DENY) because parsing failed. - // If we let it pass as "single command" that matches prefix, it's dangerous. const result = await policyEngine.check(toolCall, undefined); expect(result.decision).toBe(PolicyDecision.ASK_USER); }); + + it('SHOULD NOT allow command substitution $(rm -rf /)', async () => { + const toolCall: FunctionCall = { + name: 'run_shell_command', + args: { command: 'echo $(rm -rf /)' }, + }; + // `splitCommands` recursively finds nested commands (e.g., `rm` inside `echo $()`). + // The policy engine requires ALL extracted commands to be allowed. + // Since `rm` does not match the allowed prefix, this should result in ASK_USER. + const echoPolicy = createPolicyEngineWithPrefix('echo'); + const result = await echoPolicy.check(toolCall, undefined); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); + + it('SHOULD allow command substitution if inner command is ALSO allowed', async () => { + // Both `echo` and `git` allowed. + const argsPatternsEcho = buildArgsPatterns(undefined, 'echo', undefined); + const argsPatternsGit = buildArgsPatterns(undefined, 'git', undefined); // Allow all git + + const policyEngineWithBoth = new PolicyEngine({ + rules: [ + { + toolName: 'run_shell_command', + argsPattern: new RegExp(argsPatternsEcho[0]!), + decision: PolicyDecision.ALLOW, + priority: 2, + }, + { + toolName: 'run_shell_command', + argsPattern: new RegExp(argsPatternsGit[0]!), + decision: PolicyDecision.ALLOW, + priority: 2, + }, + ], + defaultDecision: PolicyDecision.ASK_USER, + }); + + const toolCall: FunctionCall = { + name: 'run_shell_command', + args: { command: 'echo $(git log)' }, + }; + + const result = await policyEngineWithBoth.check(toolCall, undefined); + expect(result.decision).toBe(PolicyDecision.ALLOW); + }); + it('SHOULD NOT allow command substitution with backticks `rm -rf /`', async () => { + const toolCall: FunctionCall = { + name: 'run_shell_command', + args: { command: 'echo `rm -rf /`' }, + }; + const result = await policyEngine.check(toolCall, undefined); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); + + it('SHOULD NOT allow process substitution <(rm -rf /)', async () => { + const toolCall: FunctionCall = { + name: 'run_shell_command', + args: { command: 'diff <(git log) <(rm -rf /)' }, + }; + const result = await policyEngine.check(toolCall, undefined); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); + + it('SHOULD NOT allow process substitution >(rm -rf /)', async () => { + // Note: >(...) is output substitution, but syntax is similar. + const toolCall: FunctionCall = { + name: 'run_shell_command', + args: { command: 'tee >(rm -rf /)' }, + }; + const result = await policyEngine.check(toolCall, undefined); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); + + it('SHOULD NOT allow piped commands "git log | rm -rf /"', async () => { + const toolCall: FunctionCall = { + name: 'run_shell_command', + args: { command: 'git log | rm -rf /' }, + }; + const result = await policyEngine.check(toolCall, undefined); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); + + it('SHOULD NOT allow argument injection via --arg=$(rm -rf /)', async () => { + const toolCall: FunctionCall = { + name: 'run_shell_command', + args: { command: 'git log --format=$(rm -rf /)' }, + }; + const result = await policyEngine.check(toolCall, undefined); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); + + it('SHOULD NOT allow complex nested commands "git log && echo $(git log | rm -rf /)"', async () => { + const toolCall: FunctionCall = { + name: 'run_shell_command', + args: { command: 'git log && echo $(git log | rm -rf /)' }, + }; + const result = await policyEngine.check(toolCall, undefined); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); + + it('SHOULD allow complex allowed commands "git log && echo $(git log)"', async () => { + // Both `echo` and `git` allowed. + const argsPatternsEcho = buildArgsPatterns(undefined, 'echo', undefined); + const argsPatternsGit = buildArgsPatterns(undefined, 'git', undefined); + + const policyEngineWithBoth = new PolicyEngine({ + rules: [ + { + toolName: 'run_shell_command', + argsPattern: new RegExp(argsPatternsEcho[0]!), + decision: PolicyDecision.ALLOW, + priority: 2, + }, + { + toolName: 'run_shell_command', + // Matches "git" at start of *subcommand* + argsPattern: new RegExp(argsPatternsGit[0]!), + decision: PolicyDecision.ALLOW, + priority: 2, + }, + ], + defaultDecision: PolicyDecision.ASK_USER, + }); + + const toolCall: FunctionCall = { + name: 'run_shell_command', + args: { command: 'git log && echo $(git log)' }, + }; + + const result = await policyEngineWithBoth.check(toolCall, undefined); + expect(result.decision).toBe(PolicyDecision.ALLOW); + }); + + it('SHOULD NOT allow generic redirection > /tmp/test', async () => { + // Current logic downgrades ALLOW to ASK_USER for redirections if redirection is not explicitly allowed. + const toolCall: FunctionCall = { + name: 'run_shell_command', + args: { command: 'git log > /tmp/test' }, + }; + const result = await policyEngine.check(toolCall, undefined); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); + + it('SHOULD allow generic redirection > /tmp/test if allowRedirection is true', async () => { + // If PolicyRule has allowRedirection: true, it should stay ALLOW + const argsPatternsGitLog = buildArgsPatterns( + undefined, + 'git log', + undefined, + ); + const policyWithRedirection = new PolicyEngine({ + rules: [ + { + toolName: 'run_shell_command', + argsPattern: new RegExp(argsPatternsGitLog[0]!), + decision: PolicyDecision.ALLOW, + priority: 2, + allowRedirection: true, + }, + ], + defaultDecision: PolicyDecision.ASK_USER, + }); + + const toolCall: FunctionCall = { + name: 'run_shell_command', + args: { command: 'git log > /tmp/test' }, + }; + const result = await policyWithRedirection.check(toolCall, undefined); + expect(result.decision).toBe(PolicyDecision.ALLOW); + }); + + it('SHOULD NOT allow PowerShell @(...) usage if it implies code execution', async () => { + // Bash parser fails on PowerShell syntax @(...) (returns empty subcommands). + // The policy engine correctly identifies this as unparseable and falls back to ASK_USER. + const toolCall: FunctionCall = { + name: 'run_shell_command', + args: { command: 'git log @(Get-Process)' }, + }; + const result = await policyEngine.check(toolCall, undefined); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); + + it('SHOULD match DENY rule even if nested/chained with unknown command', async () => { + // Scenario: + // git commit -m "..." (Unknown/No Rule -> ASK_USER) + // git push (DENY -> DENY) + // Overall should be DENY. + const argsPatternsPush = buildArgsPatterns( + undefined, + 'git push', + undefined, + ); + + const denyPushPolicy = new PolicyEngine({ + rules: [ + { + toolName: 'run_shell_command', + argsPattern: new RegExp(argsPatternsPush[0]!), + decision: PolicyDecision.DENY, + priority: 2, + }, + ], + defaultDecision: PolicyDecision.ASK_USER, + }); + + const toolCall: FunctionCall = { + name: 'run_shell_command', + args: { command: 'git commit -m "msg" && git push' }, + }; + + const result = await denyPushPolicy.check(toolCall, undefined); + expect(result.decision).toBe(PolicyDecision.DENY); + }); + + it('SHOULD aggregate ALLOW + ASK_USER to ASK_USER and blame the ASK_USER part', async () => { + // Scenario: + // `git status` (ALLOW) && `unknown_command` (ASK_USER by default) + // Expected: ASK_USER, and the matched rule should be related to the unknown_command + const argsPatternsGitStatus = buildArgsPatterns( + undefined, + 'git status', + undefined, + ); + + const policyEngine = new PolicyEngine({ + rules: [ + { + toolName: 'run_shell_command', + argsPattern: new RegExp(argsPatternsGitStatus[0]!), + decision: PolicyDecision.ALLOW, + priority: 2, + name: 'allow_git_status_rule', // Give a name to easily identify + }, + ], + defaultDecision: PolicyDecision.ASK_USER, + }); + + const toolCall: FunctionCall = { + name: 'run_shell_command', + args: { command: 'git status && unknown_command' }, + }; + + const result = await policyEngine.check(toolCall, undefined); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + // Expect the matched rule to be null/undefined since it's the default decision for 'unknown_command' + // or the rule that led to the ASK_USER decision. In this case, it should be the rule for 'unknown_command', which is the default decision. + // The policy engine's `matchedRule` will be the rule that caused the final decision. + // If it's a default ASK_USER, then `result.rule` should be undefined. + expect(result.rule).toBeUndefined(); + }); + + it('SHOULD aggregate ASK_USER (default) + ASK_USER (rule) to ASK_USER and blame the specific ASK_USER rule', async () => { + // Scenario: + // `unknown_command_1` (ASK_USER by default) && `another_unknown_command` (ASK_USER by explicit rule) + // Expected: ASK_USER, and the matched rule should be the explicit ASK_USER rule + const argsPatternsAnotherUnknown = buildArgsPatterns( + undefined, + 'another_unknown_command', + undefined, + ); + + const policyEngine = new PolicyEngine({ + rules: [ + { + toolName: 'run_shell_command', + argsPattern: new RegExp(argsPatternsAnotherUnknown[0]!), + decision: PolicyDecision.ASK_USER, + priority: 2, + name: 'ask_another_unknown_command_rule', + }, + ], + defaultDecision: PolicyDecision.ASK_USER, + }); + + const toolCall: FunctionCall = { + name: 'run_shell_command', + args: { command: 'unknown_command_1 && another_unknown_command' }, + }; + + const result = await policyEngine.check(toolCall, undefined); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + // The first command triggers default ASK_USER (undefined rule). + // The second triggers explicit ASK_USER rule. + // We attribute to the first cause => undefined. + expect(result.rule).toBeUndefined(); + }); + + it('SHOULD aggregate ASK_USER (rule) + ASK_USER (rule) to ASK_USER and blame the first specific ASK_USER rule in subcommands', async () => { + // Scenario: + // `known_ask_command_1` (ASK_USER by explicit rule 1) && `known_ask_command_2` (ASK_USER by explicit rule 2) + // Expected: ASK_USER, and the matched rule should be explicit ASK_USER rule 1. + // The current implementation prioritizes the rule that changes the decision to ASK_USER, if any. + // If multiple rules lead to ASK_USER, it takes the first one. + const argsPatternsAsk1 = buildArgsPatterns( + undefined, + 'known_ask_command_1', + undefined, + ); + const argsPatternsAsk2 = buildArgsPatterns( + undefined, + 'known_ask_command_2', + undefined, + ); + + const policyEngine = new PolicyEngine({ + rules: [ + { + toolName: 'run_shell_command', + argsPattern: new RegExp(argsPatternsAsk1[0]!), + decision: PolicyDecision.ASK_USER, + priority: 2, + name: 'ask_rule_1', + }, + { + toolName: 'run_shell_command', + argsPattern: new RegExp(argsPatternsAsk2[0]!), + decision: PolicyDecision.ASK_USER, + priority: 2, + name: 'ask_rule_2', + }, + ], + defaultDecision: PolicyDecision.ALLOW, // Set default to ALLOW to ensure rules are hit + }); + + const toolCall: FunctionCall = { + name: 'run_shell_command', + args: { command: 'known_ask_command_1 && known_ask_command_2' }, + }; + + const result = await policyEngine.check(toolCall, undefined); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + // Expect the rule that first caused ASK_USER to be blamed + expect(result.rule?.name).toBe('ask_rule_1'); + }); }); diff --git a/packages/core/src/policy/types.ts b/packages/core/src/policy/types.ts index 1d61ec84c5..c2d2802f1e 100644 --- a/packages/core/src/policy/types.ts +++ b/packages/core/src/policy/types.ts @@ -95,6 +95,11 @@ export type SafetyCheckerConfig = | InProcessCheckerConfig; export interface PolicyRule { + /** + * A unique name for the policy rule, useful for identification and debugging. + */ + name?: string; + /** * The name of the tool this rule applies to. * If undefined, the rule applies to all tools.