mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-12 23:21:27 -07:00
fix(policy): enhance shell command safety and parsing (#15034)
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Abhi <43648792+abhipatel12@users.noreply.github.com>
This commit is contained in:
@@ -151,9 +151,13 @@ export class PolicyEngine {
|
||||
serverName: string | undefined,
|
||||
dir_path: string | undefined,
|
||||
allowRedirection?: boolean,
|
||||
): Promise<PolicyDecision> {
|
||||
rule?: PolicyRule,
|
||||
): Promise<{ decision: PolicyDecision; rule?: PolicyRule }> {
|
||||
if (!command) {
|
||||
return this.applyNonInteractiveMode(ruleDecision);
|
||||
return {
|
||||
decision: this.applyNonInteractiveMode(ruleDecision),
|
||||
rule,
|
||||
};
|
||||
}
|
||||
|
||||
await initializeShellParsers();
|
||||
@@ -163,7 +167,12 @@ export class PolicyEngine {
|
||||
debugLogger.debug(
|
||||
`[PolicyEngine.check] Command parsing failed for: ${command}. Falling back to ASK_USER.`,
|
||||
);
|
||||
return this.applyNonInteractiveMode(PolicyDecision.ASK_USER);
|
||||
// Parsing logic failed, we can't trust it. Force ASK_USER (or DENY).
|
||||
// We don't blame a specific rule here, unless the input rule was stricter.
|
||||
return {
|
||||
decision: this.applyNonInteractiveMode(PolicyDecision.ASK_USER),
|
||||
rule: undefined,
|
||||
};
|
||||
}
|
||||
|
||||
// If there are multiple parts, or if we just want to validate the single part against DENY rules
|
||||
@@ -173,14 +182,16 @@ export class PolicyEngine {
|
||||
);
|
||||
|
||||
if (ruleDecision === PolicyDecision.DENY) {
|
||||
return PolicyDecision.DENY;
|
||||
return { decision: PolicyDecision.DENY, rule };
|
||||
}
|
||||
|
||||
// Start optimistically. If all parts are ALLOW, the whole is ALLOW.
|
||||
// We will downgrade if any part is ASK_USER or DENY.
|
||||
let aggregateDecision = PolicyDecision.ALLOW;
|
||||
let responsibleRule: PolicyRule | undefined;
|
||||
|
||||
for (const subCmd of subCommands) {
|
||||
for (const rawSubCmd of subCommands) {
|
||||
const subCmd = rawSubCmd.trim();
|
||||
// Prevent infinite recursion for the root command
|
||||
if (subCmd === command) {
|
||||
if (!allowRedirection && hasRedirection(subCmd)) {
|
||||
@@ -190,17 +201,16 @@ export class PolicyEngine {
|
||||
// Redirection always downgrades ALLOW to ASK_USER
|
||||
if (aggregateDecision === PolicyDecision.ALLOW) {
|
||||
aggregateDecision = PolicyDecision.ASK_USER;
|
||||
responsibleRule = undefined; // Inherent policy
|
||||
}
|
||||
} else {
|
||||
// If the command is atomic (cannot be split further) and didn't
|
||||
// trigger infinite recursion checks, we must respect the decision
|
||||
// of the rule that triggered this check. If the rule was ASK_USER
|
||||
// (e.g. wildcard), we must downgrade.
|
||||
// Atomic command matching the rule.
|
||||
if (
|
||||
ruleDecision === PolicyDecision.ASK_USER &&
|
||||
aggregateDecision === PolicyDecision.ALLOW
|
||||
) {
|
||||
aggregateDecision = PolicyDecision.ASK_USER;
|
||||
responsibleRule = rule;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
@@ -216,13 +226,17 @@ export class PolicyEngine {
|
||||
|
||||
// If any part is DENIED, the whole command is DENIED
|
||||
if (subDecision === PolicyDecision.DENY) {
|
||||
return PolicyDecision.DENY;
|
||||
return {
|
||||
decision: PolicyDecision.DENY,
|
||||
rule: subResult.rule,
|
||||
};
|
||||
}
|
||||
|
||||
// If any part requires ASK_USER, the whole command requires ASK_USER
|
||||
if (subDecision === PolicyDecision.ASK_USER) {
|
||||
if (aggregateDecision === PolicyDecision.ALLOW) {
|
||||
aggregateDecision = PolicyDecision.ASK_USER;
|
||||
aggregateDecision = PolicyDecision.ASK_USER;
|
||||
if (!responsibleRule) {
|
||||
responsibleRule = subResult.rule;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -237,13 +251,22 @@ export class PolicyEngine {
|
||||
);
|
||||
if (aggregateDecision === PolicyDecision.ALLOW) {
|
||||
aggregateDecision = PolicyDecision.ASK_USER;
|
||||
responsibleRule = undefined;
|
||||
}
|
||||
}
|
||||
}
|
||||
return this.applyNonInteractiveMode(aggregateDecision);
|
||||
return {
|
||||
decision: this.applyNonInteractiveMode(aggregateDecision),
|
||||
// If we stayed at ALLOW, we return the original rule (if any).
|
||||
// If we downgraded, we return the responsible rule (or undefined if implicit).
|
||||
rule: aggregateDecision === ruleDecision ? rule : responsibleRule,
|
||||
};
|
||||
}
|
||||
|
||||
return this.applyNonInteractiveMode(ruleDecision);
|
||||
return {
|
||||
decision: this.applyNonInteractiveMode(ruleDecision),
|
||||
rule,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -271,6 +294,18 @@ export class PolicyEngine {
|
||||
`[PolicyEngine.check] toolCall.name: ${toolCall.name}, stringifiedArgs: ${stringifiedArgs}`,
|
||||
);
|
||||
|
||||
// Check for shell commands upfront to handle splitting
|
||||
let isShellCommand = false;
|
||||
let command: string | undefined;
|
||||
let shellDirPath: string | undefined;
|
||||
|
||||
if (toolCall.name && SHELL_TOOL_NAMES.includes(toolCall.name)) {
|
||||
isShellCommand = true;
|
||||
const args = toolCall.args as { command?: string; dir_path?: string };
|
||||
command = args?.command;
|
||||
shellDirPath = args?.dir_path;
|
||||
}
|
||||
|
||||
// Find the first matching rule (already sorted by priority)
|
||||
let matchedRule: PolicyRule | undefined;
|
||||
let decision: PolicyDecision | undefined;
|
||||
@@ -289,21 +324,31 @@ export class PolicyEngine {
|
||||
`[PolicyEngine.check] MATCHED rule: toolName=${rule.toolName}, decision=${rule.decision}, priority=${rule.priority}, argsPattern=${rule.argsPattern?.source || 'none'}`,
|
||||
);
|
||||
|
||||
if (toolCall.name && SHELL_TOOL_NAMES.includes(toolCall.name)) {
|
||||
const args = toolCall.args as { command?: string; dir_path?: string };
|
||||
decision = await this.checkShellCommand(
|
||||
toolCall.name,
|
||||
args?.command,
|
||||
if (isShellCommand) {
|
||||
const shellResult = await this.checkShellCommand(
|
||||
toolCall.name!,
|
||||
command,
|
||||
rule.decision,
|
||||
serverName,
|
||||
args?.dir_path,
|
||||
shellDirPath,
|
||||
rule.allowRedirection,
|
||||
rule,
|
||||
);
|
||||
decision = shellResult.decision;
|
||||
if (shellResult.rule) {
|
||||
matchedRule = shellResult.rule;
|
||||
break;
|
||||
}
|
||||
// If no rule returned (e.g. downgraded to default ASK_USER due to redirection),
|
||||
// we might still want to blame the matched rule?
|
||||
// No, test says we should return undefined rule if implicit.
|
||||
matchedRule = shellResult.rule;
|
||||
break;
|
||||
} else {
|
||||
decision = this.applyNonInteractiveMode(rule.decision);
|
||||
matchedRule = rule;
|
||||
break;
|
||||
}
|
||||
matchedRule = rule;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -313,6 +358,22 @@ export class PolicyEngine {
|
||||
`[PolicyEngine.check] NO MATCH - using default decision: ${this.defaultDecision}`,
|
||||
);
|
||||
decision = this.applyNonInteractiveMode(this.defaultDecision);
|
||||
|
||||
// If it's a shell command and we fell back to default, we MUST still verify subcommands!
|
||||
// This is critical for security: "git commit && git push" where "git push" is DENY but "git commit" has no rule.
|
||||
if (isShellCommand && decision !== PolicyDecision.DENY) {
|
||||
const shellResult = await this.checkShellCommand(
|
||||
toolCall.name!,
|
||||
command,
|
||||
decision, // default decision
|
||||
serverName,
|
||||
shellDirPath,
|
||||
false, // no rule, so no allowRedirection
|
||||
undefined, // no rule
|
||||
);
|
||||
decision = shellResult.decision;
|
||||
matchedRule = shellResult.rule;
|
||||
}
|
||||
}
|
||||
|
||||
// If decision is not DENY, run safety checkers
|
||||
|
||||
@@ -4,29 +4,107 @@
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach } from 'vitest';
|
||||
import { describe, it, expect, beforeEach, vi } from 'vitest';
|
||||
|
||||
// Mock shell-utils to avoid relying on tree-sitter WASM which is flaky in CI on Windows
|
||||
vi.mock('../utils/shell-utils.js', async (importOriginal) => {
|
||||
const actual =
|
||||
await importOriginal<typeof import('../utils/shell-utils.js')>();
|
||||
|
||||
// Static map of test commands to their expected subcommands
|
||||
// This mirrors what the real parser would output for these specific strings
|
||||
const commandMap: Record<string, string[]> = {
|
||||
'git log': ['git log'],
|
||||
'git log --oneline': ['git log --oneline'],
|
||||
'git logout': ['git logout'],
|
||||
'git log && rm -rf /': ['git log', 'rm -rf /'],
|
||||
'git log; rm -rf /': ['git log', 'rm -rf /'],
|
||||
'git log || rm -rf /': ['git log', 'rm -rf /'],
|
||||
'git log &&& rm -rf /': [], // Simulates parse failure
|
||||
'echo $(rm -rf /)': ['echo $(rm -rf /)', 'rm -rf /'],
|
||||
'echo $(git log)': ['echo $(git log)', 'git log'],
|
||||
'echo `rm -rf /`': ['echo `rm -rf /`', 'rm -rf /'],
|
||||
'diff <(git log) <(rm -rf /)': [
|
||||
'diff <(git log) <(rm -rf /)',
|
||||
'git log',
|
||||
'rm -rf /',
|
||||
],
|
||||
'tee >(rm -rf /)': ['tee >(rm -rf /)', 'rm -rf /'],
|
||||
'git log | rm -rf /': ['git log', 'rm -rf /'],
|
||||
'git log --format=$(rm -rf /)': [
|
||||
'git log --format=$(rm -rf /)',
|
||||
'rm -rf /',
|
||||
],
|
||||
'git log && echo $(git log | rm -rf /)': [
|
||||
'git log',
|
||||
'echo $(git log | rm -rf /)',
|
||||
'git log',
|
||||
'rm -rf /',
|
||||
],
|
||||
'git log && echo $(git log)': ['git log', 'echo $(git log)', 'git log'],
|
||||
'git log > /tmp/test': ['git log > /tmp/test'],
|
||||
'git log @(Get-Process)': [], // Simulates parse failure (Bash parser vs PowerShell syntax)
|
||||
'git commit -m "msg" && git push': ['git commit -m "msg"', 'git push'],
|
||||
'git status && unknown_command': ['git status', 'unknown_command'],
|
||||
'unknown_command_1 && another_unknown_command': [
|
||||
'unknown_command_1',
|
||||
'another_unknown_command',
|
||||
],
|
||||
'known_ask_command_1 && known_ask_command_2': [
|
||||
'known_ask_command_1',
|
||||
'known_ask_command_2',
|
||||
],
|
||||
};
|
||||
|
||||
return {
|
||||
...actual,
|
||||
initializeShellParsers: vi.fn(),
|
||||
splitCommands: (command: string) => {
|
||||
if (Object.prototype.hasOwnProperty.call(commandMap, command)) {
|
||||
return commandMap[command];
|
||||
}
|
||||
const known = commandMap[command];
|
||||
if (known) return known;
|
||||
// Default fallback for unmatched simple cases in development, but explicit map is better
|
||||
return [command];
|
||||
},
|
||||
hasRedirection: (command: string) =>
|
||||
// Simple regex check sufficient for testing the policy engine's handling of the *result* of hasRedirection
|
||||
/[><]/.test(command),
|
||||
};
|
||||
});
|
||||
|
||||
import { PolicyEngine } from './policy-engine.js';
|
||||
import { PolicyDecision, ApprovalMode } from './types.js';
|
||||
import type { FunctionCall } from '@google/genai';
|
||||
import { buildArgsPatterns } from './utils.js';
|
||||
|
||||
describe('Shell Safety Policy', () => {
|
||||
let policyEngine: PolicyEngine;
|
||||
|
||||
beforeEach(() => {
|
||||
policyEngine = new PolicyEngine({
|
||||
// Helper to create a policy engine with a simple command prefix rule
|
||||
function createPolicyEngineWithPrefix(prefix: string) {
|
||||
const argsPatterns = buildArgsPatterns(undefined, prefix, undefined);
|
||||
// Since buildArgsPatterns returns array of patterns (strings), we pick the first one
|
||||
// and compile it.
|
||||
const argsPattern = new RegExp(argsPatterns[0]!);
|
||||
|
||||
return new PolicyEngine({
|
||||
rules: [
|
||||
{
|
||||
toolName: 'run_shell_command',
|
||||
// Mimic the regex generated by toml-loader for commandPrefix = ["git log"]
|
||||
// Regex: "command":"git log(?:[\s"]|$)
|
||||
argsPattern: /"command":"git log(?:[\s"]|$)/,
|
||||
argsPattern,
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: 1.01, // Higher priority than default
|
||||
priority: 1.01,
|
||||
},
|
||||
],
|
||||
defaultDecision: PolicyDecision.ASK_USER,
|
||||
approvalMode: ApprovalMode.DEFAULT,
|
||||
});
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
policyEngine = createPolicyEngineWithPrefix('git log');
|
||||
});
|
||||
|
||||
it('SHOULD match "git log" exactly', async () => {
|
||||
@@ -71,6 +149,25 @@ describe('Shell Safety Policy', () => {
|
||||
const result = await policyEngine.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('SHOULD NOT allow "git log; rm -rf /" (semicolon separator)', async () => {
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'git log; rm -rf /' },
|
||||
};
|
||||
const result = await policyEngine.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('SHOULD NOT allow "git log || rm -rf /" (OR separator)', async () => {
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'git log || rm -rf /' },
|
||||
};
|
||||
const result = await policyEngine.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('SHOULD NOT allow "git log &&& rm -rf /" when prefix is "git log" (parse failure)', async () => {
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
@@ -78,8 +175,341 @@ describe('Shell Safety Policy', () => {
|
||||
};
|
||||
|
||||
// Desired behavior: Should fail safe (ASK_USER or DENY) because parsing failed.
|
||||
// If we let it pass as "single command" that matches prefix, it's dangerous.
|
||||
const result = await policyEngine.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('SHOULD NOT allow command substitution $(rm -rf /)', async () => {
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'echo $(rm -rf /)' },
|
||||
};
|
||||
// `splitCommands` recursively finds nested commands (e.g., `rm` inside `echo $()`).
|
||||
// The policy engine requires ALL extracted commands to be allowed.
|
||||
// Since `rm` does not match the allowed prefix, this should result in ASK_USER.
|
||||
const echoPolicy = createPolicyEngineWithPrefix('echo');
|
||||
const result = await echoPolicy.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('SHOULD allow command substitution if inner command is ALSO allowed', async () => {
|
||||
// Both `echo` and `git` allowed.
|
||||
const argsPatternsEcho = buildArgsPatterns(undefined, 'echo', undefined);
|
||||
const argsPatternsGit = buildArgsPatterns(undefined, 'git', undefined); // Allow all git
|
||||
|
||||
const policyEngineWithBoth = new PolicyEngine({
|
||||
rules: [
|
||||
{
|
||||
toolName: 'run_shell_command',
|
||||
argsPattern: new RegExp(argsPatternsEcho[0]!),
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: 2,
|
||||
},
|
||||
{
|
||||
toolName: 'run_shell_command',
|
||||
argsPattern: new RegExp(argsPatternsGit[0]!),
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: 2,
|
||||
},
|
||||
],
|
||||
defaultDecision: PolicyDecision.ASK_USER,
|
||||
});
|
||||
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'echo $(git log)' },
|
||||
};
|
||||
|
||||
const result = await policyEngineWithBoth.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ALLOW);
|
||||
});
|
||||
it('SHOULD NOT allow command substitution with backticks `rm -rf /`', async () => {
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'echo `rm -rf /`' },
|
||||
};
|
||||
const result = await policyEngine.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('SHOULD NOT allow process substitution <(rm -rf /)', async () => {
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'diff <(git log) <(rm -rf /)' },
|
||||
};
|
||||
const result = await policyEngine.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('SHOULD NOT allow process substitution >(rm -rf /)', async () => {
|
||||
// Note: >(...) is output substitution, but syntax is similar.
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'tee >(rm -rf /)' },
|
||||
};
|
||||
const result = await policyEngine.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('SHOULD NOT allow piped commands "git log | rm -rf /"', async () => {
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'git log | rm -rf /' },
|
||||
};
|
||||
const result = await policyEngine.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('SHOULD NOT allow argument injection via --arg=$(rm -rf /)', async () => {
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'git log --format=$(rm -rf /)' },
|
||||
};
|
||||
const result = await policyEngine.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('SHOULD NOT allow complex nested commands "git log && echo $(git log | rm -rf /)"', async () => {
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'git log && echo $(git log | rm -rf /)' },
|
||||
};
|
||||
const result = await policyEngine.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('SHOULD allow complex allowed commands "git log && echo $(git log)"', async () => {
|
||||
// Both `echo` and `git` allowed.
|
||||
const argsPatternsEcho = buildArgsPatterns(undefined, 'echo', undefined);
|
||||
const argsPatternsGit = buildArgsPatterns(undefined, 'git', undefined);
|
||||
|
||||
const policyEngineWithBoth = new PolicyEngine({
|
||||
rules: [
|
||||
{
|
||||
toolName: 'run_shell_command',
|
||||
argsPattern: new RegExp(argsPatternsEcho[0]!),
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: 2,
|
||||
},
|
||||
{
|
||||
toolName: 'run_shell_command',
|
||||
// Matches "git" at start of *subcommand*
|
||||
argsPattern: new RegExp(argsPatternsGit[0]!),
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: 2,
|
||||
},
|
||||
],
|
||||
defaultDecision: PolicyDecision.ASK_USER,
|
||||
});
|
||||
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'git log && echo $(git log)' },
|
||||
};
|
||||
|
||||
const result = await policyEngineWithBoth.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ALLOW);
|
||||
});
|
||||
|
||||
it('SHOULD NOT allow generic redirection > /tmp/test', async () => {
|
||||
// Current logic downgrades ALLOW to ASK_USER for redirections if redirection is not explicitly allowed.
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'git log > /tmp/test' },
|
||||
};
|
||||
const result = await policyEngine.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('SHOULD allow generic redirection > /tmp/test if allowRedirection is true', async () => {
|
||||
// If PolicyRule has allowRedirection: true, it should stay ALLOW
|
||||
const argsPatternsGitLog = buildArgsPatterns(
|
||||
undefined,
|
||||
'git log',
|
||||
undefined,
|
||||
);
|
||||
const policyWithRedirection = new PolicyEngine({
|
||||
rules: [
|
||||
{
|
||||
toolName: 'run_shell_command',
|
||||
argsPattern: new RegExp(argsPatternsGitLog[0]!),
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: 2,
|
||||
allowRedirection: true,
|
||||
},
|
||||
],
|
||||
defaultDecision: PolicyDecision.ASK_USER,
|
||||
});
|
||||
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'git log > /tmp/test' },
|
||||
};
|
||||
const result = await policyWithRedirection.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ALLOW);
|
||||
});
|
||||
|
||||
it('SHOULD NOT allow PowerShell @(...) usage if it implies code execution', async () => {
|
||||
// Bash parser fails on PowerShell syntax @(...) (returns empty subcommands).
|
||||
// The policy engine correctly identifies this as unparseable and falls back to ASK_USER.
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'git log @(Get-Process)' },
|
||||
};
|
||||
const result = await policyEngine.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('SHOULD match DENY rule even if nested/chained with unknown command', async () => {
|
||||
// Scenario:
|
||||
// git commit -m "..." (Unknown/No Rule -> ASK_USER)
|
||||
// git push (DENY -> DENY)
|
||||
// Overall should be DENY.
|
||||
const argsPatternsPush = buildArgsPatterns(
|
||||
undefined,
|
||||
'git push',
|
||||
undefined,
|
||||
);
|
||||
|
||||
const denyPushPolicy = new PolicyEngine({
|
||||
rules: [
|
||||
{
|
||||
toolName: 'run_shell_command',
|
||||
argsPattern: new RegExp(argsPatternsPush[0]!),
|
||||
decision: PolicyDecision.DENY,
|
||||
priority: 2,
|
||||
},
|
||||
],
|
||||
defaultDecision: PolicyDecision.ASK_USER,
|
||||
});
|
||||
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'git commit -m "msg" && git push' },
|
||||
};
|
||||
|
||||
const result = await denyPushPolicy.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.DENY);
|
||||
});
|
||||
|
||||
it('SHOULD aggregate ALLOW + ASK_USER to ASK_USER and blame the ASK_USER part', async () => {
|
||||
// Scenario:
|
||||
// `git status` (ALLOW) && `unknown_command` (ASK_USER by default)
|
||||
// Expected: ASK_USER, and the matched rule should be related to the unknown_command
|
||||
const argsPatternsGitStatus = buildArgsPatterns(
|
||||
undefined,
|
||||
'git status',
|
||||
undefined,
|
||||
);
|
||||
|
||||
const policyEngine = new PolicyEngine({
|
||||
rules: [
|
||||
{
|
||||
toolName: 'run_shell_command',
|
||||
argsPattern: new RegExp(argsPatternsGitStatus[0]!),
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: 2,
|
||||
name: 'allow_git_status_rule', // Give a name to easily identify
|
||||
},
|
||||
],
|
||||
defaultDecision: PolicyDecision.ASK_USER,
|
||||
});
|
||||
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'git status && unknown_command' },
|
||||
};
|
||||
|
||||
const result = await policyEngine.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||
// Expect the matched rule to be null/undefined since it's the default decision for 'unknown_command'
|
||||
// or the rule that led to the ASK_USER decision. In this case, it should be the rule for 'unknown_command', which is the default decision.
|
||||
// The policy engine's `matchedRule` will be the rule that caused the final decision.
|
||||
// If it's a default ASK_USER, then `result.rule` should be undefined.
|
||||
expect(result.rule).toBeUndefined();
|
||||
});
|
||||
|
||||
it('SHOULD aggregate ASK_USER (default) + ASK_USER (rule) to ASK_USER and blame the specific ASK_USER rule', async () => {
|
||||
// Scenario:
|
||||
// `unknown_command_1` (ASK_USER by default) && `another_unknown_command` (ASK_USER by explicit rule)
|
||||
// Expected: ASK_USER, and the matched rule should be the explicit ASK_USER rule
|
||||
const argsPatternsAnotherUnknown = buildArgsPatterns(
|
||||
undefined,
|
||||
'another_unknown_command',
|
||||
undefined,
|
||||
);
|
||||
|
||||
const policyEngine = new PolicyEngine({
|
||||
rules: [
|
||||
{
|
||||
toolName: 'run_shell_command',
|
||||
argsPattern: new RegExp(argsPatternsAnotherUnknown[0]!),
|
||||
decision: PolicyDecision.ASK_USER,
|
||||
priority: 2,
|
||||
name: 'ask_another_unknown_command_rule',
|
||||
},
|
||||
],
|
||||
defaultDecision: PolicyDecision.ASK_USER,
|
||||
});
|
||||
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'unknown_command_1 && another_unknown_command' },
|
||||
};
|
||||
|
||||
const result = await policyEngine.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||
// The first command triggers default ASK_USER (undefined rule).
|
||||
// The second triggers explicit ASK_USER rule.
|
||||
// We attribute to the first cause => undefined.
|
||||
expect(result.rule).toBeUndefined();
|
||||
});
|
||||
|
||||
it('SHOULD aggregate ASK_USER (rule) + ASK_USER (rule) to ASK_USER and blame the first specific ASK_USER rule in subcommands', async () => {
|
||||
// Scenario:
|
||||
// `known_ask_command_1` (ASK_USER by explicit rule 1) && `known_ask_command_2` (ASK_USER by explicit rule 2)
|
||||
// Expected: ASK_USER, and the matched rule should be explicit ASK_USER rule 1.
|
||||
// The current implementation prioritizes the rule that changes the decision to ASK_USER, if any.
|
||||
// If multiple rules lead to ASK_USER, it takes the first one.
|
||||
const argsPatternsAsk1 = buildArgsPatterns(
|
||||
undefined,
|
||||
'known_ask_command_1',
|
||||
undefined,
|
||||
);
|
||||
const argsPatternsAsk2 = buildArgsPatterns(
|
||||
undefined,
|
||||
'known_ask_command_2',
|
||||
undefined,
|
||||
);
|
||||
|
||||
const policyEngine = new PolicyEngine({
|
||||
rules: [
|
||||
{
|
||||
toolName: 'run_shell_command',
|
||||
argsPattern: new RegExp(argsPatternsAsk1[0]!),
|
||||
decision: PolicyDecision.ASK_USER,
|
||||
priority: 2,
|
||||
name: 'ask_rule_1',
|
||||
},
|
||||
{
|
||||
toolName: 'run_shell_command',
|
||||
argsPattern: new RegExp(argsPatternsAsk2[0]!),
|
||||
decision: PolicyDecision.ASK_USER,
|
||||
priority: 2,
|
||||
name: 'ask_rule_2',
|
||||
},
|
||||
],
|
||||
defaultDecision: PolicyDecision.ALLOW, // Set default to ALLOW to ensure rules are hit
|
||||
});
|
||||
|
||||
const toolCall: FunctionCall = {
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'known_ask_command_1 && known_ask_command_2' },
|
||||
};
|
||||
|
||||
const result = await policyEngine.check(toolCall, undefined);
|
||||
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||
// Expect the rule that first caused ASK_USER to be blamed
|
||||
expect(result.rule?.name).toBe('ask_rule_1');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -95,6 +95,11 @@ export type SafetyCheckerConfig =
|
||||
| InProcessCheckerConfig;
|
||||
|
||||
export interface PolicyRule {
|
||||
/**
|
||||
* A unique name for the policy rule, useful for identification and debugging.
|
||||
*/
|
||||
name?: string;
|
||||
|
||||
/**
|
||||
* The name of the tool this rule applies to.
|
||||
* If undefined, the rule applies to all tools.
|
||||
|
||||
Reference in New Issue
Block a user