mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-13 05:12:55 -07:00
feat(core): enhance shell command validation and add core tools allowlist (#25720)
Co-authored-by: David Pierce <davidapierce@google.com> Co-authored-by: Keith Schaab <keithsc@google.com> Co-authored-by: Keith Schaab <keith.schaab@gmail.com> Co-authored-by: Emily Hedlund <ehedlund@google.com>
This commit is contained in:
@@ -1467,6 +1467,12 @@ their corresponding top-level category object in your `settings.json` file.
|
|||||||
- **Default:** `undefined`
|
- **Default:** `undefined`
|
||||||
- **Requires restart:** Yes
|
- **Requires restart:** Yes
|
||||||
|
|
||||||
|
- **`tools.confirmationRequired`** (array):
|
||||||
|
- **Description:** Tool names that always require user confirmation. Takes
|
||||||
|
precedence over allowed tools and core tool allowlists.
|
||||||
|
- **Default:** `undefined`
|
||||||
|
- **Requires restart:** Yes
|
||||||
|
|
||||||
- **`tools.exclude`** (array):
|
- **`tools.exclude`** (array):
|
||||||
- **Description:** Tool names to exclude from discovery.
|
- **Description:** Tool names to exclude from discovery.
|
||||||
- **Default:** `undefined`
|
- **Default:** `undefined`
|
||||||
|
|||||||
@@ -18,6 +18,11 @@ describe('save_memory', () => {
|
|||||||
suiteName: 'default',
|
suiteName: 'default',
|
||||||
suiteType: 'behavioral',
|
suiteType: 'behavioral',
|
||||||
name: rememberingFavoriteColor,
|
name: rememberingFavoriteColor,
|
||||||
|
params: {
|
||||||
|
settings: {
|
||||||
|
experimental: { memoryV2: false },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
prompt: `remember that my favorite color is blue.
|
prompt: `remember that my favorite color is blue.
|
||||||
|
|
||||||
@@ -40,6 +45,11 @@ describe('save_memory', () => {
|
|||||||
suiteName: 'default',
|
suiteName: 'default',
|
||||||
suiteType: 'behavioral',
|
suiteType: 'behavioral',
|
||||||
name: rememberingCommandRestrictions,
|
name: rememberingCommandRestrictions,
|
||||||
|
params: {
|
||||||
|
settings: {
|
||||||
|
experimental: { memoryV2: false },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
prompt: `I don't want you to ever run npm commands.`,
|
prompt: `I don't want you to ever run npm commands.`,
|
||||||
assert: async (rig, result) => {
|
assert: async (rig, result) => {
|
||||||
@@ -61,6 +71,11 @@ describe('save_memory', () => {
|
|||||||
suiteName: 'default',
|
suiteName: 'default',
|
||||||
suiteType: 'behavioral',
|
suiteType: 'behavioral',
|
||||||
name: rememberingWorkflow,
|
name: rememberingWorkflow,
|
||||||
|
params: {
|
||||||
|
settings: {
|
||||||
|
experimental: { memoryV2: false },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
prompt: `I want you to always lint after building.`,
|
prompt: `I want you to always lint after building.`,
|
||||||
assert: async (rig, result) => {
|
assert: async (rig, result) => {
|
||||||
@@ -83,6 +98,11 @@ describe('save_memory', () => {
|
|||||||
suiteName: 'default',
|
suiteName: 'default',
|
||||||
suiteType: 'behavioral',
|
suiteType: 'behavioral',
|
||||||
name: ignoringTemporaryInformation,
|
name: ignoringTemporaryInformation,
|
||||||
|
params: {
|
||||||
|
settings: {
|
||||||
|
experimental: { memoryV2: false },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
prompt: `I'm going to get a coffee.`,
|
prompt: `I'm going to get a coffee.`,
|
||||||
assert: async (rig, result) => {
|
assert: async (rig, result) => {
|
||||||
@@ -108,6 +128,11 @@ describe('save_memory', () => {
|
|||||||
suiteName: 'default',
|
suiteName: 'default',
|
||||||
suiteType: 'behavioral',
|
suiteType: 'behavioral',
|
||||||
name: rememberingPetName,
|
name: rememberingPetName,
|
||||||
|
params: {
|
||||||
|
settings: {
|
||||||
|
experimental: { memoryV2: false },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
prompt: `Please remember that my dog's name is Buddy.`,
|
prompt: `Please remember that my dog's name is Buddy.`,
|
||||||
assert: async (rig, result) => {
|
assert: async (rig, result) => {
|
||||||
@@ -129,6 +154,11 @@ describe('save_memory', () => {
|
|||||||
suiteName: 'default',
|
suiteName: 'default',
|
||||||
suiteType: 'behavioral',
|
suiteType: 'behavioral',
|
||||||
name: rememberingCommandAlias,
|
name: rememberingCommandAlias,
|
||||||
|
params: {
|
||||||
|
settings: {
|
||||||
|
experimental: { memoryV2: false },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
prompt: `When I say 'start server', you should run 'npm run dev'.`,
|
prompt: `When I say 'start server', you should run 'npm run dev'.`,
|
||||||
assert: async (rig, result) => {
|
assert: async (rig, result) => {
|
||||||
@@ -151,6 +181,11 @@ describe('save_memory', () => {
|
|||||||
suiteName: 'default',
|
suiteName: 'default',
|
||||||
suiteType: 'behavioral',
|
suiteType: 'behavioral',
|
||||||
name: savingDbSchemaLocationAsProjectMemory,
|
name: savingDbSchemaLocationAsProjectMemory,
|
||||||
|
params: {
|
||||||
|
settings: {
|
||||||
|
experimental: { memoryV2: false },
|
||||||
|
},
|
||||||
|
},
|
||||||
prompt: `The database schema for this workspace is located in \`db/schema.sql\`.`,
|
prompt: `The database schema for this workspace is located in \`db/schema.sql\`.`,
|
||||||
assert: async (rig, result) => {
|
assert: async (rig, result) => {
|
||||||
const wasToolCalled = await rig.waitForToolCall(
|
const wasToolCalled = await rig.waitForToolCall(
|
||||||
@@ -180,6 +215,11 @@ describe('save_memory', () => {
|
|||||||
suiteName: 'default',
|
suiteName: 'default',
|
||||||
suiteType: 'behavioral',
|
suiteType: 'behavioral',
|
||||||
name: rememberingCodingStyle,
|
name: rememberingCodingStyle,
|
||||||
|
params: {
|
||||||
|
settings: {
|
||||||
|
experimental: { memoryV2: false },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
prompt: `I prefer to use tabs instead of spaces for indentation.`,
|
prompt: `I prefer to use tabs instead of spaces for indentation.`,
|
||||||
assert: async (rig, result) => {
|
assert: async (rig, result) => {
|
||||||
@@ -202,6 +242,11 @@ describe('save_memory', () => {
|
|||||||
suiteName: 'default',
|
suiteName: 'default',
|
||||||
suiteType: 'behavioral',
|
suiteType: 'behavioral',
|
||||||
name: savingBuildArtifactLocationAsProjectMemory,
|
name: savingBuildArtifactLocationAsProjectMemory,
|
||||||
|
params: {
|
||||||
|
settings: {
|
||||||
|
experimental: { memoryV2: false },
|
||||||
|
},
|
||||||
|
},
|
||||||
prompt: `In this workspace, build artifacts are stored in the \`dist/artifacts\` directory.`,
|
prompt: `In this workspace, build artifacts are stored in the \`dist/artifacts\` directory.`,
|
||||||
assert: async (rig, result) => {
|
assert: async (rig, result) => {
|
||||||
const wasToolCalled = await rig.waitForToolCall(
|
const wasToolCalled = await rig.waitForToolCall(
|
||||||
@@ -231,6 +276,11 @@ describe('save_memory', () => {
|
|||||||
suiteName: 'default',
|
suiteName: 'default',
|
||||||
suiteType: 'behavioral',
|
suiteType: 'behavioral',
|
||||||
name: savingMainEntryPointAsProjectMemory,
|
name: savingMainEntryPointAsProjectMemory,
|
||||||
|
params: {
|
||||||
|
settings: {
|
||||||
|
experimental: { memoryV2: false },
|
||||||
|
},
|
||||||
|
},
|
||||||
prompt: `The main entry point for this workspace is \`src/index.js\`.`,
|
prompt: `The main entry point for this workspace is \`src/index.js\`.`,
|
||||||
assert: async (rig, result) => {
|
assert: async (rig, result) => {
|
||||||
const wasToolCalled = await rig.waitForToolCall(
|
const wasToolCalled = await rig.waitForToolCall(
|
||||||
@@ -259,6 +309,11 @@ describe('save_memory', () => {
|
|||||||
suiteName: 'default',
|
suiteName: 'default',
|
||||||
suiteType: 'behavioral',
|
suiteType: 'behavioral',
|
||||||
name: rememberingBirthday,
|
name: rememberingBirthday,
|
||||||
|
params: {
|
||||||
|
settings: {
|
||||||
|
experimental: { memoryV2: false },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
prompt: `My birthday is on June 15th.`,
|
prompt: `My birthday is on June 15th.`,
|
||||||
assert: async (rig, result) => {
|
assert: async (rig, result) => {
|
||||||
|
|||||||
@@ -172,6 +172,7 @@ export async function internalEvalTest(evalCase: EvalCase) {
|
|||||||
timeout: evalCase.timeout,
|
timeout: evalCase.timeout,
|
||||||
env: {
|
env: {
|
||||||
GEMINI_CLI_ACTIVITY_LOG_TARGET: activityLogFile,
|
GEMINI_CLI_ACTIVITY_LOG_TARGET: activityLogFile,
|
||||||
|
GEMINI_CLI_TRUST_WORKSPACE: 'true',
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -1667,6 +1667,19 @@ const SETTINGS_SCHEMA = {
|
|||||||
showInDialog: false,
|
showInDialog: false,
|
||||||
items: { type: 'string' },
|
items: { type: 'string' },
|
||||||
},
|
},
|
||||||
|
confirmationRequired: {
|
||||||
|
type: 'array',
|
||||||
|
label: 'Confirmation Required',
|
||||||
|
category: 'Advanced',
|
||||||
|
requiresRestart: true,
|
||||||
|
default: undefined as string[] | undefined,
|
||||||
|
description: oneLine`
|
||||||
|
Tool names that always require user confirmation.
|
||||||
|
Takes precedence over allowed tools and core tool allowlists.
|
||||||
|
`,
|
||||||
|
showInDialog: false,
|
||||||
|
items: { type: 'string' },
|
||||||
|
},
|
||||||
exclude: {
|
exclude: {
|
||||||
type: 'array',
|
type: 'array',
|
||||||
label: 'Exclude Tools',
|
label: 'Exclude Tools',
|
||||||
|
|||||||
@@ -168,6 +168,13 @@ exports[`InputPrompt > mouse interaction > should toggle paste expansion on doub
|
|||||||
"
|
"
|
||||||
`;
|
`;
|
||||||
|
|
||||||
|
exports[`InputPrompt > mouse interaction > should toggle paste expansion on double-click 4`] = `
|
||||||
|
"▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀
|
||||||
|
> [Pasted Text: 10 lines]
|
||||||
|
▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
|
||||||
|
"
|
||||||
|
`;
|
||||||
|
|
||||||
exports[`InputPrompt > multiline rendering > should correctly render multiline input including blank lines 1`] = `
|
exports[`InputPrompt > multiline rendering > should correctly render multiline input including blank lines 1`] = `
|
||||||
"────────────────────────────────────────────────────────────────────────────────────────────────────
|
"────────────────────────────────────────────────────────────────────────────────────────────────────
|
||||||
> hello
|
> hello
|
||||||
|
|||||||
@@ -74,7 +74,9 @@ export const ADMIN_POLICY_TIER = 5;
|
|||||||
|
|
||||||
export const MCP_EXCLUDED_PRIORITY = USER_POLICY_TIER + 0.9;
|
export const MCP_EXCLUDED_PRIORITY = USER_POLICY_TIER + 0.9;
|
||||||
export const EXCLUDE_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.4;
|
export const EXCLUDE_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.4;
|
||||||
|
export const CONFIRMATION_REQUIRED_PRIORITY = USER_POLICY_TIER + 0.35;
|
||||||
export const ALLOWED_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.3;
|
export const ALLOWED_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.3;
|
||||||
|
export const CORE_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.25;
|
||||||
export const TRUSTED_MCP_SERVER_PRIORITY = USER_POLICY_TIER + 0.2;
|
export const TRUSTED_MCP_SERVER_PRIORITY = USER_POLICY_TIER + 0.2;
|
||||||
export const ALLOWED_MCP_SERVER_PRIORITY = USER_POLICY_TIER + 0.1;
|
export const ALLOWED_MCP_SERVER_PRIORITY = USER_POLICY_TIER + 0.1;
|
||||||
|
|
||||||
@@ -434,10 +436,21 @@ export async function createPolicyEngineConfig(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tools that are explicitly allowed in the settings.
|
const nonPlanModes = [
|
||||||
// Priority: ALLOWED_TOOLS_FLAG_PRIORITY (user tier - explicit temporary allows)
|
ApprovalMode.DEFAULT,
|
||||||
if (settings.tools?.allowed) {
|
ApprovalMode.AUTO_EDIT,
|
||||||
for (const tool of settings.tools.allowed) {
|
ApprovalMode.YOLO,
|
||||||
|
];
|
||||||
|
|
||||||
|
const mapToolsToRules = (
|
||||||
|
tools: string[],
|
||||||
|
priority: number,
|
||||||
|
source: string,
|
||||||
|
modes?: ApprovalMode[],
|
||||||
|
addDefaultDenyForTools = false,
|
||||||
|
) => {
|
||||||
|
const toolsWithNarrowing = new Set<string>();
|
||||||
|
for (const tool of tools) {
|
||||||
// Check for legacy format: toolName(args)
|
// Check for legacy format: toolName(args)
|
||||||
const match = tool.match(/^([a-zA-Z0-9_-]+)\((.*)\)$/);
|
const match = tool.match(/^([a-zA-Z0-9_-]+)\((.*)\)$/);
|
||||||
if (match) {
|
if (match) {
|
||||||
@@ -449,15 +462,17 @@ export async function createPolicyEngineConfig(
|
|||||||
|
|
||||||
// Treat args as a command prefix for shell tool
|
// Treat args as a command prefix for shell tool
|
||||||
if (toolName === SHELL_TOOL_NAME) {
|
if (toolName === SHELL_TOOL_NAME) {
|
||||||
|
toolsWithNarrowing.add(toolName);
|
||||||
const patterns = buildArgsPatterns(undefined, args);
|
const patterns = buildArgsPatterns(undefined, args);
|
||||||
for (const pattern of patterns) {
|
for (const pattern of patterns) {
|
||||||
if (pattern) {
|
if (pattern) {
|
||||||
rules.push({
|
rules.push({
|
||||||
toolName,
|
toolName,
|
||||||
decision: PolicyDecision.ALLOW,
|
decision: PolicyDecision.ALLOW,
|
||||||
priority: ALLOWED_TOOLS_FLAG_PRIORITY,
|
priority,
|
||||||
argsPattern: new RegExp(pattern),
|
argsPattern: new RegExp(pattern),
|
||||||
source: 'Settings (Tools Allowed)',
|
source,
|
||||||
|
modes,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -467,8 +482,9 @@ export async function createPolicyEngineConfig(
|
|||||||
rules.push({
|
rules.push({
|
||||||
toolName,
|
toolName,
|
||||||
decision: PolicyDecision.ALLOW,
|
decision: PolicyDecision.ALLOW,
|
||||||
priority: ALLOWED_TOOLS_FLAG_PRIORITY,
|
priority,
|
||||||
source: 'Settings (Tools Allowed)',
|
source,
|
||||||
|
modes,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -479,11 +495,70 @@ export async function createPolicyEngineConfig(
|
|||||||
rules.push({
|
rules.push({
|
||||||
toolName,
|
toolName,
|
||||||
decision: PolicyDecision.ALLOW,
|
decision: PolicyDecision.ALLOW,
|
||||||
priority: ALLOWED_TOOLS_FLAG_PRIORITY,
|
priority,
|
||||||
source: 'Settings (Tools Allowed)',
|
source,
|
||||||
|
modes,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (addDefaultDenyForTools) {
|
||||||
|
for (const toolName of toolsWithNarrowing) {
|
||||||
|
rules.push({
|
||||||
|
toolName,
|
||||||
|
decision: PolicyDecision.DENY,
|
||||||
|
priority: priority - 0.01,
|
||||||
|
source: `${source} (Narrowing Enforcement)`,
|
||||||
|
modes,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Tools that are explicitly allowed in the settings.
|
||||||
|
// Priority: ALLOWED_TOOLS_FLAG_PRIORITY (user tier - explicit temporary allows)
|
||||||
|
if (settings.tools?.allowed) {
|
||||||
|
mapToolsToRules(
|
||||||
|
settings.tools.allowed,
|
||||||
|
ALLOWED_TOOLS_FLAG_PRIORITY,
|
||||||
|
'Settings (Tools Allowed)',
|
||||||
|
undefined,
|
||||||
|
true,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tools that explicitly require confirmation in the settings.
|
||||||
|
// Priority: CONFIRMATION_REQUIRED_PRIORITY (overrides allowed and core)
|
||||||
|
if (settings.tools?.confirmationRequired) {
|
||||||
|
for (const tool of settings.tools.confirmationRequired) {
|
||||||
|
rules.push({
|
||||||
|
toolName: SHELL_TOOL_NAMES.includes(tool) ? SHELL_TOOL_NAME : tool,
|
||||||
|
decision: PolicyDecision.ASK_USER,
|
||||||
|
priority: CONFIRMATION_REQUIRED_PRIORITY,
|
||||||
|
source: 'Settings (Confirmation Required)',
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Core tools that are restricted in the settings.
|
||||||
|
// Priority: CORE_TOOLS_FLAG_PRIORITY (user tier - core tool allowlist)
|
||||||
|
if (settings.tools?.core) {
|
||||||
|
mapToolsToRules(
|
||||||
|
settings.tools.core,
|
||||||
|
CORE_TOOLS_FLAG_PRIORITY,
|
||||||
|
'Settings (Core Tools)',
|
||||||
|
nonPlanModes,
|
||||||
|
);
|
||||||
|
|
||||||
|
// If core tools are restricted, we should add a default DENY rule for everything else
|
||||||
|
// at a slightly lower priority than the explicit allows.
|
||||||
|
rules.push({
|
||||||
|
toolName: '*',
|
||||||
|
decision: PolicyDecision.DENY,
|
||||||
|
priority: CORE_TOOLS_FLAG_PRIORITY - 0.01,
|
||||||
|
source: 'Settings (Core Tools Allowlist Enforcement)',
|
||||||
|
modes: nonPlanModes,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// MCP servers that are trusted in the settings.
|
// MCP servers that are trusted in the settings.
|
||||||
@@ -501,6 +576,7 @@ export async function createPolicyEngineConfig(
|
|||||||
decision: PolicyDecision.ALLOW,
|
decision: PolicyDecision.ALLOW,
|
||||||
priority: TRUSTED_MCP_SERVER_PRIORITY,
|
priority: TRUSTED_MCP_SERVER_PRIORITY,
|
||||||
source: 'Settings (MCP Trusted)',
|
source: 'Settings (MCP Trusted)',
|
||||||
|
modes: nonPlanModes,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -519,6 +595,7 @@ export async function createPolicyEngineConfig(
|
|||||||
decision: PolicyDecision.ALLOW,
|
decision: PolicyDecision.ALLOW,
|
||||||
priority: ALLOWED_MCP_SERVER_PRIORITY,
|
priority: ALLOWED_MCP_SERVER_PRIORITY,
|
||||||
source: 'Settings (MCP Allowed)',
|
source: 'Settings (MCP Allowed)',
|
||||||
|
modes: nonPlanModes,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,76 @@
|
|||||||
|
/**
|
||||||
|
* @license
|
||||||
|
* Copyright 2026 Google LLC
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, it, expect } from 'vitest';
|
||||||
|
import { createPolicyEngineConfig } from './config.js';
|
||||||
|
import { PolicyEngine } from './policy-engine.js';
|
||||||
|
import { PolicyDecision, ApprovalMode } from './types.js';
|
||||||
|
|
||||||
|
describe('PolicyEngine - Core Tools Mapping', () => {
|
||||||
|
it('should allow tools explicitly listed in settings.tools.core', async () => {
|
||||||
|
const settings = {
|
||||||
|
tools: {
|
||||||
|
core: ['run_shell_command(ls)', 'run_shell_command(git status)'],
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
const config = await createPolicyEngineConfig(
|
||||||
|
settings,
|
||||||
|
ApprovalMode.DEFAULT,
|
||||||
|
undefined,
|
||||||
|
true, // interactive
|
||||||
|
);
|
||||||
|
|
||||||
|
const engine = new PolicyEngine(config);
|
||||||
|
|
||||||
|
// Test simple tool name
|
||||||
|
const result1 = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'ls' } },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
expect(result1.decision).toBe(PolicyDecision.ALLOW);
|
||||||
|
|
||||||
|
// Test tool name with args
|
||||||
|
const result2 = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'git status' } },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
expect(result2.decision).toBe(PolicyDecision.ALLOW);
|
||||||
|
|
||||||
|
// Test tool not in core list
|
||||||
|
const result3 = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'npm test' } },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
// Should be DENIED because of strict allowlist
|
||||||
|
expect(result3.decision).toBe(PolicyDecision.DENY);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should allow tools in tools.core even if they are restricted by default policies', async () => {
|
||||||
|
// By default run_shell_command is ASK_USER.
|
||||||
|
// Putting it in tools.core should make it ALLOW.
|
||||||
|
const settings = {
|
||||||
|
tools: {
|
||||||
|
core: ['run_shell_command'],
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
const config = await createPolicyEngineConfig(
|
||||||
|
settings,
|
||||||
|
ApprovalMode.DEFAULT,
|
||||||
|
undefined,
|
||||||
|
true,
|
||||||
|
);
|
||||||
|
|
||||||
|
const engine = new PolicyEngine(config);
|
||||||
|
|
||||||
|
const result = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'any command' } },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
expect(result.decision).toBe(PolicyDecision.ALLOW);
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -43,6 +43,35 @@ vi.mock('../utils/shell-utils.js', async (importOriginal) => {
|
|||||||
}
|
}
|
||||||
return [command];
|
return [command];
|
||||||
}),
|
}),
|
||||||
|
parseCommandDetails: vi.fn().mockImplementation((command: string) => {
|
||||||
|
// Basic mock implementation for PolicyEngine test needs
|
||||||
|
const commands = command.includes('&&')
|
||||||
|
? command.split('&&').map((c) => c.trim())
|
||||||
|
: [command.trim()];
|
||||||
|
|
||||||
|
// Detect $(...) or `...` and add as sub-commands for recursion tests
|
||||||
|
const subCommands = [...commands];
|
||||||
|
for (const cmd of commands) {
|
||||||
|
const subMatch = cmd.match(/\$\((.*)\)/) || cmd.match(/`(.*)`/);
|
||||||
|
if (subMatch?.[1]) {
|
||||||
|
subCommands.push(subMatch[1].trim());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
details: subCommands.map((c, i) => ({
|
||||||
|
name: c.split(' ')[0],
|
||||||
|
text: c,
|
||||||
|
startIndex: i === 0 ? 0 : -1, // Simple root indication
|
||||||
|
})),
|
||||||
|
hasError: false,
|
||||||
|
};
|
||||||
|
}),
|
||||||
|
stripShellWrapper: vi.fn().mockImplementation((command: string) => {
|
||||||
|
// Simple mock for stripping wrappers
|
||||||
|
const match = command.match(/^(?:bash|sh|zsh)\s+-c\s+["'](.*)["']$/i);
|
||||||
|
return match ? match[1] : command;
|
||||||
|
}),
|
||||||
hasRedirection: vi.fn().mockImplementation(
|
hasRedirection: vi.fn().mockImplementation(
|
||||||
(command: string) =>
|
(command: string) =>
|
||||||
// Simple mock: true if '>' is present, unless it looks like "-> arrow"
|
// Simple mock: true if '>' is present, unless it looks like "-> arrow"
|
||||||
@@ -1862,7 +1891,6 @@ describe('PolicyEngine', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
it('should return ASK_USER in non-YOLO mode if shell command parsing fails', async () => {
|
it('should return ASK_USER in non-YOLO mode if shell command parsing fails', async () => {
|
||||||
const { splitCommands } = await import('../utils/shell-utils.js');
|
|
||||||
const rules: PolicyRule[] = [
|
const rules: PolicyRule[] = [
|
||||||
{
|
{
|
||||||
toolName: 'run_shell_command',
|
toolName: 'run_shell_command',
|
||||||
@@ -1877,7 +1905,11 @@ describe('PolicyEngine', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Simulate parsing failure
|
// Simulate parsing failure
|
||||||
vi.mocked(splitCommands).mockReturnValueOnce([]);
|
const { parseCommandDetails } = await import('../utils/shell-utils.js');
|
||||||
|
vi.mocked(parseCommandDetails).mockReturnValueOnce({
|
||||||
|
details: [],
|
||||||
|
hasError: true,
|
||||||
|
});
|
||||||
|
|
||||||
const result = await engine.check(
|
const result = await engine.check(
|
||||||
{ name: 'run_shell_command', args: { command: 'complex command' } },
|
{ name: 'run_shell_command', args: { command: 'complex command' } },
|
||||||
|
|||||||
@@ -7,8 +7,10 @@
|
|||||||
import { type FunctionCall } from '@google/genai';
|
import { type FunctionCall } from '@google/genai';
|
||||||
import {
|
import {
|
||||||
SHELL_TOOL_NAMES,
|
SHELL_TOOL_NAMES,
|
||||||
|
REDIRECTION_NAMES,
|
||||||
initializeShellParsers,
|
initializeShellParsers,
|
||||||
splitCommands,
|
parseCommandDetails,
|
||||||
|
stripShellWrapper,
|
||||||
hasRedirection,
|
hasRedirection,
|
||||||
extractStringFromParseEntry,
|
extractStringFromParseEntry,
|
||||||
} from '../utils/shell-utils.js';
|
} from '../utils/shell-utils.js';
|
||||||
@@ -359,7 +361,8 @@ export class PolicyEngine {
|
|||||||
}
|
}
|
||||||
|
|
||||||
await initializeShellParsers();
|
await initializeShellParsers();
|
||||||
const subCommands = splitCommands(command);
|
const parsed = parseCommandDetails(command);
|
||||||
|
const subCommands = parsed?.details ?? [];
|
||||||
|
|
||||||
if (subCommands.length === 0) {
|
if (subCommands.length === 0) {
|
||||||
// If the matched rule says DENY, we should respect it immediately even if parsing fails.
|
// If the matched rule says DENY, we should respect it immediately even if parsing fails.
|
||||||
@@ -380,115 +383,109 @@ export class PolicyEngine {
|
|||||||
);
|
);
|
||||||
|
|
||||||
// Parsing logic failed, we can't trust it. Use default decision ASK_USER (or DENY in non-interactive).
|
// Parsing logic failed, we can't trust it. Use default decision ASK_USER (or DENY in non-interactive).
|
||||||
// We return the rule that matched so the evaluation loop terminates.
|
|
||||||
return {
|
return {
|
||||||
decision: this.defaultDecision,
|
decision: this.defaultDecision,
|
||||||
rule,
|
rule,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// If there are multiple parts, or if we just want to validate the single part against DENY rules
|
debugLogger.debug(
|
||||||
if (subCommands.length > 0) {
|
`[PolicyEngine.check] Validating shell command: ${subCommands.length} parts`,
|
||||||
debugLogger.debug(
|
);
|
||||||
`[PolicyEngine.check] Validating shell command: ${subCommands.length} parts`,
|
|
||||||
);
|
|
||||||
|
|
||||||
if (ruleDecision === PolicyDecision.DENY) {
|
if (ruleDecision === PolicyDecision.DENY) {
|
||||||
return { decision: PolicyDecision.DENY, rule };
|
return { decision: PolicyDecision.DENY, rule };
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start optimistically. If all parts are ALLOW, the whole is ALLOW.
|
// Start with the decision from the rule or heuristics.
|
||||||
// We will downgrade if any part is ASK_USER or DENY.
|
// If the tool call was already downgraded (e.g. by heuristics), we start there.
|
||||||
let aggregateDecision = PolicyDecision.ALLOW;
|
let aggregateDecision = ruleDecision;
|
||||||
let responsibleRule: PolicyRule | undefined;
|
|
||||||
|
|
||||||
// Check for redirection on the full command string
|
// If heuristics downgraded the decision, we don't blame the rule.
|
||||||
if (this.shouldDowngradeForRedirection(command, allowRedirection)) {
|
let responsibleRule: PolicyRule | undefined =
|
||||||
|
rule && ruleDecision === rule.decision ? rule : undefined;
|
||||||
|
|
||||||
|
// Check for redirection on the full command string.
|
||||||
|
// Redirection always downgrades ALLOW to ASK_USER (it never upgrades).
|
||||||
|
if (this.shouldDowngradeForRedirection(command, allowRedirection)) {
|
||||||
|
if (aggregateDecision === PolicyDecision.ALLOW) {
|
||||||
debugLogger.debug(
|
debugLogger.debug(
|
||||||
`[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${command}`,
|
`[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${command}`,
|
||||||
);
|
);
|
||||||
aggregateDecision = PolicyDecision.ASK_USER;
|
aggregateDecision = PolicyDecision.ASK_USER;
|
||||||
responsibleRule = undefined; // Inherent policy
|
responsibleRule = undefined; // Inherent policy
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (const rawSubCmd of subCommands) {
|
for (const detail of subCommands) {
|
||||||
const subCmd = rawSubCmd.trim();
|
if (REDIRECTION_NAMES.has(detail.name)) {
|
||||||
// Prevent infinite recursion for the root command
|
continue;
|
||||||
if (subCmd === command) {
|
}
|
||||||
if (this.shouldDowngradeForRedirection(subCmd, allowRedirection)) {
|
|
||||||
debugLogger.debug(
|
const subCmd = detail.text.trim();
|
||||||
`[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${subCmd}`,
|
const isAtomic =
|
||||||
);
|
subCmd === command ||
|
||||||
// Redirection always downgrades ALLOW to ASK_USER
|
(detail.startIndex === 0 && detail.text.length === command.length);
|
||||||
if (aggregateDecision === PolicyDecision.ALLOW) {
|
|
||||||
aggregateDecision = PolicyDecision.ASK_USER;
|
// Recursive check for shell wrappers (bash -c, etc.)
|
||||||
responsibleRule = undefined; // Inherent policy
|
const stripped = stripShellWrapper(subCmd);
|
||||||
}
|
if (stripped !== subCmd) {
|
||||||
|
const wrapperResult = await this.check(
|
||||||
|
{ name: toolName, args: { command: stripped, dir_path } },
|
||||||
|
serverName,
|
||||||
|
toolAnnotations,
|
||||||
|
subagent,
|
||||||
|
true,
|
||||||
|
);
|
||||||
|
|
||||||
|
if (wrapperResult.decision === PolicyDecision.DENY)
|
||||||
|
return wrapperResult;
|
||||||
|
if (wrapperResult.decision === PolicyDecision.ASK_USER) {
|
||||||
|
if (aggregateDecision === PolicyDecision.ALLOW) {
|
||||||
|
responsibleRule = wrapperResult.rule;
|
||||||
} else {
|
} else {
|
||||||
// Atomic command matching the rule.
|
responsibleRule ??= wrapperResult.rule;
|
||||||
if (
|
|
||||||
ruleDecision === PolicyDecision.ASK_USER &&
|
|
||||||
aggregateDecision === PolicyDecision.ALLOW
|
|
||||||
) {
|
|
||||||
aggregateDecision = PolicyDecision.ASK_USER;
|
|
||||||
responsibleRule = rule;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
continue;
|
aggregateDecision = PolicyDecision.ASK_USER;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!isAtomic) {
|
||||||
const subResult = await this.check(
|
const subResult = await this.check(
|
||||||
{ name: toolName, args: { command: subCmd, dir_path } },
|
{ name: toolName, args: { command: subCmd, dir_path } },
|
||||||
serverName,
|
serverName,
|
||||||
toolAnnotations,
|
toolAnnotations,
|
||||||
subagent,
|
subagent,
|
||||||
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
// subResult.decision is already filtered through applyNonInteractiveMode by this.check()
|
if (subResult.decision === PolicyDecision.DENY) return subResult;
|
||||||
const subDecision = subResult.decision;
|
|
||||||
|
|
||||||
// If any part is DENIED, the whole command is DENY
|
if (subResult.decision === PolicyDecision.ASK_USER) {
|
||||||
if (subDecision === PolicyDecision.DENY) {
|
if (aggregateDecision === PolicyDecision.ALLOW) {
|
||||||
return {
|
|
||||||
decision: PolicyDecision.DENY,
|
|
||||||
rule: subResult.rule,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// If any part requires ASK_USER, the whole command requires ASK_USER
|
|
||||||
if (subDecision === PolicyDecision.ASK_USER) {
|
|
||||||
aggregateDecision = PolicyDecision.ASK_USER;
|
|
||||||
if (!responsibleRule) {
|
|
||||||
responsibleRule = subResult.rule;
|
responsibleRule = subResult.rule;
|
||||||
|
} else {
|
||||||
|
responsibleRule ??= subResult.rule;
|
||||||
}
|
}
|
||||||
|
aggregateDecision = PolicyDecision.ASK_USER;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for redirection in allowed sub-commands
|
// Downgrade if sub-command has redirection
|
||||||
if (
|
if (
|
||||||
subDecision === PolicyDecision.ALLOW &&
|
subResult.decision === PolicyDecision.ALLOW &&
|
||||||
this.shouldDowngradeForRedirection(subCmd, allowRedirection)
|
this.shouldDowngradeForRedirection(subCmd, allowRedirection)
|
||||||
) {
|
) {
|
||||||
debugLogger.debug(
|
|
||||||
`[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${subCmd}`,
|
|
||||||
);
|
|
||||||
if (aggregateDecision === PolicyDecision.ALLOW) {
|
if (aggregateDecision === PolicyDecision.ALLOW) {
|
||||||
aggregateDecision = PolicyDecision.ASK_USER;
|
aggregateDecision = PolicyDecision.ASK_USER;
|
||||||
responsibleRule = undefined;
|
responsibleRule = undefined;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
|
||||||
decision: aggregateDecision,
|
|
||||||
// If we stayed at ALLOW, we return the original rule (if any).
|
|
||||||
// If we downgraded, we return the responsible rule (or undefined if implicit).
|
|
||||||
rule: aggregateDecision === ruleDecision ? rule : responsibleRule,
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
decision: ruleDecision,
|
decision: aggregateDecision,
|
||||||
rule,
|
rule: aggregateDecision === ruleDecision ? rule : responsibleRule,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -501,6 +498,7 @@ export class PolicyEngine {
|
|||||||
serverName: string | undefined,
|
serverName: string | undefined,
|
||||||
toolAnnotations?: Record<string, unknown>,
|
toolAnnotations?: Record<string, unknown>,
|
||||||
subagent?: string,
|
subagent?: string,
|
||||||
|
skipHeuristics = false,
|
||||||
): Promise<CheckResult> {
|
): Promise<CheckResult> {
|
||||||
// Case 1: Metadata injection is the primary and safest way to identify an MCP server.
|
// Case 1: Metadata injection is the primary and safest way to identify an MCP server.
|
||||||
// If we have explicit `_serverName` metadata (usually injected by tool-registry for active tools), use it.
|
// If we have explicit `_serverName` metadata (usually injected by tool-registry for active tools), use it.
|
||||||
@@ -594,6 +592,7 @@ export class PolicyEngine {
|
|||||||
|
|
||||||
let ruleDecision = rule.decision;
|
let ruleDecision = rule.decision;
|
||||||
if (
|
if (
|
||||||
|
!skipHeuristics &&
|
||||||
isShellCommand &&
|
isShellCommand &&
|
||||||
command &&
|
command &&
|
||||||
!('commandPrefix' in rule) &&
|
!('commandPrefix' in rule) &&
|
||||||
@@ -615,12 +614,10 @@ export class PolicyEngine {
|
|||||||
subagent,
|
subagent,
|
||||||
);
|
);
|
||||||
decision = shellResult.decision;
|
decision = shellResult.decision;
|
||||||
if (shellResult.rule) {
|
matchedRule = shellResult.rule;
|
||||||
matchedRule = shellResult.rule;
|
break;
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
decision = rule.decision;
|
decision = ruleDecision;
|
||||||
matchedRule = rule;
|
matchedRule = rule;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -643,7 +640,7 @@ export class PolicyEngine {
|
|||||||
);
|
);
|
||||||
if (toolName && SHELL_TOOL_NAMES.includes(toolName)) {
|
if (toolName && SHELL_TOOL_NAMES.includes(toolName)) {
|
||||||
let heuristicDecision = this.defaultDecision;
|
let heuristicDecision = this.defaultDecision;
|
||||||
if (command) {
|
if (!skipHeuristics && command) {
|
||||||
heuristicDecision = await this.applyShellHeuristics(
|
heuristicDecision = await this.applyShellHeuristics(
|
||||||
command,
|
command,
|
||||||
heuristicDecision,
|
heuristicDecision,
|
||||||
|
|||||||
@@ -0,0 +1,134 @@
|
|||||||
|
/**
|
||||||
|
* @license
|
||||||
|
* Copyright 2026 Google LLC
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, it, expect, beforeAll } from 'vitest';
|
||||||
|
import { PolicyEngine } from './policy-engine.js';
|
||||||
|
import { PolicyDecision, ApprovalMode } from './types.js';
|
||||||
|
import { initializeShellParsers } from '../utils/shell-utils.js';
|
||||||
|
import { buildArgsPatterns } from './utils.js';
|
||||||
|
|
||||||
|
describe('PolicyEngine - Shell Safety Regression Suite', () => {
|
||||||
|
let engine: PolicyEngine;
|
||||||
|
|
||||||
|
beforeAll(async () => {
|
||||||
|
await initializeShellParsers();
|
||||||
|
});
|
||||||
|
|
||||||
|
const setupEngine = (allowedCommands: string[]) => {
|
||||||
|
const rules = allowedCommands.map((cmd) => ({
|
||||||
|
toolName: 'run_shell_command',
|
||||||
|
decision: PolicyDecision.ALLOW,
|
||||||
|
argsPattern: new RegExp(buildArgsPatterns(undefined, cmd)[0]!),
|
||||||
|
priority: 10,
|
||||||
|
}));
|
||||||
|
|
||||||
|
return new PolicyEngine({
|
||||||
|
rules,
|
||||||
|
approvalMode: ApprovalMode.DEFAULT,
|
||||||
|
defaultDecision: PolicyDecision.ASK_USER,
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
it('should block unauthorized chained command with &&', async () => {
|
||||||
|
engine = setupEngine(['echo']);
|
||||||
|
const result = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'echo hi && ls' } },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should allow authorized chained command with &&', async () => {
|
||||||
|
engine = setupEngine(['echo', 'ls']);
|
||||||
|
const result = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'echo hi && ls' } },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
expect(result.decision).toBe(PolicyDecision.ALLOW);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should block unauthorized chained command with ||', async () => {
|
||||||
|
engine = setupEngine(['false']);
|
||||||
|
const result = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'false || ls' } },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should block unauthorized chained command with ;', async () => {
|
||||||
|
engine = setupEngine(['echo']);
|
||||||
|
const result = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'echo hi; ls' } },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should block unauthorized command in pipe |', async () => {
|
||||||
|
engine = setupEngine(['echo']);
|
||||||
|
const result = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'echo hi | grep "hi"' } },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should allow authorized command in pipe |', async () => {
|
||||||
|
engine = setupEngine(['echo', 'grep']);
|
||||||
|
const result = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'echo hi | grep "hi"' } },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
expect(result.decision).toBe(PolicyDecision.ALLOW);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should block unauthorized chained command with &', async () => {
|
||||||
|
engine = setupEngine(['echo']);
|
||||||
|
const result = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'echo hi & ls' } },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should allow authorized chained command with &', async () => {
|
||||||
|
engine = setupEngine(['echo', 'ls']);
|
||||||
|
const result = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'echo hi & ls' } },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
expect(result.decision).toBe(PolicyDecision.ALLOW);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should block unauthorized command in nested substitution', async () => {
|
||||||
|
engine = setupEngine(['echo', 'cat']);
|
||||||
|
const result = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'echo $(cat $(ls))' } },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should allow authorized command in nested substitution', async () => {
|
||||||
|
engine = setupEngine(['echo', 'cat', 'ls']);
|
||||||
|
const result = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'echo $(cat $(ls))' } },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
expect(result.decision).toBe(PolicyDecision.ALLOW);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should block command redirection if not explicitly allowed', async () => {
|
||||||
|
engine = setupEngine(['echo']);
|
||||||
|
const result = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'echo hi > /tmp/test' } },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
// Inherent policy: redirection downgrades to ASK_USER
|
||||||
|
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -59,6 +59,30 @@ vi.mock('../utils/shell-utils.js', async (importOriginal) => {
|
|||||||
return {
|
return {
|
||||||
...actual,
|
...actual,
|
||||||
initializeShellParsers: vi.fn(),
|
initializeShellParsers: vi.fn(),
|
||||||
|
parseCommandDetails: (command: string) => {
|
||||||
|
if (Object.prototype.hasOwnProperty.call(commandMap, command)) {
|
||||||
|
const subcommands = commandMap[command];
|
||||||
|
return {
|
||||||
|
details: subcommands.map((text) => ({
|
||||||
|
name: text.split(' ')[0],
|
||||||
|
text,
|
||||||
|
startIndex: command.indexOf(text),
|
||||||
|
})),
|
||||||
|
hasError: subcommands.length === 0 && command.includes('&&&'),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
details: [
|
||||||
|
{
|
||||||
|
name: command.split(' ')[0],
|
||||||
|
text: command,
|
||||||
|
startIndex: 0,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
hasError: false,
|
||||||
|
};
|
||||||
|
},
|
||||||
|
stripShellWrapper: (command: string) => command,
|
||||||
splitCommands: (command: string) => {
|
splitCommands: (command: string) => {
|
||||||
if (Object.prototype.hasOwnProperty.call(commandMap, command)) {
|
if (Object.prototype.hasOwnProperty.call(commandMap, command)) {
|
||||||
return commandMap[command];
|
return commandMap[command];
|
||||||
|
|||||||
@@ -0,0 +1,97 @@
|
|||||||
|
/**
|
||||||
|
* @license
|
||||||
|
* Copyright 2026 Google LLC
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { expect, describe, it, beforeAll, vi } from 'vitest';
|
||||||
|
import { PolicyEngine } from './policy-engine.js';
|
||||||
|
import { PolicyDecision } from './types.js';
|
||||||
|
import { initializeShellParsers } from '../utils/shell-utils.js';
|
||||||
|
|
||||||
|
// Mock node:os to ensure shell-utils logic always thinks it's on a POSIX-like system.
|
||||||
|
// This ensures that internal calls to getShellConfiguration() and isWindows()
|
||||||
|
// within the shell-utils module return 'bash' configuration, even on Windows CI.
|
||||||
|
vi.mock('node:os', async (importOriginal) => {
|
||||||
|
const actual = await importOriginal<typeof import('node:os')>();
|
||||||
|
return {
|
||||||
|
...actual,
|
||||||
|
default: {
|
||||||
|
...actual,
|
||||||
|
platform: () => 'linux',
|
||||||
|
},
|
||||||
|
platform: () => 'linux',
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
// Mock shell-utils to ensure consistent behavior across platforms (especially Windows CI)
|
||||||
|
// We want to test PolicyEngine logic with Bash syntax rules.
|
||||||
|
vi.mock('../utils/shell-utils.js', async (importOriginal) => {
|
||||||
|
const actual =
|
||||||
|
await importOriginal<typeof import('../utils/shell-utils.js')>();
|
||||||
|
return {
|
||||||
|
...actual,
|
||||||
|
getShellConfiguration: () => ({
|
||||||
|
executable: 'bash',
|
||||||
|
argsPrefix: ['-c'],
|
||||||
|
shell: 'bash',
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('PolicyEngine Command Substitution Validation', () => {
|
||||||
|
beforeAll(async () => {
|
||||||
|
await initializeShellParsers();
|
||||||
|
});
|
||||||
|
|
||||||
|
const setupEngine = (blockedCmd: string) =>
|
||||||
|
new PolicyEngine({
|
||||||
|
defaultDecision: PolicyDecision.ALLOW,
|
||||||
|
rules: [
|
||||||
|
{
|
||||||
|
toolName: 'run_shell_command',
|
||||||
|
argsPattern: new RegExp(`"command":"${blockedCmd}"`),
|
||||||
|
decision: PolicyDecision.DENY,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should block echo $(dangerous_cmd) when dangerous_cmd is explicitly blocked', async () => {
|
||||||
|
const engine = setupEngine('dangerous_cmd');
|
||||||
|
const result = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'echo $(dangerous_cmd)' } },
|
||||||
|
'test-server',
|
||||||
|
);
|
||||||
|
expect(result.decision).toBe(PolicyDecision.DENY);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should block backtick substitution `dangerous_cmd`', async () => {
|
||||||
|
const engine = setupEngine('dangerous_cmd');
|
||||||
|
const result = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'echo `dangerous_cmd`' } },
|
||||||
|
'test-server',
|
||||||
|
);
|
||||||
|
expect(result.decision).toBe(PolicyDecision.DENY);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should block commands inside subshells (dangerous_cmd)', async () => {
|
||||||
|
const engine = setupEngine('dangerous_cmd');
|
||||||
|
const result = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: '(dangerous_cmd)' } },
|
||||||
|
'test-server',
|
||||||
|
);
|
||||||
|
expect(result.decision).toBe(PolicyDecision.DENY);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle nested substitutions deeply', async () => {
|
||||||
|
const engine = setupEngine('deep_danger');
|
||||||
|
const result = await engine.check(
|
||||||
|
{
|
||||||
|
name: 'run_shell_command',
|
||||||
|
args: { command: 'echo $(ls $(deep_danger))' },
|
||||||
|
},
|
||||||
|
'test-server',
|
||||||
|
);
|
||||||
|
expect(result.decision).toBe(PolicyDecision.DENY);
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -335,8 +335,10 @@ export interface PolicySettings {
|
|||||||
allowed?: string[];
|
allowed?: string[];
|
||||||
};
|
};
|
||||||
tools?: {
|
tools?: {
|
||||||
|
core?: string[];
|
||||||
exclude?: string[];
|
exclude?: string[];
|
||||||
allowed?: string[];
|
allowed?: string[];
|
||||||
|
confirmationRequired?: string[];
|
||||||
};
|
};
|
||||||
mcpServers?: Record<string, { trust?: boolean }>;
|
mcpServers?: Record<string, { trust?: boolean }>;
|
||||||
// User provided policies that will replace the USER level policies in ~/.gemini/policies
|
// User provided policies that will replace the USER level policies in ~/.gemini/policies
|
||||||
|
|||||||
@@ -1993,13 +1993,15 @@ describe('getRipgrepPath', () => {
|
|||||||
vi.mocked(fileExists).mockImplementation(
|
vi.mocked(fileExists).mockImplementation(
|
||||||
async (checkPath) =>
|
async (checkPath) =>
|
||||||
checkPath.includes(path.normalize('core/vendor/ripgrep')) &&
|
checkPath.includes(path.normalize('core/vendor/ripgrep')) &&
|
||||||
!checkPath.includes('tools'),
|
!checkPath.includes(path.join(path.sep, 'tools', path.sep)),
|
||||||
);
|
);
|
||||||
|
|
||||||
const resolvedPath = await getRipgrepPath();
|
const resolvedPath = await getRipgrepPath();
|
||||||
expect(resolvedPath).not.toBeNull();
|
expect(resolvedPath).not.toBeNull();
|
||||||
expect(resolvedPath).toContain(path.normalize('core/vendor/ripgrep'));
|
expect(resolvedPath).toContain(path.normalize('core/vendor/ripgrep'));
|
||||||
expect(resolvedPath).not.toContain('tools');
|
expect(resolvedPath).not.toContain(
|
||||||
|
path.join(path.sep, 'tools', path.sep),
|
||||||
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should return null if binary is missing from both paths', async () => {
|
it('should return null if binary is missing from both paths', async () => {
|
||||||
|
|||||||
@@ -240,11 +240,15 @@ foreach ($commandAst in $commandAsts) {
|
|||||||
'utf16le',
|
'utf16le',
|
||||||
).toString('base64');
|
).toString('base64');
|
||||||
|
|
||||||
const REDIRECTION_NAMES = new Set([
|
export const REDIRECTION_NAMES = new Set([
|
||||||
'redirection (<)',
|
'redirection (<)',
|
||||||
'redirection (>)',
|
'redirection (>)',
|
||||||
'heredoc (<<)',
|
'heredoc (<<)',
|
||||||
'herestring (<<<)',
|
'herestring (<<<)',
|
||||||
|
'command substitution',
|
||||||
|
'backtick substitution',
|
||||||
|
'process substitution',
|
||||||
|
'subshell',
|
||||||
]);
|
]);
|
||||||
|
|
||||||
function createParser(): Parser | null {
|
function createParser(): Parser | null {
|
||||||
@@ -360,6 +364,14 @@ function extractNameFromNode(node: Node): string | null {
|
|||||||
return 'heredoc (<<)';
|
return 'heredoc (<<)';
|
||||||
case 'herestring_redirect':
|
case 'herestring_redirect':
|
||||||
return 'herestring (<<<)';
|
return 'herestring (<<<)';
|
||||||
|
case 'command_substitution':
|
||||||
|
return 'command substitution';
|
||||||
|
case 'backtick_substitution':
|
||||||
|
return 'backtick substitution';
|
||||||
|
case 'process_substitution':
|
||||||
|
return 'process substitution';
|
||||||
|
case 'subshell':
|
||||||
|
return 'subshell';
|
||||||
default:
|
default:
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2531,6 +2531,15 @@
|
|||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"confirmationRequired": {
|
||||||
|
"title": "Confirmation Required",
|
||||||
|
"description": "Tool names that always require user confirmation. Takes precedence over allowed tools and core tool allowlists.",
|
||||||
|
"markdownDescription": "Tool names that always require user confirmation. Takes precedence over allowed tools and core tool allowlists.\n\n- Category: `Advanced`\n- Requires restart: `yes`",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
"exclude": {
|
"exclude": {
|
||||||
"title": "Exclude Tools",
|
"title": "Exclude Tools",
|
||||||
"description": "Tool names to exclude from discovery.",
|
"description": "Tool names to exclude from discovery.",
|
||||||
|
|||||||
Reference in New Issue
Block a user