feat(core): enhance shell command validation and add core tools allowlist (#25720)

Co-authored-by: David Pierce <davidapierce@google.com>
Co-authored-by: Keith Schaab <keithsc@google.com>
Co-authored-by: Keith Schaab <keith.schaab@gmail.com>
Co-authored-by: Emily Hedlund <ehedlund@google.com>
This commit is contained in:
Gal Zahavi
2026-04-23 13:26:01 -07:00
committed by GitHub
parent c024064f47
commit 27927c55e5
16 changed files with 632 additions and 88 deletions
+6
View File
@@ -1467,6 +1467,12 @@ their corresponding top-level category object in your `settings.json` file.
- **Default:** `undefined` - **Default:** `undefined`
- **Requires restart:** Yes - **Requires restart:** Yes
- **`tools.confirmationRequired`** (array):
- **Description:** Tool names that always require user confirmation. Takes
precedence over allowed tools and core tool allowlists.
- **Default:** `undefined`
- **Requires restart:** Yes
- **`tools.exclude`** (array): - **`tools.exclude`** (array):
- **Description:** Tool names to exclude from discovery. - **Description:** Tool names to exclude from discovery.
- **Default:** `undefined` - **Default:** `undefined`
+55
View File
@@ -18,6 +18,11 @@ describe('save_memory', () => {
suiteName: 'default', suiteName: 'default',
suiteType: 'behavioral', suiteType: 'behavioral',
name: rememberingFavoriteColor, name: rememberingFavoriteColor,
params: {
settings: {
experimental: { memoryV2: false },
},
},
prompt: `remember that my favorite color is blue. prompt: `remember that my favorite color is blue.
@@ -40,6 +45,11 @@ describe('save_memory', () => {
suiteName: 'default', suiteName: 'default',
suiteType: 'behavioral', suiteType: 'behavioral',
name: rememberingCommandRestrictions, name: rememberingCommandRestrictions,
params: {
settings: {
experimental: { memoryV2: false },
},
},
prompt: `I don't want you to ever run npm commands.`, prompt: `I don't want you to ever run npm commands.`,
assert: async (rig, result) => { assert: async (rig, result) => {
@@ -61,6 +71,11 @@ describe('save_memory', () => {
suiteName: 'default', suiteName: 'default',
suiteType: 'behavioral', suiteType: 'behavioral',
name: rememberingWorkflow, name: rememberingWorkflow,
params: {
settings: {
experimental: { memoryV2: false },
},
},
prompt: `I want you to always lint after building.`, prompt: `I want you to always lint after building.`,
assert: async (rig, result) => { assert: async (rig, result) => {
@@ -83,6 +98,11 @@ describe('save_memory', () => {
suiteName: 'default', suiteName: 'default',
suiteType: 'behavioral', suiteType: 'behavioral',
name: ignoringTemporaryInformation, name: ignoringTemporaryInformation,
params: {
settings: {
experimental: { memoryV2: false },
},
},
prompt: `I'm going to get a coffee.`, prompt: `I'm going to get a coffee.`,
assert: async (rig, result) => { assert: async (rig, result) => {
@@ -108,6 +128,11 @@ describe('save_memory', () => {
suiteName: 'default', suiteName: 'default',
suiteType: 'behavioral', suiteType: 'behavioral',
name: rememberingPetName, name: rememberingPetName,
params: {
settings: {
experimental: { memoryV2: false },
},
},
prompt: `Please remember that my dog's name is Buddy.`, prompt: `Please remember that my dog's name is Buddy.`,
assert: async (rig, result) => { assert: async (rig, result) => {
@@ -129,6 +154,11 @@ describe('save_memory', () => {
suiteName: 'default', suiteName: 'default',
suiteType: 'behavioral', suiteType: 'behavioral',
name: rememberingCommandAlias, name: rememberingCommandAlias,
params: {
settings: {
experimental: { memoryV2: false },
},
},
prompt: `When I say 'start server', you should run 'npm run dev'.`, prompt: `When I say 'start server', you should run 'npm run dev'.`,
assert: async (rig, result) => { assert: async (rig, result) => {
@@ -151,6 +181,11 @@ describe('save_memory', () => {
suiteName: 'default', suiteName: 'default',
suiteType: 'behavioral', suiteType: 'behavioral',
name: savingDbSchemaLocationAsProjectMemory, name: savingDbSchemaLocationAsProjectMemory,
params: {
settings: {
experimental: { memoryV2: false },
},
},
prompt: `The database schema for this workspace is located in \`db/schema.sql\`.`, prompt: `The database schema for this workspace is located in \`db/schema.sql\`.`,
assert: async (rig, result) => { assert: async (rig, result) => {
const wasToolCalled = await rig.waitForToolCall( const wasToolCalled = await rig.waitForToolCall(
@@ -180,6 +215,11 @@ describe('save_memory', () => {
suiteName: 'default', suiteName: 'default',
suiteType: 'behavioral', suiteType: 'behavioral',
name: rememberingCodingStyle, name: rememberingCodingStyle,
params: {
settings: {
experimental: { memoryV2: false },
},
},
prompt: `I prefer to use tabs instead of spaces for indentation.`, prompt: `I prefer to use tabs instead of spaces for indentation.`,
assert: async (rig, result) => { assert: async (rig, result) => {
@@ -202,6 +242,11 @@ describe('save_memory', () => {
suiteName: 'default', suiteName: 'default',
suiteType: 'behavioral', suiteType: 'behavioral',
name: savingBuildArtifactLocationAsProjectMemory, name: savingBuildArtifactLocationAsProjectMemory,
params: {
settings: {
experimental: { memoryV2: false },
},
},
prompt: `In this workspace, build artifacts are stored in the \`dist/artifacts\` directory.`, prompt: `In this workspace, build artifacts are stored in the \`dist/artifacts\` directory.`,
assert: async (rig, result) => { assert: async (rig, result) => {
const wasToolCalled = await rig.waitForToolCall( const wasToolCalled = await rig.waitForToolCall(
@@ -231,6 +276,11 @@ describe('save_memory', () => {
suiteName: 'default', suiteName: 'default',
suiteType: 'behavioral', suiteType: 'behavioral',
name: savingMainEntryPointAsProjectMemory, name: savingMainEntryPointAsProjectMemory,
params: {
settings: {
experimental: { memoryV2: false },
},
},
prompt: `The main entry point for this workspace is \`src/index.js\`.`, prompt: `The main entry point for this workspace is \`src/index.js\`.`,
assert: async (rig, result) => { assert: async (rig, result) => {
const wasToolCalled = await rig.waitForToolCall( const wasToolCalled = await rig.waitForToolCall(
@@ -259,6 +309,11 @@ describe('save_memory', () => {
suiteName: 'default', suiteName: 'default',
suiteType: 'behavioral', suiteType: 'behavioral',
name: rememberingBirthday, name: rememberingBirthday,
params: {
settings: {
experimental: { memoryV2: false },
},
},
prompt: `My birthday is on June 15th.`, prompt: `My birthday is on June 15th.`,
assert: async (rig, result) => { assert: async (rig, result) => {
+1
View File
@@ -172,6 +172,7 @@ export async function internalEvalTest(evalCase: EvalCase) {
timeout: evalCase.timeout, timeout: evalCase.timeout,
env: { env: {
GEMINI_CLI_ACTIVITY_LOG_TARGET: activityLogFile, GEMINI_CLI_ACTIVITY_LOG_TARGET: activityLogFile,
GEMINI_CLI_TRUST_WORKSPACE: 'true',
}, },
}); });
+13
View File
@@ -1667,6 +1667,19 @@ const SETTINGS_SCHEMA = {
showInDialog: false, showInDialog: false,
items: { type: 'string' }, items: { type: 'string' },
}, },
confirmationRequired: {
type: 'array',
label: 'Confirmation Required',
category: 'Advanced',
requiresRestart: true,
default: undefined as string[] | undefined,
description: oneLine`
Tool names that always require user confirmation.
Takes precedence over allowed tools and core tool allowlists.
`,
showInDialog: false,
items: { type: 'string' },
},
exclude: { exclude: {
type: 'array', type: 'array',
label: 'Exclude Tools', label: 'Exclude Tools',
@@ -168,6 +168,13 @@ exports[`InputPrompt > mouse interaction > should toggle paste expansion on doub
" "
`; `;
exports[`InputPrompt > mouse interaction > should toggle paste expansion on double-click 4`] = `
"▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀
> [Pasted Text: 10 lines]
▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
"
`;
exports[`InputPrompt > multiline rendering > should correctly render multiline input including blank lines 1`] = ` exports[`InputPrompt > multiline rendering > should correctly render multiline input including blank lines 1`] = `
"──────────────────────────────────────────────────────────────────────────────────────────────────── "────────────────────────────────────────────────────────────────────────────────────────────────────
> hello > hello
+87 -10
View File
@@ -74,7 +74,9 @@ export const ADMIN_POLICY_TIER = 5;
export const MCP_EXCLUDED_PRIORITY = USER_POLICY_TIER + 0.9; export const MCP_EXCLUDED_PRIORITY = USER_POLICY_TIER + 0.9;
export const EXCLUDE_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.4; export const EXCLUDE_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.4;
export const CONFIRMATION_REQUIRED_PRIORITY = USER_POLICY_TIER + 0.35;
export const ALLOWED_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.3; export const ALLOWED_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.3;
export const CORE_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.25;
export const TRUSTED_MCP_SERVER_PRIORITY = USER_POLICY_TIER + 0.2; export const TRUSTED_MCP_SERVER_PRIORITY = USER_POLICY_TIER + 0.2;
export const ALLOWED_MCP_SERVER_PRIORITY = USER_POLICY_TIER + 0.1; export const ALLOWED_MCP_SERVER_PRIORITY = USER_POLICY_TIER + 0.1;
@@ -434,10 +436,21 @@ export async function createPolicyEngineConfig(
} }
} }
// Tools that are explicitly allowed in the settings. const nonPlanModes = [
// Priority: ALLOWED_TOOLS_FLAG_PRIORITY (user tier - explicit temporary allows) ApprovalMode.DEFAULT,
if (settings.tools?.allowed) { ApprovalMode.AUTO_EDIT,
for (const tool of settings.tools.allowed) { ApprovalMode.YOLO,
];
const mapToolsToRules = (
tools: string[],
priority: number,
source: string,
modes?: ApprovalMode[],
addDefaultDenyForTools = false,
) => {
const toolsWithNarrowing = new Set<string>();
for (const tool of tools) {
// Check for legacy format: toolName(args) // Check for legacy format: toolName(args)
const match = tool.match(/^([a-zA-Z0-9_-]+)\((.*)\)$/); const match = tool.match(/^([a-zA-Z0-9_-]+)\((.*)\)$/);
if (match) { if (match) {
@@ -449,15 +462,17 @@ export async function createPolicyEngineConfig(
// Treat args as a command prefix for shell tool // Treat args as a command prefix for shell tool
if (toolName === SHELL_TOOL_NAME) { if (toolName === SHELL_TOOL_NAME) {
toolsWithNarrowing.add(toolName);
const patterns = buildArgsPatterns(undefined, args); const patterns = buildArgsPatterns(undefined, args);
for (const pattern of patterns) { for (const pattern of patterns) {
if (pattern) { if (pattern) {
rules.push({ rules.push({
toolName, toolName,
decision: PolicyDecision.ALLOW, decision: PolicyDecision.ALLOW,
priority: ALLOWED_TOOLS_FLAG_PRIORITY, priority,
argsPattern: new RegExp(pattern), argsPattern: new RegExp(pattern),
source: 'Settings (Tools Allowed)', source,
modes,
}); });
} }
} }
@@ -467,8 +482,9 @@ export async function createPolicyEngineConfig(
rules.push({ rules.push({
toolName, toolName,
decision: PolicyDecision.ALLOW, decision: PolicyDecision.ALLOW,
priority: ALLOWED_TOOLS_FLAG_PRIORITY, priority,
source: 'Settings (Tools Allowed)', source,
modes,
}); });
} }
} else { } else {
@@ -479,11 +495,70 @@ export async function createPolicyEngineConfig(
rules.push({ rules.push({
toolName, toolName,
decision: PolicyDecision.ALLOW, decision: PolicyDecision.ALLOW,
priority: ALLOWED_TOOLS_FLAG_PRIORITY, priority,
source: 'Settings (Tools Allowed)', source,
modes,
}); });
} }
} }
if (addDefaultDenyForTools) {
for (const toolName of toolsWithNarrowing) {
rules.push({
toolName,
decision: PolicyDecision.DENY,
priority: priority - 0.01,
source: `${source} (Narrowing Enforcement)`,
modes,
});
}
}
};
// Tools that are explicitly allowed in the settings.
// Priority: ALLOWED_TOOLS_FLAG_PRIORITY (user tier - explicit temporary allows)
if (settings.tools?.allowed) {
mapToolsToRules(
settings.tools.allowed,
ALLOWED_TOOLS_FLAG_PRIORITY,
'Settings (Tools Allowed)',
undefined,
true,
);
}
// Tools that explicitly require confirmation in the settings.
// Priority: CONFIRMATION_REQUIRED_PRIORITY (overrides allowed and core)
if (settings.tools?.confirmationRequired) {
for (const tool of settings.tools.confirmationRequired) {
rules.push({
toolName: SHELL_TOOL_NAMES.includes(tool) ? SHELL_TOOL_NAME : tool,
decision: PolicyDecision.ASK_USER,
priority: CONFIRMATION_REQUIRED_PRIORITY,
source: 'Settings (Confirmation Required)',
});
}
}
// Core tools that are restricted in the settings.
// Priority: CORE_TOOLS_FLAG_PRIORITY (user tier - core tool allowlist)
if (settings.tools?.core) {
mapToolsToRules(
settings.tools.core,
CORE_TOOLS_FLAG_PRIORITY,
'Settings (Core Tools)',
nonPlanModes,
);
// If core tools are restricted, we should add a default DENY rule for everything else
// at a slightly lower priority than the explicit allows.
rules.push({
toolName: '*',
decision: PolicyDecision.DENY,
priority: CORE_TOOLS_FLAG_PRIORITY - 0.01,
source: 'Settings (Core Tools Allowlist Enforcement)',
modes: nonPlanModes,
});
} }
// MCP servers that are trusted in the settings. // MCP servers that are trusted in the settings.
@@ -501,6 +576,7 @@ export async function createPolicyEngineConfig(
decision: PolicyDecision.ALLOW, decision: PolicyDecision.ALLOW,
priority: TRUSTED_MCP_SERVER_PRIORITY, priority: TRUSTED_MCP_SERVER_PRIORITY,
source: 'Settings (MCP Trusted)', source: 'Settings (MCP Trusted)',
modes: nonPlanModes,
}); });
} }
} }
@@ -519,6 +595,7 @@ export async function createPolicyEngineConfig(
decision: PolicyDecision.ALLOW, decision: PolicyDecision.ALLOW,
priority: ALLOWED_MCP_SERVER_PRIORITY, priority: ALLOWED_MCP_SERVER_PRIORITY,
source: 'Settings (MCP Allowed)', source: 'Settings (MCP Allowed)',
modes: nonPlanModes,
}); });
} }
} }
@@ -0,0 +1,76 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect } from 'vitest';
import { createPolicyEngineConfig } from './config.js';
import { PolicyEngine } from './policy-engine.js';
import { PolicyDecision, ApprovalMode } from './types.js';
describe('PolicyEngine - Core Tools Mapping', () => {
it('should allow tools explicitly listed in settings.tools.core', async () => {
const settings = {
tools: {
core: ['run_shell_command(ls)', 'run_shell_command(git status)'],
},
};
const config = await createPolicyEngineConfig(
settings,
ApprovalMode.DEFAULT,
undefined,
true, // interactive
);
const engine = new PolicyEngine(config);
// Test simple tool name
const result1 = await engine.check(
{ name: 'run_shell_command', args: { command: 'ls' } },
undefined,
);
expect(result1.decision).toBe(PolicyDecision.ALLOW);
// Test tool name with args
const result2 = await engine.check(
{ name: 'run_shell_command', args: { command: 'git status' } },
undefined,
);
expect(result2.decision).toBe(PolicyDecision.ALLOW);
// Test tool not in core list
const result3 = await engine.check(
{ name: 'run_shell_command', args: { command: 'npm test' } },
undefined,
);
// Should be DENIED because of strict allowlist
expect(result3.decision).toBe(PolicyDecision.DENY);
});
it('should allow tools in tools.core even if they are restricted by default policies', async () => {
// By default run_shell_command is ASK_USER.
// Putting it in tools.core should make it ALLOW.
const settings = {
tools: {
core: ['run_shell_command'],
},
};
const config = await createPolicyEngineConfig(
settings,
ApprovalMode.DEFAULT,
undefined,
true,
);
const engine = new PolicyEngine(config);
const result = await engine.check(
{ name: 'run_shell_command', args: { command: 'any command' } },
undefined,
);
expect(result.decision).toBe(PolicyDecision.ALLOW);
});
});
+34 -2
View File
@@ -43,6 +43,35 @@ vi.mock('../utils/shell-utils.js', async (importOriginal) => {
} }
return [command]; return [command];
}), }),
parseCommandDetails: vi.fn().mockImplementation((command: string) => {
// Basic mock implementation for PolicyEngine test needs
const commands = command.includes('&&')
? command.split('&&').map((c) => c.trim())
: [command.trim()];
// Detect $(...) or `...` and add as sub-commands for recursion tests
const subCommands = [...commands];
for (const cmd of commands) {
const subMatch = cmd.match(/\$\((.*)\)/) || cmd.match(/`(.*)`/);
if (subMatch?.[1]) {
subCommands.push(subMatch[1].trim());
}
}
return {
details: subCommands.map((c, i) => ({
name: c.split(' ')[0],
text: c,
startIndex: i === 0 ? 0 : -1, // Simple root indication
})),
hasError: false,
};
}),
stripShellWrapper: vi.fn().mockImplementation((command: string) => {
// Simple mock for stripping wrappers
const match = command.match(/^(?:bash|sh|zsh)\s+-c\s+["'](.*)["']$/i);
return match ? match[1] : command;
}),
hasRedirection: vi.fn().mockImplementation( hasRedirection: vi.fn().mockImplementation(
(command: string) => (command: string) =>
// Simple mock: true if '>' is present, unless it looks like "-> arrow" // Simple mock: true if '>' is present, unless it looks like "-> arrow"
@@ -1862,7 +1891,6 @@ describe('PolicyEngine', () => {
}); });
it('should return ASK_USER in non-YOLO mode if shell command parsing fails', async () => { it('should return ASK_USER in non-YOLO mode if shell command parsing fails', async () => {
const { splitCommands } = await import('../utils/shell-utils.js');
const rules: PolicyRule[] = [ const rules: PolicyRule[] = [
{ {
toolName: 'run_shell_command', toolName: 'run_shell_command',
@@ -1877,7 +1905,11 @@ describe('PolicyEngine', () => {
}); });
// Simulate parsing failure // Simulate parsing failure
vi.mocked(splitCommands).mockReturnValueOnce([]); const { parseCommandDetails } = await import('../utils/shell-utils.js');
vi.mocked(parseCommandDetails).mockReturnValueOnce({
details: [],
hasError: true,
});
const result = await engine.check( const result = await engine.check(
{ name: 'run_shell_command', args: { command: 'complex command' } }, { name: 'run_shell_command', args: { command: 'complex command' } },
+70 -73
View File
@@ -7,8 +7,10 @@
import { type FunctionCall } from '@google/genai'; import { type FunctionCall } from '@google/genai';
import { import {
SHELL_TOOL_NAMES, SHELL_TOOL_NAMES,
REDIRECTION_NAMES,
initializeShellParsers, initializeShellParsers,
splitCommands, parseCommandDetails,
stripShellWrapper,
hasRedirection, hasRedirection,
extractStringFromParseEntry, extractStringFromParseEntry,
} from '../utils/shell-utils.js'; } from '../utils/shell-utils.js';
@@ -359,7 +361,8 @@ export class PolicyEngine {
} }
await initializeShellParsers(); await initializeShellParsers();
const subCommands = splitCommands(command); const parsed = parseCommandDetails(command);
const subCommands = parsed?.details ?? [];
if (subCommands.length === 0) { if (subCommands.length === 0) {
// If the matched rule says DENY, we should respect it immediately even if parsing fails. // If the matched rule says DENY, we should respect it immediately even if parsing fails.
@@ -380,115 +383,109 @@ export class PolicyEngine {
); );
// Parsing logic failed, we can't trust it. Use default decision ASK_USER (or DENY in non-interactive). // Parsing logic failed, we can't trust it. Use default decision ASK_USER (or DENY in non-interactive).
// We return the rule that matched so the evaluation loop terminates.
return { return {
decision: this.defaultDecision, decision: this.defaultDecision,
rule, rule,
}; };
} }
// If there are multiple parts, or if we just want to validate the single part against DENY rules debugLogger.debug(
if (subCommands.length > 0) { `[PolicyEngine.check] Validating shell command: ${subCommands.length} parts`,
debugLogger.debug( );
`[PolicyEngine.check] Validating shell command: ${subCommands.length} parts`,
);
if (ruleDecision === PolicyDecision.DENY) { if (ruleDecision === PolicyDecision.DENY) {
return { decision: PolicyDecision.DENY, rule }; return { decision: PolicyDecision.DENY, rule };
} }
// Start optimistically. If all parts are ALLOW, the whole is ALLOW. // Start with the decision from the rule or heuristics.
// We will downgrade if any part is ASK_USER or DENY. // If the tool call was already downgraded (e.g. by heuristics), we start there.
let aggregateDecision = PolicyDecision.ALLOW; let aggregateDecision = ruleDecision;
let responsibleRule: PolicyRule | undefined;
// Check for redirection on the full command string // If heuristics downgraded the decision, we don't blame the rule.
if (this.shouldDowngradeForRedirection(command, allowRedirection)) { let responsibleRule: PolicyRule | undefined =
rule && ruleDecision === rule.decision ? rule : undefined;
// Check for redirection on the full command string.
// Redirection always downgrades ALLOW to ASK_USER (it never upgrades).
if (this.shouldDowngradeForRedirection(command, allowRedirection)) {
if (aggregateDecision === PolicyDecision.ALLOW) {
debugLogger.debug( debugLogger.debug(
`[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${command}`, `[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${command}`,
); );
aggregateDecision = PolicyDecision.ASK_USER; aggregateDecision = PolicyDecision.ASK_USER;
responsibleRule = undefined; // Inherent policy responsibleRule = undefined; // Inherent policy
} }
}
for (const rawSubCmd of subCommands) { for (const detail of subCommands) {
const subCmd = rawSubCmd.trim(); if (REDIRECTION_NAMES.has(detail.name)) {
// Prevent infinite recursion for the root command continue;
if (subCmd === command) { }
if (this.shouldDowngradeForRedirection(subCmd, allowRedirection)) {
debugLogger.debug( const subCmd = detail.text.trim();
`[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${subCmd}`, const isAtomic =
); subCmd === command ||
// Redirection always downgrades ALLOW to ASK_USER (detail.startIndex === 0 && detail.text.length === command.length);
if (aggregateDecision === PolicyDecision.ALLOW) {
aggregateDecision = PolicyDecision.ASK_USER; // Recursive check for shell wrappers (bash -c, etc.)
responsibleRule = undefined; // Inherent policy const stripped = stripShellWrapper(subCmd);
} if (stripped !== subCmd) {
const wrapperResult = await this.check(
{ name: toolName, args: { command: stripped, dir_path } },
serverName,
toolAnnotations,
subagent,
true,
);
if (wrapperResult.decision === PolicyDecision.DENY)
return wrapperResult;
if (wrapperResult.decision === PolicyDecision.ASK_USER) {
if (aggregateDecision === PolicyDecision.ALLOW) {
responsibleRule = wrapperResult.rule;
} else { } else {
// Atomic command matching the rule. responsibleRule ??= wrapperResult.rule;
if (
ruleDecision === PolicyDecision.ASK_USER &&
aggregateDecision === PolicyDecision.ALLOW
) {
aggregateDecision = PolicyDecision.ASK_USER;
responsibleRule = rule;
}
} }
continue; aggregateDecision = PolicyDecision.ASK_USER;
} }
}
if (!isAtomic) {
const subResult = await this.check( const subResult = await this.check(
{ name: toolName, args: { command: subCmd, dir_path } }, { name: toolName, args: { command: subCmd, dir_path } },
serverName, serverName,
toolAnnotations, toolAnnotations,
subagent, subagent,
true,
); );
// subResult.decision is already filtered through applyNonInteractiveMode by this.check() if (subResult.decision === PolicyDecision.DENY) return subResult;
const subDecision = subResult.decision;
// If any part is DENIED, the whole command is DENY if (subResult.decision === PolicyDecision.ASK_USER) {
if (subDecision === PolicyDecision.DENY) { if (aggregateDecision === PolicyDecision.ALLOW) {
return {
decision: PolicyDecision.DENY,
rule: subResult.rule,
};
}
// If any part requires ASK_USER, the whole command requires ASK_USER
if (subDecision === PolicyDecision.ASK_USER) {
aggregateDecision = PolicyDecision.ASK_USER;
if (!responsibleRule) {
responsibleRule = subResult.rule; responsibleRule = subResult.rule;
} else {
responsibleRule ??= subResult.rule;
} }
aggregateDecision = PolicyDecision.ASK_USER;
} }
// Check for redirection in allowed sub-commands // Downgrade if sub-command has redirection
if ( if (
subDecision === PolicyDecision.ALLOW && subResult.decision === PolicyDecision.ALLOW &&
this.shouldDowngradeForRedirection(subCmd, allowRedirection) this.shouldDowngradeForRedirection(subCmd, allowRedirection)
) { ) {
debugLogger.debug(
`[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${subCmd}`,
);
if (aggregateDecision === PolicyDecision.ALLOW) { if (aggregateDecision === PolicyDecision.ALLOW) {
aggregateDecision = PolicyDecision.ASK_USER; aggregateDecision = PolicyDecision.ASK_USER;
responsibleRule = undefined; responsibleRule = undefined;
} }
} }
} }
return {
decision: aggregateDecision,
// If we stayed at ALLOW, we return the original rule (if any).
// If we downgraded, we return the responsible rule (or undefined if implicit).
rule: aggregateDecision === ruleDecision ? rule : responsibleRule,
};
} }
return { return {
decision: ruleDecision, decision: aggregateDecision,
rule, rule: aggregateDecision === ruleDecision ? rule : responsibleRule,
}; };
} }
@@ -501,6 +498,7 @@ export class PolicyEngine {
serverName: string | undefined, serverName: string | undefined,
toolAnnotations?: Record<string, unknown>, toolAnnotations?: Record<string, unknown>,
subagent?: string, subagent?: string,
skipHeuristics = false,
): Promise<CheckResult> { ): Promise<CheckResult> {
// Case 1: Metadata injection is the primary and safest way to identify an MCP server. // Case 1: Metadata injection is the primary and safest way to identify an MCP server.
// If we have explicit `_serverName` metadata (usually injected by tool-registry for active tools), use it. // If we have explicit `_serverName` metadata (usually injected by tool-registry for active tools), use it.
@@ -594,6 +592,7 @@ export class PolicyEngine {
let ruleDecision = rule.decision; let ruleDecision = rule.decision;
if ( if (
!skipHeuristics &&
isShellCommand && isShellCommand &&
command && command &&
!('commandPrefix' in rule) && !('commandPrefix' in rule) &&
@@ -615,12 +614,10 @@ export class PolicyEngine {
subagent, subagent,
); );
decision = shellResult.decision; decision = shellResult.decision;
if (shellResult.rule) { matchedRule = shellResult.rule;
matchedRule = shellResult.rule; break;
break;
}
} else { } else {
decision = rule.decision; decision = ruleDecision;
matchedRule = rule; matchedRule = rule;
break; break;
} }
@@ -643,7 +640,7 @@ export class PolicyEngine {
); );
if (toolName && SHELL_TOOL_NAMES.includes(toolName)) { if (toolName && SHELL_TOOL_NAMES.includes(toolName)) {
let heuristicDecision = this.defaultDecision; let heuristicDecision = this.defaultDecision;
if (command) { if (!skipHeuristics && command) {
heuristicDecision = await this.applyShellHeuristics( heuristicDecision = await this.applyShellHeuristics(
command, command,
heuristicDecision, heuristicDecision,
@@ -0,0 +1,134 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, beforeAll } from 'vitest';
import { PolicyEngine } from './policy-engine.js';
import { PolicyDecision, ApprovalMode } from './types.js';
import { initializeShellParsers } from '../utils/shell-utils.js';
import { buildArgsPatterns } from './utils.js';
describe('PolicyEngine - Shell Safety Regression Suite', () => {
let engine: PolicyEngine;
beforeAll(async () => {
await initializeShellParsers();
});
const setupEngine = (allowedCommands: string[]) => {
const rules = allowedCommands.map((cmd) => ({
toolName: 'run_shell_command',
decision: PolicyDecision.ALLOW,
argsPattern: new RegExp(buildArgsPatterns(undefined, cmd)[0]!),
priority: 10,
}));
return new PolicyEngine({
rules,
approvalMode: ApprovalMode.DEFAULT,
defaultDecision: PolicyDecision.ASK_USER,
});
};
it('should block unauthorized chained command with &&', async () => {
engine = setupEngine(['echo']);
const result = await engine.check(
{ name: 'run_shell_command', args: { command: 'echo hi && ls' } },
undefined,
);
expect(result.decision).toBe(PolicyDecision.ASK_USER);
});
it('should allow authorized chained command with &&', async () => {
engine = setupEngine(['echo', 'ls']);
const result = await engine.check(
{ name: 'run_shell_command', args: { command: 'echo hi && ls' } },
undefined,
);
expect(result.decision).toBe(PolicyDecision.ALLOW);
});
it('should block unauthorized chained command with ||', async () => {
engine = setupEngine(['false']);
const result = await engine.check(
{ name: 'run_shell_command', args: { command: 'false || ls' } },
undefined,
);
expect(result.decision).toBe(PolicyDecision.ASK_USER);
});
it('should block unauthorized chained command with ;', async () => {
engine = setupEngine(['echo']);
const result = await engine.check(
{ name: 'run_shell_command', args: { command: 'echo hi; ls' } },
undefined,
);
expect(result.decision).toBe(PolicyDecision.ASK_USER);
});
it('should block unauthorized command in pipe |', async () => {
engine = setupEngine(['echo']);
const result = await engine.check(
{ name: 'run_shell_command', args: { command: 'echo hi | grep "hi"' } },
undefined,
);
expect(result.decision).toBe(PolicyDecision.ASK_USER);
});
it('should allow authorized command in pipe |', async () => {
engine = setupEngine(['echo', 'grep']);
const result = await engine.check(
{ name: 'run_shell_command', args: { command: 'echo hi | grep "hi"' } },
undefined,
);
expect(result.decision).toBe(PolicyDecision.ALLOW);
});
it('should block unauthorized chained command with &', async () => {
engine = setupEngine(['echo']);
const result = await engine.check(
{ name: 'run_shell_command', args: { command: 'echo hi & ls' } },
undefined,
);
expect(result.decision).toBe(PolicyDecision.ASK_USER);
});
it('should allow authorized chained command with &', async () => {
engine = setupEngine(['echo', 'ls']);
const result = await engine.check(
{ name: 'run_shell_command', args: { command: 'echo hi & ls' } },
undefined,
);
expect(result.decision).toBe(PolicyDecision.ALLOW);
});
it('should block unauthorized command in nested substitution', async () => {
engine = setupEngine(['echo', 'cat']);
const result = await engine.check(
{ name: 'run_shell_command', args: { command: 'echo $(cat $(ls))' } },
undefined,
);
expect(result.decision).toBe(PolicyDecision.ASK_USER);
});
it('should allow authorized command in nested substitution', async () => {
engine = setupEngine(['echo', 'cat', 'ls']);
const result = await engine.check(
{ name: 'run_shell_command', args: { command: 'echo $(cat $(ls))' } },
undefined,
);
expect(result.decision).toBe(PolicyDecision.ALLOW);
});
it('should block command redirection if not explicitly allowed', async () => {
engine = setupEngine(['echo']);
const result = await engine.check(
{ name: 'run_shell_command', args: { command: 'echo hi > /tmp/test' } },
undefined,
);
// Inherent policy: redirection downgrades to ASK_USER
expect(result.decision).toBe(PolicyDecision.ASK_USER);
});
});
@@ -59,6 +59,30 @@ vi.mock('../utils/shell-utils.js', async (importOriginal) => {
return { return {
...actual, ...actual,
initializeShellParsers: vi.fn(), initializeShellParsers: vi.fn(),
parseCommandDetails: (command: string) => {
if (Object.prototype.hasOwnProperty.call(commandMap, command)) {
const subcommands = commandMap[command];
return {
details: subcommands.map((text) => ({
name: text.split(' ')[0],
text,
startIndex: command.indexOf(text),
})),
hasError: subcommands.length === 0 && command.includes('&&&'),
};
}
return {
details: [
{
name: command.split(' ')[0],
text: command,
startIndex: 0,
},
],
hasError: false,
};
},
stripShellWrapper: (command: string) => command,
splitCommands: (command: string) => { splitCommands: (command: string) => {
if (Object.prototype.hasOwnProperty.call(commandMap, command)) { if (Object.prototype.hasOwnProperty.call(commandMap, command)) {
return commandMap[command]; return commandMap[command];
@@ -0,0 +1,97 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { expect, describe, it, beforeAll, vi } from 'vitest';
import { PolicyEngine } from './policy-engine.js';
import { PolicyDecision } from './types.js';
import { initializeShellParsers } from '../utils/shell-utils.js';
// Mock node:os to ensure shell-utils logic always thinks it's on a POSIX-like system.
// This ensures that internal calls to getShellConfiguration() and isWindows()
// within the shell-utils module return 'bash' configuration, even on Windows CI.
vi.mock('node:os', async (importOriginal) => {
const actual = await importOriginal<typeof import('node:os')>();
return {
...actual,
default: {
...actual,
platform: () => 'linux',
},
platform: () => 'linux',
};
});
// Mock shell-utils to ensure consistent behavior across platforms (especially Windows CI)
// We want to test PolicyEngine logic with Bash syntax rules.
vi.mock('../utils/shell-utils.js', async (importOriginal) => {
const actual =
await importOriginal<typeof import('../utils/shell-utils.js')>();
return {
...actual,
getShellConfiguration: () => ({
executable: 'bash',
argsPrefix: ['-c'],
shell: 'bash',
}),
};
});
describe('PolicyEngine Command Substitution Validation', () => {
beforeAll(async () => {
await initializeShellParsers();
});
const setupEngine = (blockedCmd: string) =>
new PolicyEngine({
defaultDecision: PolicyDecision.ALLOW,
rules: [
{
toolName: 'run_shell_command',
argsPattern: new RegExp(`"command":"${blockedCmd}"`),
decision: PolicyDecision.DENY,
},
],
});
it('should block echo $(dangerous_cmd) when dangerous_cmd is explicitly blocked', async () => {
const engine = setupEngine('dangerous_cmd');
const result = await engine.check(
{ name: 'run_shell_command', args: { command: 'echo $(dangerous_cmd)' } },
'test-server',
);
expect(result.decision).toBe(PolicyDecision.DENY);
});
it('should block backtick substitution `dangerous_cmd`', async () => {
const engine = setupEngine('dangerous_cmd');
const result = await engine.check(
{ name: 'run_shell_command', args: { command: 'echo `dangerous_cmd`' } },
'test-server',
);
expect(result.decision).toBe(PolicyDecision.DENY);
});
it('should block commands inside subshells (dangerous_cmd)', async () => {
const engine = setupEngine('dangerous_cmd');
const result = await engine.check(
{ name: 'run_shell_command', args: { command: '(dangerous_cmd)' } },
'test-server',
);
expect(result.decision).toBe(PolicyDecision.DENY);
});
it('should handle nested substitutions deeply', async () => {
const engine = setupEngine('deep_danger');
const result = await engine.check(
{
name: 'run_shell_command',
args: { command: 'echo $(ls $(deep_danger))' },
},
'test-server',
);
expect(result.decision).toBe(PolicyDecision.DENY);
});
});
+2
View File
@@ -335,8 +335,10 @@ export interface PolicySettings {
allowed?: string[]; allowed?: string[];
}; };
tools?: { tools?: {
core?: string[];
exclude?: string[]; exclude?: string[];
allowed?: string[]; allowed?: string[];
confirmationRequired?: string[];
}; };
mcpServers?: Record<string, { trust?: boolean }>; mcpServers?: Record<string, { trust?: boolean }>;
// User provided policies that will replace the USER level policies in ~/.gemini/policies // User provided policies that will replace the USER level policies in ~/.gemini/policies
+4 -2
View File
@@ -1993,13 +1993,15 @@ describe('getRipgrepPath', () => {
vi.mocked(fileExists).mockImplementation( vi.mocked(fileExists).mockImplementation(
async (checkPath) => async (checkPath) =>
checkPath.includes(path.normalize('core/vendor/ripgrep')) && checkPath.includes(path.normalize('core/vendor/ripgrep')) &&
!checkPath.includes('tools'), !checkPath.includes(path.join(path.sep, 'tools', path.sep)),
); );
const resolvedPath = await getRipgrepPath(); const resolvedPath = await getRipgrepPath();
expect(resolvedPath).not.toBeNull(); expect(resolvedPath).not.toBeNull();
expect(resolvedPath).toContain(path.normalize('core/vendor/ripgrep')); expect(resolvedPath).toContain(path.normalize('core/vendor/ripgrep'));
expect(resolvedPath).not.toContain('tools'); expect(resolvedPath).not.toContain(
path.join(path.sep, 'tools', path.sep),
);
}); });
it('should return null if binary is missing from both paths', async () => { it('should return null if binary is missing from both paths', async () => {
+13 -1
View File
@@ -240,11 +240,15 @@ foreach ($commandAst in $commandAsts) {
'utf16le', 'utf16le',
).toString('base64'); ).toString('base64');
const REDIRECTION_NAMES = new Set([ export const REDIRECTION_NAMES = new Set([
'redirection (<)', 'redirection (<)',
'redirection (>)', 'redirection (>)',
'heredoc (<<)', 'heredoc (<<)',
'herestring (<<<)', 'herestring (<<<)',
'command substitution',
'backtick substitution',
'process substitution',
'subshell',
]); ]);
function createParser(): Parser | null { function createParser(): Parser | null {
@@ -360,6 +364,14 @@ function extractNameFromNode(node: Node): string | null {
return 'heredoc (<<)'; return 'heredoc (<<)';
case 'herestring_redirect': case 'herestring_redirect':
return 'herestring (<<<)'; return 'herestring (<<<)';
case 'command_substitution':
return 'command substitution';
case 'backtick_substitution':
return 'backtick substitution';
case 'process_substitution':
return 'process substitution';
case 'subshell':
return 'subshell';
default: default:
return null; return null;
} }
+9
View File
@@ -2531,6 +2531,15 @@
"type": "string" "type": "string"
} }
}, },
"confirmationRequired": {
"title": "Confirmation Required",
"description": "Tool names that always require user confirmation. Takes precedence over allowed tools and core tool allowlists.",
"markdownDescription": "Tool names that always require user confirmation. Takes precedence over allowed tools and core tool allowlists.\n\n- Category: `Advanced`\n- Requires restart: `yes`",
"type": "array",
"items": {
"type": "string"
}
},
"exclude": { "exclude": {
"title": "Exclude Tools", "title": "Exclude Tools",
"description": "Tool names to exclude from discovery.", "description": "Tool names to exclude from discovery.",