diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 56dd6b4b5d..46b2ff980e 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -1467,6 +1467,12 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `undefined` - **Requires restart:** Yes +- **`tools.confirmationRequired`** (array): + - **Description:** Tool names that always require user confirmation. Takes + precedence over allowed tools and core tool allowlists. + - **Default:** `undefined` + - **Requires restart:** Yes + - **`tools.exclude`** (array): - **Description:** Tool names to exclude from discovery. - **Default:** `undefined` diff --git a/evals/save_memory.eval.ts b/evals/save_memory.eval.ts index 8680f8eba8..b31167fb4a 100644 --- a/evals/save_memory.eval.ts +++ b/evals/save_memory.eval.ts @@ -18,6 +18,11 @@ describe('save_memory', () => { suiteName: 'default', suiteType: 'behavioral', name: rememberingFavoriteColor, + params: { + settings: { + experimental: { memoryV2: false }, + }, + }, prompt: `remember that my favorite color is blue. @@ -40,6 +45,11 @@ describe('save_memory', () => { suiteName: 'default', suiteType: 'behavioral', name: rememberingCommandRestrictions, + params: { + settings: { + experimental: { memoryV2: false }, + }, + }, prompt: `I don't want you to ever run npm commands.`, assert: async (rig, result) => { @@ -61,6 +71,11 @@ describe('save_memory', () => { suiteName: 'default', suiteType: 'behavioral', name: rememberingWorkflow, + params: { + settings: { + experimental: { memoryV2: false }, + }, + }, prompt: `I want you to always lint after building.`, assert: async (rig, result) => { @@ -83,6 +98,11 @@ describe('save_memory', () => { suiteName: 'default', suiteType: 'behavioral', name: ignoringTemporaryInformation, + params: { + settings: { + experimental: { memoryV2: false }, + }, + }, prompt: `I'm going to get a coffee.`, assert: async (rig, result) => { @@ -108,6 +128,11 @@ describe('save_memory', () => { suiteName: 'default', suiteType: 'behavioral', name: rememberingPetName, + params: { + settings: { + experimental: { memoryV2: false }, + }, + }, prompt: `Please remember that my dog's name is Buddy.`, assert: async (rig, result) => { @@ -129,6 +154,11 @@ describe('save_memory', () => { suiteName: 'default', suiteType: 'behavioral', name: rememberingCommandAlias, + params: { + settings: { + experimental: { memoryV2: false }, + }, + }, prompt: `When I say 'start server', you should run 'npm run dev'.`, assert: async (rig, result) => { @@ -151,6 +181,11 @@ describe('save_memory', () => { suiteName: 'default', suiteType: 'behavioral', name: savingDbSchemaLocationAsProjectMemory, + params: { + settings: { + experimental: { memoryV2: false }, + }, + }, prompt: `The database schema for this workspace is located in \`db/schema.sql\`.`, assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall( @@ -180,6 +215,11 @@ describe('save_memory', () => { suiteName: 'default', suiteType: 'behavioral', name: rememberingCodingStyle, + params: { + settings: { + experimental: { memoryV2: false }, + }, + }, prompt: `I prefer to use tabs instead of spaces for indentation.`, assert: async (rig, result) => { @@ -202,6 +242,11 @@ describe('save_memory', () => { suiteName: 'default', suiteType: 'behavioral', name: savingBuildArtifactLocationAsProjectMemory, + params: { + settings: { + experimental: { memoryV2: false }, + }, + }, prompt: `In this workspace, build artifacts are stored in the \`dist/artifacts\` directory.`, assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall( @@ -231,6 +276,11 @@ describe('save_memory', () => { suiteName: 'default', suiteType: 'behavioral', name: savingMainEntryPointAsProjectMemory, + params: { + settings: { + experimental: { memoryV2: false }, + }, + }, prompt: `The main entry point for this workspace is \`src/index.js\`.`, assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall( @@ -259,6 +309,11 @@ describe('save_memory', () => { suiteName: 'default', suiteType: 'behavioral', name: rememberingBirthday, + params: { + settings: { + experimental: { memoryV2: false }, + }, + }, prompt: `My birthday is on June 15th.`, assert: async (rig, result) => { diff --git a/evals/test-helper.ts b/evals/test-helper.ts index 7369a6919c..af6bade201 100644 --- a/evals/test-helper.ts +++ b/evals/test-helper.ts @@ -172,6 +172,7 @@ export async function internalEvalTest(evalCase: EvalCase) { timeout: evalCase.timeout, env: { GEMINI_CLI_ACTIVITY_LOG_TARGET: activityLogFile, + GEMINI_CLI_TRUST_WORKSPACE: 'true', }, }); diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index f5da86b60a..05d4cfae7f 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -1667,6 +1667,19 @@ const SETTINGS_SCHEMA = { showInDialog: false, items: { type: 'string' }, }, + confirmationRequired: { + type: 'array', + label: 'Confirmation Required', + category: 'Advanced', + requiresRestart: true, + default: undefined as string[] | undefined, + description: oneLine` + Tool names that always require user confirmation. + Takes precedence over allowed tools and core tool allowlists. + `, + showInDialog: false, + items: { type: 'string' }, + }, exclude: { type: 'array', label: 'Exclude Tools', diff --git a/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap index db449ce4d7..4830e90db1 100644 --- a/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap @@ -168,6 +168,13 @@ exports[`InputPrompt > mouse interaction > should toggle paste expansion on doub " `; +exports[`InputPrompt > mouse interaction > should toggle paste expansion on double-click 4`] = ` +"▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀ + > [Pasted Text: 10 lines] +▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄ +" +`; + exports[`InputPrompt > multiline rendering > should correctly render multiline input including blank lines 1`] = ` "──────────────────────────────────────────────────────────────────────────────────────────────────── > hello diff --git a/packages/core/src/policy/config.ts b/packages/core/src/policy/config.ts index 359054add3..6d978479cb 100644 --- a/packages/core/src/policy/config.ts +++ b/packages/core/src/policy/config.ts @@ -74,7 +74,9 @@ export const ADMIN_POLICY_TIER = 5; export const MCP_EXCLUDED_PRIORITY = USER_POLICY_TIER + 0.9; export const EXCLUDE_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.4; +export const CONFIRMATION_REQUIRED_PRIORITY = USER_POLICY_TIER + 0.35; export const ALLOWED_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.3; +export const CORE_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.25; export const TRUSTED_MCP_SERVER_PRIORITY = USER_POLICY_TIER + 0.2; export const ALLOWED_MCP_SERVER_PRIORITY = USER_POLICY_TIER + 0.1; @@ -434,10 +436,21 @@ export async function createPolicyEngineConfig( } } - // Tools that are explicitly allowed in the settings. - // Priority: ALLOWED_TOOLS_FLAG_PRIORITY (user tier - explicit temporary allows) - if (settings.tools?.allowed) { - for (const tool of settings.tools.allowed) { + const nonPlanModes = [ + ApprovalMode.DEFAULT, + ApprovalMode.AUTO_EDIT, + ApprovalMode.YOLO, + ]; + + const mapToolsToRules = ( + tools: string[], + priority: number, + source: string, + modes?: ApprovalMode[], + addDefaultDenyForTools = false, + ) => { + const toolsWithNarrowing = new Set(); + for (const tool of tools) { // Check for legacy format: toolName(args) const match = tool.match(/^([a-zA-Z0-9_-]+)\((.*)\)$/); if (match) { @@ -449,15 +462,17 @@ export async function createPolicyEngineConfig( // Treat args as a command prefix for shell tool if (toolName === SHELL_TOOL_NAME) { + toolsWithNarrowing.add(toolName); const patterns = buildArgsPatterns(undefined, args); for (const pattern of patterns) { if (pattern) { rules.push({ toolName, decision: PolicyDecision.ALLOW, - priority: ALLOWED_TOOLS_FLAG_PRIORITY, + priority, argsPattern: new RegExp(pattern), - source: 'Settings (Tools Allowed)', + source, + modes, }); } } @@ -467,8 +482,9 @@ export async function createPolicyEngineConfig( rules.push({ toolName, decision: PolicyDecision.ALLOW, - priority: ALLOWED_TOOLS_FLAG_PRIORITY, - source: 'Settings (Tools Allowed)', + priority, + source, + modes, }); } } else { @@ -479,11 +495,70 @@ export async function createPolicyEngineConfig( rules.push({ toolName, decision: PolicyDecision.ALLOW, - priority: ALLOWED_TOOLS_FLAG_PRIORITY, - source: 'Settings (Tools Allowed)', + priority, + source, + modes, }); } } + + if (addDefaultDenyForTools) { + for (const toolName of toolsWithNarrowing) { + rules.push({ + toolName, + decision: PolicyDecision.DENY, + priority: priority - 0.01, + source: `${source} (Narrowing Enforcement)`, + modes, + }); + } + } + }; + + // Tools that are explicitly allowed in the settings. + // Priority: ALLOWED_TOOLS_FLAG_PRIORITY (user tier - explicit temporary allows) + if (settings.tools?.allowed) { + mapToolsToRules( + settings.tools.allowed, + ALLOWED_TOOLS_FLAG_PRIORITY, + 'Settings (Tools Allowed)', + undefined, + true, + ); + } + + // Tools that explicitly require confirmation in the settings. + // Priority: CONFIRMATION_REQUIRED_PRIORITY (overrides allowed and core) + if (settings.tools?.confirmationRequired) { + for (const tool of settings.tools.confirmationRequired) { + rules.push({ + toolName: SHELL_TOOL_NAMES.includes(tool) ? SHELL_TOOL_NAME : tool, + decision: PolicyDecision.ASK_USER, + priority: CONFIRMATION_REQUIRED_PRIORITY, + source: 'Settings (Confirmation Required)', + }); + } + } + + // Core tools that are restricted in the settings. + // Priority: CORE_TOOLS_FLAG_PRIORITY (user tier - core tool allowlist) + if (settings.tools?.core) { + mapToolsToRules( + settings.tools.core, + CORE_TOOLS_FLAG_PRIORITY, + 'Settings (Core Tools)', + nonPlanModes, + ); + + // If core tools are restricted, we should add a default DENY rule for everything else + // at a slightly lower priority than the explicit allows. + rules.push({ + toolName: '*', + decision: PolicyDecision.DENY, + priority: CORE_TOOLS_FLAG_PRIORITY - 0.01, + source: 'Settings (Core Tools Allowlist Enforcement)', + modes: nonPlanModes, + }); } // MCP servers that are trusted in the settings. @@ -501,6 +576,7 @@ export async function createPolicyEngineConfig( decision: PolicyDecision.ALLOW, priority: TRUSTED_MCP_SERVER_PRIORITY, source: 'Settings (MCP Trusted)', + modes: nonPlanModes, }); } } @@ -519,6 +595,7 @@ export async function createPolicyEngineConfig( decision: PolicyDecision.ALLOW, priority: ALLOWED_MCP_SERVER_PRIORITY, source: 'Settings (MCP Allowed)', + modes: nonPlanModes, }); } } diff --git a/packages/core/src/policy/core-tools-mapping.test.ts b/packages/core/src/policy/core-tools-mapping.test.ts new file mode 100644 index 0000000000..95877c6ac4 --- /dev/null +++ b/packages/core/src/policy/core-tools-mapping.test.ts @@ -0,0 +1,76 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { createPolicyEngineConfig } from './config.js'; +import { PolicyEngine } from './policy-engine.js'; +import { PolicyDecision, ApprovalMode } from './types.js'; + +describe('PolicyEngine - Core Tools Mapping', () => { + it('should allow tools explicitly listed in settings.tools.core', async () => { + const settings = { + tools: { + core: ['run_shell_command(ls)', 'run_shell_command(git status)'], + }, + }; + + const config = await createPolicyEngineConfig( + settings, + ApprovalMode.DEFAULT, + undefined, + true, // interactive + ); + + const engine = new PolicyEngine(config); + + // Test simple tool name + const result1 = await engine.check( + { name: 'run_shell_command', args: { command: 'ls' } }, + undefined, + ); + expect(result1.decision).toBe(PolicyDecision.ALLOW); + + // Test tool name with args + const result2 = await engine.check( + { name: 'run_shell_command', args: { command: 'git status' } }, + undefined, + ); + expect(result2.decision).toBe(PolicyDecision.ALLOW); + + // Test tool not in core list + const result3 = await engine.check( + { name: 'run_shell_command', args: { command: 'npm test' } }, + undefined, + ); + // Should be DENIED because of strict allowlist + expect(result3.decision).toBe(PolicyDecision.DENY); + }); + + it('should allow tools in tools.core even if they are restricted by default policies', async () => { + // By default run_shell_command is ASK_USER. + // Putting it in tools.core should make it ALLOW. + const settings = { + tools: { + core: ['run_shell_command'], + }, + }; + + const config = await createPolicyEngineConfig( + settings, + ApprovalMode.DEFAULT, + undefined, + true, + ); + + const engine = new PolicyEngine(config); + + const result = await engine.check( + { name: 'run_shell_command', args: { command: 'any command' } }, + undefined, + ); + expect(result.decision).toBe(PolicyDecision.ALLOW); + }); +}); diff --git a/packages/core/src/policy/policy-engine.test.ts b/packages/core/src/policy/policy-engine.test.ts index 5606c49793..8604a79961 100644 --- a/packages/core/src/policy/policy-engine.test.ts +++ b/packages/core/src/policy/policy-engine.test.ts @@ -43,6 +43,35 @@ vi.mock('../utils/shell-utils.js', async (importOriginal) => { } return [command]; }), + parseCommandDetails: vi.fn().mockImplementation((command: string) => { + // Basic mock implementation for PolicyEngine test needs + const commands = command.includes('&&') + ? command.split('&&').map((c) => c.trim()) + : [command.trim()]; + + // Detect $(...) or `...` and add as sub-commands for recursion tests + const subCommands = [...commands]; + for (const cmd of commands) { + const subMatch = cmd.match(/\$\((.*)\)/) || cmd.match(/`(.*)`/); + if (subMatch?.[1]) { + subCommands.push(subMatch[1].trim()); + } + } + + return { + details: subCommands.map((c, i) => ({ + name: c.split(' ')[0], + text: c, + startIndex: i === 0 ? 0 : -1, // Simple root indication + })), + hasError: false, + }; + }), + stripShellWrapper: vi.fn().mockImplementation((command: string) => { + // Simple mock for stripping wrappers + const match = command.match(/^(?:bash|sh|zsh)\s+-c\s+["'](.*)["']$/i); + return match ? match[1] : command; + }), hasRedirection: vi.fn().mockImplementation( (command: string) => // Simple mock: true if '>' is present, unless it looks like "-> arrow" @@ -1862,7 +1891,6 @@ describe('PolicyEngine', () => { }); it('should return ASK_USER in non-YOLO mode if shell command parsing fails', async () => { - const { splitCommands } = await import('../utils/shell-utils.js'); const rules: PolicyRule[] = [ { toolName: 'run_shell_command', @@ -1877,7 +1905,11 @@ describe('PolicyEngine', () => { }); // Simulate parsing failure - vi.mocked(splitCommands).mockReturnValueOnce([]); + const { parseCommandDetails } = await import('../utils/shell-utils.js'); + vi.mocked(parseCommandDetails).mockReturnValueOnce({ + details: [], + hasError: true, + }); const result = await engine.check( { name: 'run_shell_command', args: { command: 'complex command' } }, diff --git a/packages/core/src/policy/policy-engine.ts b/packages/core/src/policy/policy-engine.ts index a9e049c74d..e0e3c61215 100644 --- a/packages/core/src/policy/policy-engine.ts +++ b/packages/core/src/policy/policy-engine.ts @@ -7,8 +7,10 @@ import { type FunctionCall } from '@google/genai'; import { SHELL_TOOL_NAMES, + REDIRECTION_NAMES, initializeShellParsers, - splitCommands, + parseCommandDetails, + stripShellWrapper, hasRedirection, extractStringFromParseEntry, } from '../utils/shell-utils.js'; @@ -359,7 +361,8 @@ export class PolicyEngine { } await initializeShellParsers(); - const subCommands = splitCommands(command); + const parsed = parseCommandDetails(command); + const subCommands = parsed?.details ?? []; if (subCommands.length === 0) { // If the matched rule says DENY, we should respect it immediately even if parsing fails. @@ -380,115 +383,109 @@ export class PolicyEngine { ); // Parsing logic failed, we can't trust it. Use default decision ASK_USER (or DENY in non-interactive). - // We return the rule that matched so the evaluation loop terminates. return { decision: this.defaultDecision, rule, }; } - // If there are multiple parts, or if we just want to validate the single part against DENY rules - if (subCommands.length > 0) { - debugLogger.debug( - `[PolicyEngine.check] Validating shell command: ${subCommands.length} parts`, - ); + debugLogger.debug( + `[PolicyEngine.check] Validating shell command: ${subCommands.length} parts`, + ); - if (ruleDecision === PolicyDecision.DENY) { - return { decision: PolicyDecision.DENY, rule }; - } + if (ruleDecision === PolicyDecision.DENY) { + return { decision: PolicyDecision.DENY, rule }; + } - // Start optimistically. If all parts are ALLOW, the whole is ALLOW. - // We will downgrade if any part is ASK_USER or DENY. - let aggregateDecision = PolicyDecision.ALLOW; - let responsibleRule: PolicyRule | undefined; + // Start with the decision from the rule or heuristics. + // If the tool call was already downgraded (e.g. by heuristics), we start there. + let aggregateDecision = ruleDecision; - // Check for redirection on the full command string - if (this.shouldDowngradeForRedirection(command, allowRedirection)) { + // If heuristics downgraded the decision, we don't blame the rule. + let responsibleRule: PolicyRule | undefined = + rule && ruleDecision === rule.decision ? rule : undefined; + + // Check for redirection on the full command string. + // Redirection always downgrades ALLOW to ASK_USER (it never upgrades). + if (this.shouldDowngradeForRedirection(command, allowRedirection)) { + if (aggregateDecision === PolicyDecision.ALLOW) { debugLogger.debug( `[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${command}`, ); aggregateDecision = PolicyDecision.ASK_USER; responsibleRule = undefined; // Inherent policy } + } - for (const rawSubCmd of subCommands) { - const subCmd = rawSubCmd.trim(); - // Prevent infinite recursion for the root command - if (subCmd === command) { - if (this.shouldDowngradeForRedirection(subCmd, allowRedirection)) { - debugLogger.debug( - `[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${subCmd}`, - ); - // Redirection always downgrades ALLOW to ASK_USER - if (aggregateDecision === PolicyDecision.ALLOW) { - aggregateDecision = PolicyDecision.ASK_USER; - responsibleRule = undefined; // Inherent policy - } + for (const detail of subCommands) { + if (REDIRECTION_NAMES.has(detail.name)) { + continue; + } + + const subCmd = detail.text.trim(); + const isAtomic = + subCmd === command || + (detail.startIndex === 0 && detail.text.length === command.length); + + // Recursive check for shell wrappers (bash -c, etc.) + const stripped = stripShellWrapper(subCmd); + if (stripped !== subCmd) { + const wrapperResult = await this.check( + { name: toolName, args: { command: stripped, dir_path } }, + serverName, + toolAnnotations, + subagent, + true, + ); + + if (wrapperResult.decision === PolicyDecision.DENY) + return wrapperResult; + if (wrapperResult.decision === PolicyDecision.ASK_USER) { + if (aggregateDecision === PolicyDecision.ALLOW) { + responsibleRule = wrapperResult.rule; } else { - // Atomic command matching the rule. - if ( - ruleDecision === PolicyDecision.ASK_USER && - aggregateDecision === PolicyDecision.ALLOW - ) { - aggregateDecision = PolicyDecision.ASK_USER; - responsibleRule = rule; - } + responsibleRule ??= wrapperResult.rule; } - continue; + aggregateDecision = PolicyDecision.ASK_USER; } + } + if (!isAtomic) { const subResult = await this.check( { name: toolName, args: { command: subCmd, dir_path } }, serverName, toolAnnotations, subagent, + true, ); - // subResult.decision is already filtered through applyNonInteractiveMode by this.check() - const subDecision = subResult.decision; + if (subResult.decision === PolicyDecision.DENY) return subResult; - // If any part is DENIED, the whole command is DENY - if (subDecision === PolicyDecision.DENY) { - return { - decision: PolicyDecision.DENY, - rule: subResult.rule, - }; - } - - // If any part requires ASK_USER, the whole command requires ASK_USER - if (subDecision === PolicyDecision.ASK_USER) { - aggregateDecision = PolicyDecision.ASK_USER; - if (!responsibleRule) { + if (subResult.decision === PolicyDecision.ASK_USER) { + if (aggregateDecision === PolicyDecision.ALLOW) { responsibleRule = subResult.rule; + } else { + responsibleRule ??= subResult.rule; } + aggregateDecision = PolicyDecision.ASK_USER; } - // Check for redirection in allowed sub-commands + // Downgrade if sub-command has redirection if ( - subDecision === PolicyDecision.ALLOW && + subResult.decision === PolicyDecision.ALLOW && this.shouldDowngradeForRedirection(subCmd, allowRedirection) ) { - debugLogger.debug( - `[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${subCmd}`, - ); if (aggregateDecision === PolicyDecision.ALLOW) { aggregateDecision = PolicyDecision.ASK_USER; responsibleRule = undefined; } } } - - return { - decision: aggregateDecision, - // If we stayed at ALLOW, we return the original rule (if any). - // If we downgraded, we return the responsible rule (or undefined if implicit). - rule: aggregateDecision === ruleDecision ? rule : responsibleRule, - }; } return { - decision: ruleDecision, - rule, + decision: aggregateDecision, + rule: aggregateDecision === ruleDecision ? rule : responsibleRule, }; } @@ -501,6 +498,7 @@ export class PolicyEngine { serverName: string | undefined, toolAnnotations?: Record, subagent?: string, + skipHeuristics = false, ): Promise { // Case 1: Metadata injection is the primary and safest way to identify an MCP server. // If we have explicit `_serverName` metadata (usually injected by tool-registry for active tools), use it. @@ -594,6 +592,7 @@ export class PolicyEngine { let ruleDecision = rule.decision; if ( + !skipHeuristics && isShellCommand && command && !('commandPrefix' in rule) && @@ -615,12 +614,10 @@ export class PolicyEngine { subagent, ); decision = shellResult.decision; - if (shellResult.rule) { - matchedRule = shellResult.rule; - break; - } + matchedRule = shellResult.rule; + break; } else { - decision = rule.decision; + decision = ruleDecision; matchedRule = rule; break; } @@ -643,7 +640,7 @@ export class PolicyEngine { ); if (toolName && SHELL_TOOL_NAMES.includes(toolName)) { let heuristicDecision = this.defaultDecision; - if (command) { + if (!skipHeuristics && command) { heuristicDecision = await this.applyShellHeuristics( command, heuristicDecision, diff --git a/packages/core/src/policy/shell-safety-regression.test.ts b/packages/core/src/policy/shell-safety-regression.test.ts new file mode 100644 index 0000000000..1a1d608959 --- /dev/null +++ b/packages/core/src/policy/shell-safety-regression.test.ts @@ -0,0 +1,134 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeAll } from 'vitest'; +import { PolicyEngine } from './policy-engine.js'; +import { PolicyDecision, ApprovalMode } from './types.js'; +import { initializeShellParsers } from '../utils/shell-utils.js'; +import { buildArgsPatterns } from './utils.js'; + +describe('PolicyEngine - Shell Safety Regression Suite', () => { + let engine: PolicyEngine; + + beforeAll(async () => { + await initializeShellParsers(); + }); + + const setupEngine = (allowedCommands: string[]) => { + const rules = allowedCommands.map((cmd) => ({ + toolName: 'run_shell_command', + decision: PolicyDecision.ALLOW, + argsPattern: new RegExp(buildArgsPatterns(undefined, cmd)[0]!), + priority: 10, + })); + + return new PolicyEngine({ + rules, + approvalMode: ApprovalMode.DEFAULT, + defaultDecision: PolicyDecision.ASK_USER, + }); + }; + + it('should block unauthorized chained command with &&', async () => { + engine = setupEngine(['echo']); + const result = await engine.check( + { name: 'run_shell_command', args: { command: 'echo hi && ls' } }, + undefined, + ); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); + + it('should allow authorized chained command with &&', async () => { + engine = setupEngine(['echo', 'ls']); + const result = await engine.check( + { name: 'run_shell_command', args: { command: 'echo hi && ls' } }, + undefined, + ); + expect(result.decision).toBe(PolicyDecision.ALLOW); + }); + + it('should block unauthorized chained command with ||', async () => { + engine = setupEngine(['false']); + const result = await engine.check( + { name: 'run_shell_command', args: { command: 'false || ls' } }, + undefined, + ); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); + + it('should block unauthorized chained command with ;', async () => { + engine = setupEngine(['echo']); + const result = await engine.check( + { name: 'run_shell_command', args: { command: 'echo hi; ls' } }, + undefined, + ); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); + + it('should block unauthorized command in pipe |', async () => { + engine = setupEngine(['echo']); + const result = await engine.check( + { name: 'run_shell_command', args: { command: 'echo hi | grep "hi"' } }, + undefined, + ); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); + + it('should allow authorized command in pipe |', async () => { + engine = setupEngine(['echo', 'grep']); + const result = await engine.check( + { name: 'run_shell_command', args: { command: 'echo hi | grep "hi"' } }, + undefined, + ); + expect(result.decision).toBe(PolicyDecision.ALLOW); + }); + + it('should block unauthorized chained command with &', async () => { + engine = setupEngine(['echo']); + const result = await engine.check( + { name: 'run_shell_command', args: { command: 'echo hi & ls' } }, + undefined, + ); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); + + it('should allow authorized chained command with &', async () => { + engine = setupEngine(['echo', 'ls']); + const result = await engine.check( + { name: 'run_shell_command', args: { command: 'echo hi & ls' } }, + undefined, + ); + expect(result.decision).toBe(PolicyDecision.ALLOW); + }); + + it('should block unauthorized command in nested substitution', async () => { + engine = setupEngine(['echo', 'cat']); + const result = await engine.check( + { name: 'run_shell_command', args: { command: 'echo $(cat $(ls))' } }, + undefined, + ); + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); + + it('should allow authorized command in nested substitution', async () => { + engine = setupEngine(['echo', 'cat', 'ls']); + const result = await engine.check( + { name: 'run_shell_command', args: { command: 'echo $(cat $(ls))' } }, + undefined, + ); + expect(result.decision).toBe(PolicyDecision.ALLOW); + }); + + it('should block command redirection if not explicitly allowed', async () => { + engine = setupEngine(['echo']); + const result = await engine.check( + { name: 'run_shell_command', args: { command: 'echo hi > /tmp/test' } }, + undefined, + ); + // Inherent policy: redirection downgrades to ASK_USER + expect(result.decision).toBe(PolicyDecision.ASK_USER); + }); +}); diff --git a/packages/core/src/policy/shell-safety.test.ts b/packages/core/src/policy/shell-safety.test.ts index 340264485e..51d3d26294 100644 --- a/packages/core/src/policy/shell-safety.test.ts +++ b/packages/core/src/policy/shell-safety.test.ts @@ -59,6 +59,30 @@ vi.mock('../utils/shell-utils.js', async (importOriginal) => { return { ...actual, initializeShellParsers: vi.fn(), + parseCommandDetails: (command: string) => { + if (Object.prototype.hasOwnProperty.call(commandMap, command)) { + const subcommands = commandMap[command]; + return { + details: subcommands.map((text) => ({ + name: text.split(' ')[0], + text, + startIndex: command.indexOf(text), + })), + hasError: subcommands.length === 0 && command.includes('&&&'), + }; + } + return { + details: [ + { + name: command.split(' ')[0], + text: command, + startIndex: 0, + }, + ], + hasError: false, + }; + }, + stripShellWrapper: (command: string) => command, splitCommands: (command: string) => { if (Object.prototype.hasOwnProperty.call(commandMap, command)) { return commandMap[command]; diff --git a/packages/core/src/policy/shell-substitution.test.ts b/packages/core/src/policy/shell-substitution.test.ts new file mode 100644 index 0000000000..cc28847233 --- /dev/null +++ b/packages/core/src/policy/shell-substitution.test.ts @@ -0,0 +1,97 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { expect, describe, it, beforeAll, vi } from 'vitest'; +import { PolicyEngine } from './policy-engine.js'; +import { PolicyDecision } from './types.js'; +import { initializeShellParsers } from '../utils/shell-utils.js'; + +// Mock node:os to ensure shell-utils logic always thinks it's on a POSIX-like system. +// This ensures that internal calls to getShellConfiguration() and isWindows() +// within the shell-utils module return 'bash' configuration, even on Windows CI. +vi.mock('node:os', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + default: { + ...actual, + platform: () => 'linux', + }, + platform: () => 'linux', + }; +}); + +// Mock shell-utils to ensure consistent behavior across platforms (especially Windows CI) +// We want to test PolicyEngine logic with Bash syntax rules. +vi.mock('../utils/shell-utils.js', async (importOriginal) => { + const actual = + await importOriginal(); + return { + ...actual, + getShellConfiguration: () => ({ + executable: 'bash', + argsPrefix: ['-c'], + shell: 'bash', + }), + }; +}); + +describe('PolicyEngine Command Substitution Validation', () => { + beforeAll(async () => { + await initializeShellParsers(); + }); + + const setupEngine = (blockedCmd: string) => + new PolicyEngine({ + defaultDecision: PolicyDecision.ALLOW, + rules: [ + { + toolName: 'run_shell_command', + argsPattern: new RegExp(`"command":"${blockedCmd}"`), + decision: PolicyDecision.DENY, + }, + ], + }); + + it('should block echo $(dangerous_cmd) when dangerous_cmd is explicitly blocked', async () => { + const engine = setupEngine('dangerous_cmd'); + const result = await engine.check( + { name: 'run_shell_command', args: { command: 'echo $(dangerous_cmd)' } }, + 'test-server', + ); + expect(result.decision).toBe(PolicyDecision.DENY); + }); + + it('should block backtick substitution `dangerous_cmd`', async () => { + const engine = setupEngine('dangerous_cmd'); + const result = await engine.check( + { name: 'run_shell_command', args: { command: 'echo `dangerous_cmd`' } }, + 'test-server', + ); + expect(result.decision).toBe(PolicyDecision.DENY); + }); + + it('should block commands inside subshells (dangerous_cmd)', async () => { + const engine = setupEngine('dangerous_cmd'); + const result = await engine.check( + { name: 'run_shell_command', args: { command: '(dangerous_cmd)' } }, + 'test-server', + ); + expect(result.decision).toBe(PolicyDecision.DENY); + }); + + it('should handle nested substitutions deeply', async () => { + const engine = setupEngine('deep_danger'); + const result = await engine.check( + { + name: 'run_shell_command', + args: { command: 'echo $(ls $(deep_danger))' }, + }, + 'test-server', + ); + expect(result.decision).toBe(PolicyDecision.DENY); + }); +}); diff --git a/packages/core/src/policy/types.ts b/packages/core/src/policy/types.ts index b843129c99..672e3f8416 100644 --- a/packages/core/src/policy/types.ts +++ b/packages/core/src/policy/types.ts @@ -335,8 +335,10 @@ export interface PolicySettings { allowed?: string[]; }; tools?: { + core?: string[]; exclude?: string[]; allowed?: string[]; + confirmationRequired?: string[]; }; mcpServers?: Record; // User provided policies that will replace the USER level policies in ~/.gemini/policies diff --git a/packages/core/src/tools/ripGrep.test.ts b/packages/core/src/tools/ripGrep.test.ts index 9ad575833a..bd3cd21189 100644 --- a/packages/core/src/tools/ripGrep.test.ts +++ b/packages/core/src/tools/ripGrep.test.ts @@ -1993,13 +1993,15 @@ describe('getRipgrepPath', () => { vi.mocked(fileExists).mockImplementation( async (checkPath) => checkPath.includes(path.normalize('core/vendor/ripgrep')) && - !checkPath.includes('tools'), + !checkPath.includes(path.join(path.sep, 'tools', path.sep)), ); const resolvedPath = await getRipgrepPath(); expect(resolvedPath).not.toBeNull(); expect(resolvedPath).toContain(path.normalize('core/vendor/ripgrep')); - expect(resolvedPath).not.toContain('tools'); + expect(resolvedPath).not.toContain( + path.join(path.sep, 'tools', path.sep), + ); }); it('should return null if binary is missing from both paths', async () => { diff --git a/packages/core/src/utils/shell-utils.ts b/packages/core/src/utils/shell-utils.ts index 6f02579df9..a14b28227f 100644 --- a/packages/core/src/utils/shell-utils.ts +++ b/packages/core/src/utils/shell-utils.ts @@ -240,11 +240,15 @@ foreach ($commandAst in $commandAsts) { 'utf16le', ).toString('base64'); -const REDIRECTION_NAMES = new Set([ +export const REDIRECTION_NAMES = new Set([ 'redirection (<)', 'redirection (>)', 'heredoc (<<)', 'herestring (<<<)', + 'command substitution', + 'backtick substitution', + 'process substitution', + 'subshell', ]); function createParser(): Parser | null { @@ -360,6 +364,14 @@ function extractNameFromNode(node: Node): string | null { return 'heredoc (<<)'; case 'herestring_redirect': return 'herestring (<<<)'; + case 'command_substitution': + return 'command substitution'; + case 'backtick_substitution': + return 'backtick substitution'; + case 'process_substitution': + return 'process substitution'; + case 'subshell': + return 'subshell'; default: return null; } diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index e7b362fc4e..0e4d005037 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -2531,6 +2531,15 @@ "type": "string" } }, + "confirmationRequired": { + "title": "Confirmation Required", + "description": "Tool names that always require user confirmation. Takes precedence over allowed tools and core tool allowlists.", + "markdownDescription": "Tool names that always require user confirmation. Takes precedence over allowed tools and core tool allowlists.\n\n- Category: `Advanced`\n- Requires restart: `yes`", + "type": "array", + "items": { + "type": "string" + } + }, "exclude": { "title": "Exclude Tools", "description": "Tool names to exclude from discovery.",