feat(core): enhance shell command validation and add core tools allowlist (#25720)

Co-authored-by: David Pierce <davidapierce@google.com> Co-authored-by: Keith Schaab <keithsc@google.com> Co-authored-by: Keith Schaab <keith.schaab@gmail.com> Co-authored-by: Emily Hedlund <ehedlund@google.com>
2026-05-13 05:12:55 -07:00 · 2026-04-23 13:26:01 -07:00
parent c024064f47
commit 27927c55e5
16 changed files with 632 additions and 88 deletions
@@ -1467,6 +1467,12 @@ their corresponding top-level category object in your `settings.json` file.
  - **Default:** `undefined`
  - **Requires restart:** Yes
 - **`tools.confirmationRequired`** (array):
  - **Description:** Tool names that always require user confirmation. Takes
    precedence over allowed tools and core tool allowlists.
  - **Default:** `undefined`
  - **Requires restart:** Yes
 - **`tools.exclude`** (array):
  - **Description:** Tool names to exclude from discovery.
  - **Default:** `undefined`
@@ -18,6 +18,11 @@ describe('save_memory', () => {
    suiteName: 'default',
    suiteType: 'behavioral',
    name: rememberingFavoriteColor,
    params: {
      settings: {
        experimental: { memoryV2: false },
      },
    },
    prompt: `remember that my favorite color is  blue.
@@ -40,6 +45,11 @@ describe('save_memory', () => {
    suiteName: 'default',
    suiteType: 'behavioral',
    name: rememberingCommandRestrictions,
    params: {
      settings: {
        experimental: { memoryV2: false },
      },
    },
    prompt: `I don't want you to ever run npm commands.`,
    assert: async (rig, result) => {
@@ -61,6 +71,11 @@ describe('save_memory', () => {
    suiteName: 'default',
    suiteType: 'behavioral',
    name: rememberingWorkflow,
    params: {
      settings: {
        experimental: { memoryV2: false },
      },
    },
    prompt: `I want you to always lint after building.`,
    assert: async (rig, result) => {
@@ -83,6 +98,11 @@ describe('save_memory', () => {
    suiteName: 'default',
    suiteType: 'behavioral',
    name: ignoringTemporaryInformation,
    params: {
      settings: {
        experimental: { memoryV2: false },
      },
    },
    prompt: `I'm going to get a coffee.`,
    assert: async (rig, result) => {
@@ -108,6 +128,11 @@ describe('save_memory', () => {
    suiteName: 'default',
    suiteType: 'behavioral',
    name: rememberingPetName,
    params: {
      settings: {
        experimental: { memoryV2: false },
      },
    },
    prompt: `Please remember that my dog's name is Buddy.`,
    assert: async (rig, result) => {
@@ -129,6 +154,11 @@ describe('save_memory', () => {
    suiteName: 'default',
    suiteType: 'behavioral',
    name: rememberingCommandAlias,
    params: {
      settings: {
        experimental: { memoryV2: false },
      },
    },
    prompt: `When I say 'start server', you should run 'npm run dev'.`,
    assert: async (rig, result) => {
@@ -151,6 +181,11 @@ describe('save_memory', () => {
    suiteName: 'default',
    suiteType: 'behavioral',
    name: savingDbSchemaLocationAsProjectMemory,
    params: {
      settings: {
        experimental: { memoryV2: false },
      },
    },
    prompt: `The database schema for this workspace is located in \`db/schema.sql\`.`,
    assert: async (rig, result) => {
      const wasToolCalled = await rig.waitForToolCall(
@@ -180,6 +215,11 @@ describe('save_memory', () => {
    suiteName: 'default',
    suiteType: 'behavioral',
    name: rememberingCodingStyle,
    params: {
      settings: {
        experimental: { memoryV2: false },
      },
    },
    prompt: `I prefer to use tabs instead of spaces for indentation.`,
    assert: async (rig, result) => {
@@ -202,6 +242,11 @@ describe('save_memory', () => {
    suiteName: 'default',
    suiteType: 'behavioral',
    name: savingBuildArtifactLocationAsProjectMemory,
    params: {
      settings: {
        experimental: { memoryV2: false },
      },
    },
    prompt: `In this workspace, build artifacts are stored in the \`dist/artifacts\` directory.`,
    assert: async (rig, result) => {
      const wasToolCalled = await rig.waitForToolCall(
@@ -231,6 +276,11 @@ describe('save_memory', () => {
    suiteName: 'default',
    suiteType: 'behavioral',
    name: savingMainEntryPointAsProjectMemory,
    params: {
      settings: {
        experimental: { memoryV2: false },
      },
    },
    prompt: `The main entry point for this workspace is \`src/index.js\`.`,
    assert: async (rig, result) => {
      const wasToolCalled = await rig.waitForToolCall(
@@ -259,6 +309,11 @@ describe('save_memory', () => {
    suiteName: 'default',
    suiteType: 'behavioral',
    name: rememberingBirthday,
    params: {
      settings: {
        experimental: { memoryV2: false },
      },
    },
    prompt: `My birthday is on June 15th.`,
    assert: async (rig, result) => {
@@ -172,6 +172,7 @@ export async function internalEvalTest(evalCase: EvalCase) {
        timeout: evalCase.timeout,
        env: {
          GEMINI_CLI_ACTIVITY_LOG_TARGET: activityLogFile,
          GEMINI_CLI_TRUST_WORKSPACE: 'true',
        },
      });
@@ -1667,6 +1667,19 @@ const SETTINGS_SCHEMA = {
        showInDialog: false,
        items: { type: 'string' },
      },
      confirmationRequired: {
        type: 'array',
        label: 'Confirmation Required',
        category: 'Advanced',
        requiresRestart: true,
        default: undefined as string[] | undefined,
        description: oneLine`
          Tool names that always require user confirmation.
          Takes precedence over allowed tools and core tool allowlists.
        `,
        showInDialog: false,
        items: { type: 'string' },
      },
      exclude: {
        type: 'array',
        label: 'Exclude Tools',
@@ -168,6 +168,13 @@ exports[`InputPrompt > mouse interaction > should toggle paste expansion on doub
 "
 `;
 exports[`InputPrompt > mouse interaction > should toggle paste expansion on double-click 4`] = `
 "▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀
 > [Pasted Text: 10 lines]                                                                          
 ▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
 "
 `;
 exports[`InputPrompt > multiline rendering > should correctly render multiline input including blank lines 1`] = `
 "────────────────────────────────────────────────────────────────────────────────────────────────────
 > hello
@@ -74,7 +74,9 @@ export const ADMIN_POLICY_TIER = 5;
 export const MCP_EXCLUDED_PRIORITY = USER_POLICY_TIER + 0.9;
 export const EXCLUDE_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.4;
 export const CONFIRMATION_REQUIRED_PRIORITY = USER_POLICY_TIER + 0.35;
 export const ALLOWED_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.3;
 export const CORE_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.25;
 export const TRUSTED_MCP_SERVER_PRIORITY = USER_POLICY_TIER + 0.2;
 export const ALLOWED_MCP_SERVER_PRIORITY = USER_POLICY_TIER + 0.1;
@@ -434,10 +436,21 @@ export async function createPolicyEngineConfig(
    }
  }
-  // Tools that are explicitly allowed in the settings.
+  const nonPlanModes = [
-  // Priority: ALLOWED_TOOLS_FLAG_PRIORITY (user tier - explicit temporary allows)
+    ApprovalMode.DEFAULT,
-  if (settings.tools?.allowed) {
+    ApprovalMode.AUTO_EDIT,
-    for (const tool of settings.tools.allowed) {
+    ApprovalMode.YOLO,
  ];
  const mapToolsToRules = (
    tools: string[],
    priority: number,
    source: string,
    modes?: ApprovalMode[],
    addDefaultDenyForTools = false,
  ) => {
    const toolsWithNarrowing = new Set<string>();
    for (const tool of tools) {
      // Check for legacy format: toolName(args)
      const match = tool.match(/^([a-zA-Z0-9_-]+)\((.*)\)$/);
      if (match) {
@@ -449,15 +462,17 @@ export async function createPolicyEngineConfig(
        // Treat args as a command prefix for shell tool
        if (toolName === SHELL_TOOL_NAME) {
          toolsWithNarrowing.add(toolName);
          const patterns = buildArgsPatterns(undefined, args);
          for (const pattern of patterns) {
            if (pattern) {
              rules.push({
                toolName,
                decision: PolicyDecision.ALLOW,
-                priority: ALLOWED_TOOLS_FLAG_PRIORITY,
+                priority,
                argsPattern: new RegExp(pattern),
-                source: 'Settings (Tools Allowed)',
+                source,
                modes,
              });
            }
          }
@@ -467,8 +482,9 @@ export async function createPolicyEngineConfig(
          rules.push({
            toolName,
            decision: PolicyDecision.ALLOW,
-            priority: ALLOWED_TOOLS_FLAG_PRIORITY,
+            priority,
-            source: 'Settings (Tools Allowed)',
+            source,
            modes,
          });
        }
      } else {
@@ -479,11 +495,70 @@ export async function createPolicyEngineConfig(
        rules.push({
          toolName,
          decision: PolicyDecision.ALLOW,
-          priority: ALLOWED_TOOLS_FLAG_PRIORITY,
+          priority,
-          source: 'Settings (Tools Allowed)',
+          source,
          modes,
        });
      }
    }
    if (addDefaultDenyForTools) {
      for (const toolName of toolsWithNarrowing) {
        rules.push({
          toolName,
          decision: PolicyDecision.DENY,
          priority: priority - 0.01,
          source: `${source} (Narrowing Enforcement)`,
          modes,
        });
      }
    }
  };
  // Tools that are explicitly allowed in the settings.
  // Priority: ALLOWED_TOOLS_FLAG_PRIORITY (user tier - explicit temporary allows)
  if (settings.tools?.allowed) {
    mapToolsToRules(
      settings.tools.allowed,
      ALLOWED_TOOLS_FLAG_PRIORITY,
      'Settings (Tools Allowed)',
      undefined,
      true,
    );
  }
  // Tools that explicitly require confirmation in the settings.
  // Priority: CONFIRMATION_REQUIRED_PRIORITY (overrides allowed and core)
  if (settings.tools?.confirmationRequired) {
    for (const tool of settings.tools.confirmationRequired) {
      rules.push({
        toolName: SHELL_TOOL_NAMES.includes(tool) ? SHELL_TOOL_NAME : tool,
        decision: PolicyDecision.ASK_USER,
        priority: CONFIRMATION_REQUIRED_PRIORITY,
        source: 'Settings (Confirmation Required)',
      });
    }
  }
  // Core tools that are restricted in the settings.
  // Priority: CORE_TOOLS_FLAG_PRIORITY (user tier - core tool allowlist)
  if (settings.tools?.core) {
    mapToolsToRules(
      settings.tools.core,
      CORE_TOOLS_FLAG_PRIORITY,
      'Settings (Core Tools)',
      nonPlanModes,
    );
    // If core tools are restricted, we should add a default DENY rule for everything else
    // at a slightly lower priority than the explicit allows.
    rules.push({
      toolName: '*',
      decision: PolicyDecision.DENY,
      priority: CORE_TOOLS_FLAG_PRIORITY - 0.01,
      source: 'Settings (Core Tools Allowlist Enforcement)',
      modes: nonPlanModes,
    });
  }
  // MCP servers that are trusted in the settings.
@@ -501,6 +576,7 @@ export async function createPolicyEngineConfig(
          decision: PolicyDecision.ALLOW,
          priority: TRUSTED_MCP_SERVER_PRIORITY,
          source: 'Settings (MCP Trusted)',
          modes: nonPlanModes,
        });
      }
    }
@@ -519,6 +595,7 @@ export async function createPolicyEngineConfig(
        decision: PolicyDecision.ALLOW,
        priority: ALLOWED_MCP_SERVER_PRIORITY,
        source: 'Settings (MCP Allowed)',
        modes: nonPlanModes,
      });
    }
  }
@@ -0,0 +1,76 @@
 /**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
 import { describe, it, expect } from 'vitest';
 import { createPolicyEngineConfig } from './config.js';
 import { PolicyEngine } from './policy-engine.js';
 import { PolicyDecision, ApprovalMode } from './types.js';
 describe('PolicyEngine - Core Tools Mapping', () => {
  it('should allow tools explicitly listed in settings.tools.core', async () => {
    const settings = {
      tools: {
        core: ['run_shell_command(ls)', 'run_shell_command(git status)'],
      },
    };
    const config = await createPolicyEngineConfig(
      settings,
      ApprovalMode.DEFAULT,
      undefined,
      true, // interactive
    );
    const engine = new PolicyEngine(config);
    // Test simple tool name
    const result1 = await engine.check(
      { name: 'run_shell_command', args: { command: 'ls' } },
      undefined,
    );
    expect(result1.decision).toBe(PolicyDecision.ALLOW);
    // Test tool name with args
    const result2 = await engine.check(
      { name: 'run_shell_command', args: { command: 'git status' } },
      undefined,
    );
    expect(result2.decision).toBe(PolicyDecision.ALLOW);
    // Test tool not in core list
    const result3 = await engine.check(
      { name: 'run_shell_command', args: { command: 'npm test' } },
      undefined,
    );
    // Should be DENIED because of strict allowlist
    expect(result3.decision).toBe(PolicyDecision.DENY);
  });
  it('should allow tools in tools.core even if they are restricted by default policies', async () => {
    // By default run_shell_command is ASK_USER.
    // Putting it in tools.core should make it ALLOW.
    const settings = {
      tools: {
        core: ['run_shell_command'],
      },
    };
    const config = await createPolicyEngineConfig(
      settings,
      ApprovalMode.DEFAULT,
      undefined,
      true,
    );
    const engine = new PolicyEngine(config);
    const result = await engine.check(
      { name: 'run_shell_command', args: { command: 'any command' } },
      undefined,
    );
    expect(result.decision).toBe(PolicyDecision.ALLOW);
  });
 });
@@ -43,6 +43,35 @@ vi.mock('../utils/shell-utils.js', async (importOriginal) => {
      }
      return [command];
    }),
    parseCommandDetails: vi.fn().mockImplementation((command: string) => {
      // Basic mock implementation for PolicyEngine test needs
      const commands = command.includes('&&')
        ? command.split('&&').map((c) => c.trim())
        : [command.trim()];
      // Detect $(...) or `...` and add as sub-commands for recursion tests
      const subCommands = [...commands];
      for (const cmd of commands) {
        const subMatch = cmd.match(/\$\((.*)\)/) || cmd.match(/`(.*)`/);
        if (subMatch?.[1]) {
          subCommands.push(subMatch[1].trim());
        }
      }
      return {
        details: subCommands.map((c, i) => ({
          name: c.split(' ')[0],
          text: c,
          startIndex: i === 0 ? 0 : -1, // Simple root indication
        })),
        hasError: false,
      };
    }),
    stripShellWrapper: vi.fn().mockImplementation((command: string) => {
      // Simple mock for stripping wrappers
      const match = command.match(/^(?:bash|sh|zsh)\s+-c\s+["'](.*)["']$/i);
      return match ? match[1] : command;
    }),
    hasRedirection: vi.fn().mockImplementation(
      (command: string) =>
        // Simple mock: true if '>' is present, unless it looks like "-> arrow"
@@ -1862,7 +1891,6 @@ describe('PolicyEngine', () => {
    });
    it('should return ASK_USER in non-YOLO mode if shell command parsing fails', async () => {
      const { splitCommands } = await import('../utils/shell-utils.js');
      const rules: PolicyRule[] = [
        {
          toolName: 'run_shell_command',
@@ -1877,7 +1905,11 @@ describe('PolicyEngine', () => {
      });
      // Simulate parsing failure
-      vi.mocked(splitCommands).mockReturnValueOnce([]);
+      const { parseCommandDetails } = await import('../utils/shell-utils.js');
      vi.mocked(parseCommandDetails).mockReturnValueOnce({
        details: [],
        hasError: true,
      });
      const result = await engine.check(
        { name: 'run_shell_command', args: { command: 'complex command' } },
@@ -7,8 +7,10 @@
 import { type FunctionCall } from '@google/genai';
 import {
  SHELL_TOOL_NAMES,
  REDIRECTION_NAMES,
  initializeShellParsers,
-  splitCommands,
+  parseCommandDetails,
  stripShellWrapper,
  hasRedirection,
  extractStringFromParseEntry,
 } from '../utils/shell-utils.js';
@@ -359,7 +361,8 @@ export class PolicyEngine {
    }
    await initializeShellParsers();
-    const subCommands = splitCommands(command);
+    const parsed = parseCommandDetails(command);
    const subCommands = parsed?.details ?? [];
    if (subCommands.length === 0) {
      // If the matched rule says DENY, we should respect it immediately even if parsing fails.
@@ -380,115 +383,109 @@ export class PolicyEngine {
      );
      // Parsing logic failed, we can't trust it. Use default decision ASK_USER (or DENY in non-interactive).
      // We return the rule that matched so the evaluation loop terminates.
      return {
        decision: this.defaultDecision,
        rule,
      };
    }
-    // If there are multiple parts, or if we just want to validate the single part against DENY rules
+    debugLogger.debug(
-    if (subCommands.length > 0) {
+      `[PolicyEngine.check] Validating shell command: ${subCommands.length} parts`,
-      debugLogger.debug(
+    );
        `[PolicyEngine.check] Validating shell command: ${subCommands.length} parts`,
      );
-      if (ruleDecision === PolicyDecision.DENY) {
+    if (ruleDecision === PolicyDecision.DENY) {
-        return { decision: PolicyDecision.DENY, rule };
+      return { decision: PolicyDecision.DENY, rule };
-      }
+    }
-      // Start optimistically. If all parts are ALLOW, the whole is ALLOW.
+    // Start with the decision from the rule or heuristics.
-      // We will downgrade if any part is ASK_USER or DENY.
+    // If the tool call was already downgraded (e.g. by heuristics), we start there.
-      let aggregateDecision = PolicyDecision.ALLOW;
+    let aggregateDecision = ruleDecision;
      let responsibleRule: PolicyRule | undefined;
-      // Check for redirection on the full command string
+    // If heuristics downgraded the decision, we don't blame the rule.
-      if (this.shouldDowngradeForRedirection(command, allowRedirection)) {
+    let responsibleRule: PolicyRule | undefined =
      rule && ruleDecision === rule.decision ? rule : undefined;
    // Check for redirection on the full command string.
    // Redirection always downgrades ALLOW to ASK_USER (it never upgrades).
    if (this.shouldDowngradeForRedirection(command, allowRedirection)) {
      if (aggregateDecision === PolicyDecision.ALLOW) {
        debugLogger.debug(
          `[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${command}`,
        );
        aggregateDecision = PolicyDecision.ASK_USER;
        responsibleRule = undefined; // Inherent policy
      }
    }
-      for (const rawSubCmd of subCommands) {
+    for (const detail of subCommands) {
-        const subCmd = rawSubCmd.trim();
+      if (REDIRECTION_NAMES.has(detail.name)) {
-        // Prevent infinite recursion for the root command
+        continue;
-        if (subCmd === command) {
+      }
-          if (this.shouldDowngradeForRedirection(subCmd, allowRedirection)) {
+
-            debugLogger.debug(
+      const subCmd = detail.text.trim();
-              `[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${subCmd}`,
+      const isAtomic =
-            );
+        subCmd === command ||
-            // Redirection always downgrades ALLOW to ASK_USER
+        (detail.startIndex === 0 && detail.text.length === command.length);
-            if (aggregateDecision === PolicyDecision.ALLOW) {
+
-              aggregateDecision = PolicyDecision.ASK_USER;
+      // Recursive check for shell wrappers (bash -c, etc.)
-              responsibleRule = undefined; // Inherent policy
+      const stripped = stripShellWrapper(subCmd);
-            }
+      if (stripped !== subCmd) {
        const wrapperResult = await this.check(
          { name: toolName, args: { command: stripped, dir_path } },
          serverName,
          toolAnnotations,
          subagent,
          true,
        );
        if (wrapperResult.decision === PolicyDecision.DENY)
          return wrapperResult;
        if (wrapperResult.decision === PolicyDecision.ASK_USER) {
          if (aggregateDecision === PolicyDecision.ALLOW) {
            responsibleRule = wrapperResult.rule;
          } else {
-            // Atomic command matching the rule.
+            responsibleRule ??= wrapperResult.rule;
            if (
              ruleDecision === PolicyDecision.ASK_USER &&
              aggregateDecision === PolicyDecision.ALLOW
            ) {
              aggregateDecision = PolicyDecision.ASK_USER;
              responsibleRule = rule;
            }
          }
-          continue;
+          aggregateDecision = PolicyDecision.ASK_USER;
        }
      }
      if (!isAtomic) {
        const subResult = await this.check(
          { name: toolName, args: { command: subCmd, dir_path } },
          serverName,
          toolAnnotations,
          subagent,
          true,
        );
-        // subResult.decision is already filtered through applyNonInteractiveMode by this.check()
+        if (subResult.decision === PolicyDecision.DENY) return subResult;
        const subDecision = subResult.decision;
-        // If any part is DENIED, the whole command is DENY
+        if (subResult.decision === PolicyDecision.ASK_USER) {
-        if (subDecision === PolicyDecision.DENY) {
+          if (aggregateDecision === PolicyDecision.ALLOW) {
          return {
            decision: PolicyDecision.DENY,
            rule: subResult.rule,
          };
        }
        // If any part requires ASK_USER, the whole command requires ASK_USER
        if (subDecision === PolicyDecision.ASK_USER) {
          aggregateDecision = PolicyDecision.ASK_USER;
          if (!responsibleRule) {
            responsibleRule = subResult.rule;
          } else {
            responsibleRule ??= subResult.rule;
          }
          aggregateDecision = PolicyDecision.ASK_USER;
        }
-        // Check for redirection in allowed sub-commands
+        // Downgrade if sub-command has redirection
        if (
-          subDecision === PolicyDecision.ALLOW &&
+          subResult.decision === PolicyDecision.ALLOW &&
          this.shouldDowngradeForRedirection(subCmd, allowRedirection)
        ) {
          debugLogger.debug(
            `[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${subCmd}`,
          );
          if (aggregateDecision === PolicyDecision.ALLOW) {
            aggregateDecision = PolicyDecision.ASK_USER;
            responsibleRule = undefined;
          }
        }
      }
      return {
        decision: aggregateDecision,
        // If we stayed at ALLOW, we return the original rule (if any).
        // If we downgraded, we return the responsible rule (or undefined if implicit).
        rule: aggregateDecision === ruleDecision ? rule : responsibleRule,
      };
    }
    return {
-      decision: ruleDecision,
+      decision: aggregateDecision,
-      rule,
+      rule: aggregateDecision === ruleDecision ? rule : responsibleRule,
    };
  }
@@ -501,6 +498,7 @@ export class PolicyEngine {
    serverName: string | undefined,
    toolAnnotations?: Record<string, unknown>,
    subagent?: string,
    skipHeuristics = false,
  ): Promise<CheckResult> {
    // Case 1: Metadata injection is the primary and safest way to identify an MCP server.
    // If we have explicit `_serverName` metadata (usually injected by tool-registry for active tools), use it.
@@ -594,6 +592,7 @@ export class PolicyEngine {
        let ruleDecision = rule.decision;
        if (
          !skipHeuristics &&
          isShellCommand &&
          command &&
          !('commandPrefix' in rule) &&
@@ -615,12 +614,10 @@ export class PolicyEngine {
            subagent,
          );
          decision = shellResult.decision;
-          if (shellResult.rule) {
+          matchedRule = shellResult.rule;
-            matchedRule = shellResult.rule;
+          break;
            break;
          }
        } else {
-          decision = rule.decision;
+          decision = ruleDecision;
          matchedRule = rule;
          break;
        }
@@ -643,7 +640,7 @@ export class PolicyEngine {
      );
      if (toolName && SHELL_TOOL_NAMES.includes(toolName)) {
        let heuristicDecision = this.defaultDecision;
-        if (command) {
+        if (!skipHeuristics && command) {
          heuristicDecision = await this.applyShellHeuristics(
            command,
            heuristicDecision,
@@ -0,0 +1,134 @@
 /**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
 import { describe, it, expect, beforeAll } from 'vitest';
 import { PolicyEngine } from './policy-engine.js';
 import { PolicyDecision, ApprovalMode } from './types.js';
 import { initializeShellParsers } from '../utils/shell-utils.js';
 import { buildArgsPatterns } from './utils.js';
 describe('PolicyEngine - Shell Safety Regression Suite', () => {
  let engine: PolicyEngine;
  beforeAll(async () => {
    await initializeShellParsers();
  });
  const setupEngine = (allowedCommands: string[]) => {
    const rules = allowedCommands.map((cmd) => ({
      toolName: 'run_shell_command',
      decision: PolicyDecision.ALLOW,
      argsPattern: new RegExp(buildArgsPatterns(undefined, cmd)[0]!),
      priority: 10,
    }));
    return new PolicyEngine({
      rules,
      approvalMode: ApprovalMode.DEFAULT,
      defaultDecision: PolicyDecision.ASK_USER,
    });
  };
  it('should block unauthorized chained command with &&', async () => {
    engine = setupEngine(['echo']);
    const result = await engine.check(
      { name: 'run_shell_command', args: { command: 'echo hi && ls' } },
      undefined,
    );
    expect(result.decision).toBe(PolicyDecision.ASK_USER);
  });
  it('should allow authorized chained command with &&', async () => {
    engine = setupEngine(['echo', 'ls']);
    const result = await engine.check(
      { name: 'run_shell_command', args: { command: 'echo hi && ls' } },
      undefined,
    );
    expect(result.decision).toBe(PolicyDecision.ALLOW);
  });
  it('should block unauthorized chained command with ||', async () => {
    engine = setupEngine(['false']);
    const result = await engine.check(
      { name: 'run_shell_command', args: { command: 'false || ls' } },
      undefined,
    );
    expect(result.decision).toBe(PolicyDecision.ASK_USER);
  });
  it('should block unauthorized chained command with ;', async () => {
    engine = setupEngine(['echo']);
    const result = await engine.check(
      { name: 'run_shell_command', args: { command: 'echo hi; ls' } },
      undefined,
    );
    expect(result.decision).toBe(PolicyDecision.ASK_USER);
  });
  it('should block unauthorized command in pipe |', async () => {
    engine = setupEngine(['echo']);
    const result = await engine.check(
      { name: 'run_shell_command', args: { command: 'echo hi | grep "hi"' } },
      undefined,
    );
    expect(result.decision).toBe(PolicyDecision.ASK_USER);
  });
  it('should allow authorized command in pipe |', async () => {
    engine = setupEngine(['echo', 'grep']);
    const result = await engine.check(
      { name: 'run_shell_command', args: { command: 'echo hi | grep "hi"' } },
      undefined,
    );
    expect(result.decision).toBe(PolicyDecision.ALLOW);
  });
  it('should block unauthorized chained command with &', async () => {
    engine = setupEngine(['echo']);
    const result = await engine.check(
      { name: 'run_shell_command', args: { command: 'echo hi & ls' } },
      undefined,
    );
    expect(result.decision).toBe(PolicyDecision.ASK_USER);
  });
  it('should allow authorized chained command with &', async () => {
    engine = setupEngine(['echo', 'ls']);
    const result = await engine.check(
      { name: 'run_shell_command', args: { command: 'echo hi & ls' } },
      undefined,
    );
    expect(result.decision).toBe(PolicyDecision.ALLOW);
  });
  it('should block unauthorized command in nested substitution', async () => {
    engine = setupEngine(['echo', 'cat']);
    const result = await engine.check(
      { name: 'run_shell_command', args: { command: 'echo $(cat $(ls))' } },
      undefined,
    );
    expect(result.decision).toBe(PolicyDecision.ASK_USER);
  });
  it('should allow authorized command in nested substitution', async () => {
    engine = setupEngine(['echo', 'cat', 'ls']);
    const result = await engine.check(
      { name: 'run_shell_command', args: { command: 'echo $(cat $(ls))' } },
      undefined,
    );
    expect(result.decision).toBe(PolicyDecision.ALLOW);
  });
  it('should block command redirection if not explicitly allowed', async () => {
    engine = setupEngine(['echo']);
    const result = await engine.check(
      { name: 'run_shell_command', args: { command: 'echo hi > /tmp/test' } },
      undefined,
    );
    // Inherent policy: redirection downgrades to ASK_USER
    expect(result.decision).toBe(PolicyDecision.ASK_USER);
  });
 });
@@ -59,6 +59,30 @@ vi.mock('../utils/shell-utils.js', async (importOriginal) => {
  return {
    ...actual,
    initializeShellParsers: vi.fn(),
    parseCommandDetails: (command: string) => {
      if (Object.prototype.hasOwnProperty.call(commandMap, command)) {
        const subcommands = commandMap[command];
        return {
          details: subcommands.map((text) => ({
            name: text.split(' ')[0],
            text,
            startIndex: command.indexOf(text),
          })),
          hasError: subcommands.length === 0 && command.includes('&&&'),
        };
      }
      return {
        details: [
          {
            name: command.split(' ')[0],
            text: command,
            startIndex: 0,
          },
        ],
        hasError: false,
      };
    },
    stripShellWrapper: (command: string) => command,
    splitCommands: (command: string) => {
      if (Object.prototype.hasOwnProperty.call(commandMap, command)) {
        return commandMap[command];
@@ -0,0 +1,97 @@
 /**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
 import { expect, describe, it, beforeAll, vi } from 'vitest';
 import { PolicyEngine } from './policy-engine.js';
 import { PolicyDecision } from './types.js';
 import { initializeShellParsers } from '../utils/shell-utils.js';
 // Mock node:os to ensure shell-utils logic always thinks it's on a POSIX-like system.
 // This ensures that internal calls to getShellConfiguration() and isWindows()
 // within the shell-utils module return 'bash' configuration, even on Windows CI.
 vi.mock('node:os', async (importOriginal) => {
  const actual = await importOriginal<typeof import('node:os')>();
  return {
    ...actual,
    default: {
      ...actual,
      platform: () => 'linux',
    },
    platform: () => 'linux',
  };
 });
 // Mock shell-utils to ensure consistent behavior across platforms (especially Windows CI)
 // We want to test PolicyEngine logic with Bash syntax rules.
 vi.mock('../utils/shell-utils.js', async (importOriginal) => {
  const actual =
    await importOriginal<typeof import('../utils/shell-utils.js')>();
  return {
    ...actual,
    getShellConfiguration: () => ({
      executable: 'bash',
      argsPrefix: ['-c'],
      shell: 'bash',
    }),
  };
 });
 describe('PolicyEngine Command Substitution Validation', () => {
  beforeAll(async () => {
    await initializeShellParsers();
  });
  const setupEngine = (blockedCmd: string) =>
    new PolicyEngine({
      defaultDecision: PolicyDecision.ALLOW,
      rules: [
        {
          toolName: 'run_shell_command',
          argsPattern: new RegExp(`"command":"${blockedCmd}"`),
          decision: PolicyDecision.DENY,
        },
      ],
    });
  it('should block echo $(dangerous_cmd) when dangerous_cmd is explicitly blocked', async () => {
    const engine = setupEngine('dangerous_cmd');
    const result = await engine.check(
      { name: 'run_shell_command', args: { command: 'echo $(dangerous_cmd)' } },
      'test-server',
    );
    expect(result.decision).toBe(PolicyDecision.DENY);
  });
  it('should block backtick substitution `dangerous_cmd`', async () => {
    const engine = setupEngine('dangerous_cmd');
    const result = await engine.check(
      { name: 'run_shell_command', args: { command: 'echo `dangerous_cmd`' } },
      'test-server',
    );
    expect(result.decision).toBe(PolicyDecision.DENY);
  });
  it('should block commands inside subshells (dangerous_cmd)', async () => {
    const engine = setupEngine('dangerous_cmd');
    const result = await engine.check(
      { name: 'run_shell_command', args: { command: '(dangerous_cmd)' } },
      'test-server',
    );
    expect(result.decision).toBe(PolicyDecision.DENY);
  });
  it('should handle nested substitutions deeply', async () => {
    const engine = setupEngine('deep_danger');
    const result = await engine.check(
      {
        name: 'run_shell_command',
        args: { command: 'echo $(ls $(deep_danger))' },
      },
      'test-server',
    );
    expect(result.decision).toBe(PolicyDecision.DENY);
  });
 });
@@ -335,8 +335,10 @@ export interface PolicySettings {
    allowed?: string[];
  };
  tools?: {
    core?: string[];
    exclude?: string[];
    allowed?: string[];
    confirmationRequired?: string[];
  };
  mcpServers?: Record<string, { trust?: boolean }>;
  // User provided policies that will replace the USER level policies in ~/.gemini/policies
@@ -1993,13 +1993,15 @@ describe('getRipgrepPath', () => {
      vi.mocked(fileExists).mockImplementation(
        async (checkPath) =>
          checkPath.includes(path.normalize('core/vendor/ripgrep')) &&
-          !checkPath.includes('tools'),
+          !checkPath.includes(path.join(path.sep, 'tools', path.sep)),
      );
      const resolvedPath = await getRipgrepPath();
      expect(resolvedPath).not.toBeNull();
      expect(resolvedPath).toContain(path.normalize('core/vendor/ripgrep'));
-      expect(resolvedPath).not.toContain('tools');
+      expect(resolvedPath).not.toContain(
        path.join(path.sep, 'tools', path.sep),
      );
    });
    it('should return null if binary is missing from both paths', async () => {
@@ -240,11 +240,15 @@ foreach ($commandAst in $commandAsts) {
  'utf16le',
 ).toString('base64');
-const REDIRECTION_NAMES = new Set([
+export const REDIRECTION_NAMES = new Set([
  'redirection (<)',
  'redirection (>)',
  'heredoc (<<)',
  'herestring (<<<)',
  'command substitution',
  'backtick substitution',
  'process substitution',
  'subshell',
 ]);
 function createParser(): Parser | null {
@@ -360,6 +364,14 @@ function extractNameFromNode(node: Node): string | null {
      return 'heredoc (<<)';
    case 'herestring_redirect':
      return 'herestring (<<<)';
    case 'command_substitution':
      return 'command substitution';
    case 'backtick_substitution':
      return 'backtick substitution';
    case 'process_substitution':
      return 'process substitution';
    case 'subshell':
      return 'subshell';
    default:
      return null;
  }
@@ -2531,6 +2531,15 @@
            "type": "string"
          }
        },
        "confirmationRequired": {
          "title": "Confirmation Required",
          "description": "Tool names that always require user confirmation. Takes precedence over allowed tools and core tool allowlists.",
          "markdownDescription": "Tool names that always require user confirmation. Takes precedence over allowed tools and core tool allowlists.\n\n- Category: `Advanced`\n- Requires restart: `yes`",
          "type": "array",
          "items": {
            "type": "string"
          }
        },
        "exclude": {
          "title": "Exclude Tools",
          "description": "Tool names to exclude from discovery.",