feat(plan): enforce strict read-only policy and halt execution on violation (#16849)

2026-04-25 12:34:38 -07:00 · 2026-01-16 12:56:48 -05:00
parent 013a4e02ff
commit 5241174827
4 changed files with 179 additions and 8 deletions
@@ -287,6 +287,43 @@ describe('Policy Engine Integration Tests', () => {
      ).toBe(PolicyDecision.ASK_USER);
    });

+    it('should handle Plan mode correctly', async () => {
+      const settings: Settings = {};
+
+      const config = await createPolicyEngineConfig(
+        settings,
+        ApprovalMode.PLAN,
+      );
+      const engine = new PolicyEngine(config);
+
+      // Read and search tools should be allowed
+      expect(
+        (await engine.check({ name: 'read_file' }, undefined)).decision,
+      ).toBe(PolicyDecision.ALLOW);
+      expect(
+        (await engine.check({ name: 'google_web_search' }, undefined)).decision,
+      ).toBe(PolicyDecision.ALLOW);
+      expect(
+        (await engine.check({ name: 'list_directory' }, undefined)).decision,
+      ).toBe(PolicyDecision.ALLOW);
+
+      // Write and shell tools should be denied via the catch-all rule
+      expect(
+        (await engine.check({ name: 'replace' }, undefined)).decision,
+      ).toBe(PolicyDecision.DENY);
+      expect(
+        (await engine.check({ name: 'write_file' }, undefined)).decision,
+      ).toBe(PolicyDecision.DENY);
+      expect(
+        (await engine.check({ name: 'run_shell_command' }, undefined)).decision,
+      ).toBe(PolicyDecision.DENY);
+
+      // Unknown tools should be denied via catch-all
+      expect(
+        (await engine.check({ name: 'unknown_tool' }, undefined)).decision,
+      ).toBe(PolicyDecision.DENY);
+    });
+
    it('should verify priority ordering works correctly in practice', async () => {
      const settings: Settings = {
        tools: {
@@ -7,7 +7,10 @@
 import { describe, it, expect, vi } from 'vitest';
 import type { Mock } from 'vitest';
 import type { CallableTool } from '@google/genai';
-import { CoreToolScheduler } from './coreToolScheduler.js';
+import {
+  CoreToolScheduler,
+  PLAN_MODE_DENIAL_MESSAGE,
+} from './coreToolScheduler.js';
 import type {
  ToolCall,
  WaitingToolCall,
@@ -32,6 +35,7 @@ import {
  ApprovalMode,
  HookSystem,
  PolicyDecision,
+  ToolErrorType,
 } from '../index.js';
 import { createMockMessageBus } from '../test-utils/mock-message-bus.js';
 import {
@@ -2078,4 +2082,53 @@ describe('CoreToolScheduler Sequential Execution', () => {

    expect(onAllToolCallsComplete).toHaveBeenCalledTimes(1);
  });
+
+  describe('Policy Decisions in Plan Mode', () => {
+    it('should return STOP_EXECUTION error type and informative message when denied in Plan Mode', async () => {
+      const mockTool = new MockTool({
+        name: 'dangerous_tool',
+        displayName: 'Dangerous Tool',
+        description: 'Does risky stuff',
+      });
+      const mockToolRegistry = {
+        getTool: () => mockTool,
+        getAllToolNames: () => ['dangerous_tool'],
+      } as unknown as ToolRegistry;
+
+      const onAllToolCallsComplete = vi.fn();
+
+      const mockConfig = createMockConfig({
+        getToolRegistry: () => mockToolRegistry,
+        getApprovalMode: () => ApprovalMode.PLAN,
+        getPolicyEngine: () =>
+          ({
+            check: async () => ({ decision: PolicyDecision.DENY }),
+          }) as unknown as PolicyEngine,
+      });
+
+      const scheduler = new CoreToolScheduler({
+        config: mockConfig,
+        onAllToolCallsComplete,
+        getPreferredEditor: () => 'vscode',
+      });
+
+      const request = {
+        callId: 'call-1',
+        name: 'dangerous_tool',
+        args: {},
+        isClientInitiated: false,
+        prompt_id: 'prompt-1',
+      };
+
+      await scheduler.schedule(request, new AbortController().signal);
+
+      expect(onAllToolCallsComplete).toHaveBeenCalledTimes(1);
+      const reportedTools = onAllToolCallsComplete.mock.calls[0][0];
+      const result = reportedTools[0];
+
+      expect(result.status).toBe('error');
+      expect(result.response.errorType).toBe(ToolErrorType.STOP_EXECUTION);
+      expect(result.response.error.message).toBe(PLAN_MODE_DENIAL_MESSAGE);
+    });
+  });
 });
@@ -14,7 +14,7 @@ import {
 } from '../tools/tools.js';
 import type { EditorType } from '../utils/editor.js';
 import type { Config } from '../config/config.js';
-import { PolicyDecision } from '../policy/types.js';
+import { PolicyDecision, ApprovalMode } from '../policy/types.js';
 import { logToolCall } from '../telemetry/loggers.js';
 import { ToolErrorType } from '../tools/tool-error.js';
 import { ToolCallEvent } from '../telemetry/types.js';
@@ -65,6 +65,9 @@ export type {
  ToolCallResponseInfo,
 };

+export const PLAN_MODE_DENIAL_MESSAGE =
+  'You are in Plan Mode - adjust your prompt to only use read and search tools.';
+
 const createErrorResponse = (
  request: ToolCallRequestInfo,
  error: Error,
@@ -603,16 +606,18 @@ export class CoreToolScheduler {
          .check(toolCallForPolicy, serverName);

        if (decision === PolicyDecision.DENY) {
-          const errorMessage = `Tool execution denied by policy.`;
+          let errorMessage = `Tool execution denied by policy.`;
+          let errorType = ToolErrorType.POLICY_VIOLATION;
+
+          if (this.config.getApprovalMode() === ApprovalMode.PLAN) {
+            errorMessage = PLAN_MODE_DENIAL_MESSAGE;
+            errorType = ToolErrorType.STOP_EXECUTION;
+          }
          this.setStatusInternal(
            reqInfo.callId,
            'error',
            signal,
-            createErrorResponse(
-              reqInfo,
-              new Error(errorMessage),
-              ToolErrorType.POLICY_VIOLATION,
-            ),
+            createErrorResponse(reqInfo, new Error(errorMessage), errorType),
          );
          await this.checkAndNotifyCompletion(signal);
          return;
@@ -0,0 +1,76 @@
+# Priority system for policy rules:
+# - Higher priority numbers win over lower priority numbers
+# - When multiple rules match, the highest priority rule is applied
+# - Rules are evaluated in order of priority (highest first)
+#
+# Priority bands (tiers):
+# - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100)
+# - User policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
+# - Admin policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
+#
+# This ensures Admin > User > Default hierarchy is always preserved,
+# while allowing user-specified priorities to work within each tier.
+#
+# Settings-based and dynamic rules (all in user tier 2.x):
+#   2.95: Tools that the user has selected as "Always Allow" in the interactive UI
+#   2.9:  MCP servers excluded list (security: persistent server blocks)
+#   2.4:  Command line flag --exclude-tools (explicit temporary blocks)
+#   2.3:  Command line flag --allowed-tools (explicit temporary allows)
+#   2.2:  MCP servers with trust=true (persistent trusted servers)
+#   2.1:  MCP servers allowed list (persistent general server allows)
+#
+# TOML policy priorities (before transformation):
+#   10: Write tools default to ASK_USER (becomes 1.010 in default tier)
+#   20: Plan mode catch-all DENY override (becomes 1.020 in default tier)
+#   50: Read-only tools (becomes 1.050 in default tier)
+#   999: YOLO mode allow-all (becomes 1.999 in default tier)
+
+# Catch-all: deny every tool call in Plan mode unless a higher-priority rule allows it.
+
+[[rule]]
+decision = "deny"
+priority = 20
+modes = ["plan"]
+
+# Explicitly allow read-only tools in Plan mode (priority 50 outranks the priority-20 catch-all deny).
+
+[[rule]]
+toolName = "glob"
+decision = "allow"
+priority = 50
+modes = ["plan"]
+
+[[rule]]
+toolName = "search_file_content"
+decision = "allow"
+priority = 50
+modes = ["plan"]
+
+[[rule]]
+toolName = "list_directory"
+decision = "allow"
+priority = 50
+modes = ["plan"]
+
+[[rule]]
+toolName = "read_file"
+decision = "allow"
+priority = 50
+modes = ["plan"]
+
+[[rule]]
+toolName = "read_many_files"
+decision = "allow"
+priority = 50
+modes = ["plan"]
+
+[[rule]]
+toolName = "google_web_search"
+decision = "allow"
+priority = 50
+modes = ["plan"]
+
+[[rule]]
+toolName = "SubagentInvocation"
+decision = "allow"
+priority = 50