feat(plan): enforce strict read-only policy and halt execution on violation (#16849)

This commit is contained in:
Jerop Kipruto
2026-01-16 12:56:48 -05:00
committed by GitHub
parent 013a4e02ff
commit 5241174827
4 changed files with 179 additions and 8 deletions

View File

@@ -287,6 +287,43 @@ describe('Policy Engine Integration Tests', () => {
).toBe(PolicyDecision.ASK_USER);
});
it('should handle Plan mode correctly', async () => {
  const emptySettings: Settings = {};
  const planEngine = new PolicyEngine(
    await createPolicyEngineConfig(emptySettings, ApprovalMode.PLAN),
  );

  // Looks up the policy decision for a bare tool call (no server name).
  const decisionFor = async (toolName: string) =>
    (await planEngine.check({ name: toolName }, undefined)).decision;

  // Read and search tools should be allowed.
  for (const toolName of [
    'read_file',
    'google_web_search',
    'list_directory',
  ]) {
    expect(await decisionFor(toolName)).toBe(PolicyDecision.ALLOW);
  }

  // Every other tool, including names the engine has never heard of, should
  // be denied via the catch-all rule.
  for (const toolName of [
    'replace',
    'write_file',
    'run_shell_command',
    'unknown_tool',
  ]) {
    expect(await decisionFor(toolName)).toBe(PolicyDecision.DENY);
  }
});
it('should verify priority ordering works correctly in practice', async () => {
const settings: Settings = {
tools: {

View File

@@ -7,7 +7,10 @@
import { describe, it, expect, vi } from 'vitest';
import type { Mock } from 'vitest';
import type { CallableTool } from '@google/genai';
import { CoreToolScheduler } from './coreToolScheduler.js';
import {
CoreToolScheduler,
PLAN_MODE_DENIAL_MESSAGE,
} from './coreToolScheduler.js';
import type {
ToolCall,
WaitingToolCall,
@@ -32,6 +35,7 @@ import {
ApprovalMode,
HookSystem,
PolicyDecision,
ToolErrorType,
} from '../index.js';
import { createMockMessageBus } from '../test-utils/mock-message-bus.js';
import {
@@ -2078,4 +2082,53 @@ describe('CoreToolScheduler Sequential Execution', () => {
expect(onAllToolCallsComplete).toHaveBeenCalledTimes(1);
});
describe('Policy Decisions in Plan Mode', () => {
  it('should return STOP_EXECUTION error type and informative message when denied in Plan Mode', async () => {
    // Registry stub that resolves every lookup to one mock tool.
    const deniedTool = new MockTool({
      name: 'dangerous_tool',
      displayName: 'Dangerous Tool',
      description: 'Does risky stuff',
    });
    const registry = {
      getTool: () => deniedTool,
      getAllToolNames: () => ['dangerous_tool'],
    } as unknown as ToolRegistry;

    // Policy engine stub whose check always resolves to DENY.
    const denyAllEngine = {
      check: async () => ({ decision: PolicyDecision.DENY }),
    } as unknown as PolicyEngine;

    const completionSpy = vi.fn();
    const scheduler = new CoreToolScheduler({
      config: createMockConfig({
        getToolRegistry: () => registry,
        getApprovalMode: () => ApprovalMode.PLAN,
        getPolicyEngine: () => denyAllEngine,
      }),
      onAllToolCallsComplete: completionSpy,
      getPreferredEditor: () => 'vscode',
    });

    const toolRequest = {
      callId: 'call-1',
      name: 'dangerous_tool',
      args: {},
      isClientInitiated: false,
      prompt_id: 'prompt-1',
    };
    await scheduler.schedule(toolRequest, new AbortController().signal);

    // The scheduler must report completion exactly once, with the denied
    // call surfaced as an error that carries the Plan Mode message.
    expect(completionSpy).toHaveBeenCalledTimes(1);
    const completedCalls = completionSpy.mock.calls[0][0];
    const firstCall = completedCalls[0];
    expect(firstCall.status).toBe('error');
    expect(firstCall.response.errorType).toBe(ToolErrorType.STOP_EXECUTION);
    expect(firstCall.response.error.message).toBe(PLAN_MODE_DENIAL_MESSAGE);
  });
});
});

View File

@@ -14,7 +14,7 @@ import {
} from '../tools/tools.js';
import type { EditorType } from '../utils/editor.js';
import type { Config } from '../config/config.js';
import { PolicyDecision } from '../policy/types.js';
import { PolicyDecision, ApprovalMode } from '../policy/types.js';
import { logToolCall } from '../telemetry/loggers.js';
import { ToolErrorType } from '../tools/tool-error.js';
import { ToolCallEvent } from '../telemetry/types.js';
@@ -65,6 +65,9 @@ export type {
ToolCallResponseInfo,
};
/**
 * Error message used when a tool call is denied by policy while the approval
 * mode is `ApprovalMode.PLAN`. Exported so that other modules (for example the
 * scheduler tests) can compare against the exact text.
 */
export const PLAN_MODE_DENIAL_MESSAGE =
'You are in Plan Mode - adjust your prompt to only use read and search tools.';
const createErrorResponse = (
request: ToolCallRequestInfo,
error: Error,
@@ -603,16 +606,18 @@ export class CoreToolScheduler {
.check(toolCallForPolicy, serverName);
if (decision === PolicyDecision.DENY) {
const errorMessage = `Tool execution denied by policy.`;
let errorMessage = `Tool execution denied by policy.`;
let errorType = ToolErrorType.POLICY_VIOLATION;
if (this.config.getApprovalMode() === ApprovalMode.PLAN) {
errorMessage = PLAN_MODE_DENIAL_MESSAGE;
errorType = ToolErrorType.STOP_EXECUTION;
}
this.setStatusInternal(
reqInfo.callId,
'error',
signal,
createErrorResponse(
reqInfo,
new Error(errorMessage),
ToolErrorType.POLICY_VIOLATION,
),
createErrorResponse(reqInfo, new Error(errorMessage), errorType),
);
await this.checkAndNotifyCompletion(signal);
return;

View File

@@ -0,0 +1,76 @@
# Priority system for policy rules:
# - Higher priority numbers win over lower priority numbers
# - When multiple rules match, the highest priority rule is applied
# - Rules are evaluated in order of priority (highest first)
#
# Priority bands (tiers):
# - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100)
# - User policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
# - Admin policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
#
# This ensures Admin > User > Default hierarchy is always preserved,
# while allowing user-specified priorities to work within each tier.
#
# Settings-based and dynamic rules (all in user tier 2.x):
# 2.95: Tools that the user has selected as "Always Allow" in the interactive UI
# 2.9: MCP servers excluded list (security: persistent server blocks)
# 2.4: Command line flag --exclude-tools (explicit temporary blocks)
# 2.3: Command line flag --allowed-tools (explicit temporary allows)
# 2.2: MCP servers with trust=true (persistent trusted servers)
# 2.1: MCP servers allowed list (persistent general server allows)
#
# TOML policy priorities (before transformation):
# 10: Write tools default to ASK_USER (becomes 1.010 in default tier)
# 20: Plan mode catch-all DENY override (becomes 1.020 in default tier)
# 50: Read-only tools (becomes 1.050 in default tier)
# 999: YOLO mode allow-all (becomes 1.999 in default tier)
# Catch-All: Deny everything by default in Plan mode.
# This rule names no tool, so it is not tied to any single tool. The allow
# rules at priority 50 outrank it (higher priority wins), so it only decides
# calls that no higher-priority rule matches.
[[rule]]
decision = "deny"
priority = 20
modes = ["plan"]
# Explicitly Allow Read-Only Tools in Plan mode.
# Each rule below allows one named tool and is scoped to the "plan" mode via
# `modes`. Priority 50 is higher than the catch-all deny (priority 20), so
# these allows win for the tools they name.
[[rule]]
toolName = "glob"
decision = "allow"
priority = 50
modes = ["plan"]
[[rule]]
toolName = "search_file_content"
decision = "allow"
priority = 50
modes = ["plan"]
[[rule]]
toolName = "list_directory"
decision = "allow"
priority = 50
modes = ["plan"]
[[rule]]
toolName = "read_file"
decision = "allow"
priority = 50
modes = ["plan"]
[[rule]]
toolName = "read_many_files"
decision = "allow"
priority = 50
modes = ["plan"]
[[rule]]
toolName = "google_web_search"
decision = "allow"
priority = 50
modes = ["plan"]
[[rule]]
toolName = "SubagentInvocation"
decision = "allow"
priority = 50