From 34b4f1c6e4f2468cd35caac8bde87011f2691063 Mon Sep 17 00:00:00 2001 From: ruomeng Date: Wed, 8 Apr 2026 11:58:29 -0400 Subject: [PATCH] refactor(plan): simplify policy priorities and consolidate read-only rules (#24849) --- .../config/policy-engine.integration.test.ts | 8 +-- packages/core/src/agents/registry.test.ts | 4 +- packages/core/src/policy/config.ts | 5 +- packages/core/src/policy/policies/plan.toml | 46 +++++---------- .../core/src/policy/policies/read-only.toml | 59 +++++++------------ .../core/src/policy/policies/tracker.toml | 34 ----------- .../core/src/policy/policy-engine.test.ts | 4 +- packages/core/src/policy/toml-loader.test.ts | 27 ++++++--- packages/core/src/policy/types.ts | 6 +- 9 files changed, 71 insertions(+), 122 deletions(-) delete mode 100644 packages/core/src/policy/policies/tracker.toml diff --git a/packages/cli/src/config/policy-engine.integration.test.ts b/packages/cli/src/config/policy-engine.integration.test.ts index edc06bfbf0..b7b9be1193 100644 --- a/packages/cli/src/config/policy-engine.integration.test.ts +++ b/packages/cli/src/config/policy-engine.integration.test.ts @@ -520,8 +520,8 @@ describe('Policy Engine Integration Tests', () => { const readOnlyToolRule = rules.find( (r) => r.toolName === 'glob' && !r.subagent, ); - // Priority 70 in default tier → 1.07 (Overriding Plan Mode Deny) - expect(readOnlyToolRule?.priority).toBeCloseTo(1.07, 5); + // Priority 50 in default tier → 1.05 (Overriding Plan Mode Deny) + expect(readOnlyToolRule?.priority).toBeCloseTo(1.05, 5); // Verify the engine applies these priorities correctly expect( @@ -677,8 +677,8 @@ describe('Policy Engine Integration Tests', () => { expect(server1Rule?.priority).toBe(4.1); // Allowed servers (user tier) const globRule = rules.find((r) => r.toolName === 'glob' && !r.subagent); - // Priority 70 in default tier → 1.07 - expect(globRule?.priority).toBeCloseTo(1.07, 5); // Auto-accept read-only + // Priority 50 in default tier → 1.05 + expect(globRule?.priority).toBeCloseTo(1.05, 5); // Auto-accept read-only // The PolicyEngine will sort these by priority when it's created const engine = new PolicyEngine(config); diff --git a/packages/core/src/agents/registry.test.ts b/packages/core/src/agents/registry.test.ts index 55517a20d5..22ac42e6ed 100644 --- a/packages/core/src/agents/registry.test.ts +++ b/packages/core/src/agents/registry.test.ts @@ -1075,7 +1075,7 @@ describe('AgentRegistry', () => { expect.objectContaining({ toolName: 'PolicyTestAgent', decision: PolicyDecision.ALLOW, - priority: 1.05, + priority: 1.03, }), ); }); @@ -1102,7 +1102,7 @@ describe('AgentRegistry', () => { expect.objectContaining({ toolName: 'RemotePolicyAgent', decision: PolicyDecision.ASK_USER, - priority: 1.05, + priority: 1.03, }), ); }); diff --git a/packages/core/src/policy/config.ts b/packages/core/src/policy/config.ts index 9147a66a9d..359054add3 100644 --- a/packages/core/src/policy/config.ts +++ b/packages/core/src/policy/config.ts @@ -398,9 +398,10 @@ export async function createPolicyEngineConfig( // TOML policy priorities (before transformation): // 10: Write tools default to ASK_USER (becomes 1.010 in default tier) // 15: Auto-edit tool override (becomes 1.015 in default tier) + // 30: Unknown subagents (blocked by Plan Mode's 40) + // 40: Plan mode catch-all DENY override (becomes 1.040 in default tier) // 50: Read-only tools (becomes 1.050 in default tier) - // 60: Plan mode catch-all DENY override (becomes 1.060 in default tier) - // 70: Plan mode explicit ALLOW override (becomes 1.070 in default tier) + // 70: Mode transition overrides (becomes 1.070 in default tier) // 999: YOLO mode allow-all (becomes 1.999 in default tier) // MCP servers that are explicitly excluded in settings.mcp.excluded diff --git a/packages/core/src/policy/policies/plan.toml b/packages/core/src/policy/policies/plan.toml index 80b59ba2d5..eaf1f9471b 100644 --- a/packages/core/src/policy/policies/plan.toml +++ b/packages/core/src/policy/policies/plan.toml @@ -23,8 +23,10 @@ # # TOML policy priorities (before transformation): # 10: Write tools default to ASK_USER (becomes 1.010 in default tier) -# 60: Plan mode catch-all DENY override (becomes 1.060 in default tier) -# 70: Plan mode explicit ALLOW override (becomes 1.070 in default tier) +# 30: Unknown subagents (blocked by Plan Mode's 40) +# 40: Plan mode catch-all DENY override (becomes 1.040 in default tier) +# 50: Read-only tools / Plan mode explicit ALLOW (becomes 1.050 in default tier) +# 70: Mode transition overrides (into/out of Plan Mode) # 999: YOLO mode allow-all (becomes 1.999 in default tier) # Mode Transitions (into/out of Plan Mode) @@ -59,6 +61,7 @@ interactive = true toolName = "exit_plan_mode" decision = "allow" priority = 70 +modes = ["plan"] interactive = false [[rule]] @@ -73,18 +76,23 @@ denyMessage = "You are not currently in Plan Mode. Use enter_plan_mode first to [[rule]] toolName = "*" decision = "deny" -priority = 60 +priority = 40 modes = ["plan"] denyMessage = "You are in Plan Mode with access to read-only tools. Execution of scripts (including those from skills) is blocked." # Explicitly Allow Read-Only Tools in Plan mode. +[[rule]] +toolName = ["activate_skill"] +decision = "allow" +priority = 50 +modes = ["plan"] [[rule]] toolName = "*" mcpName = "*" toolAnnotations = { readOnlyHint = true } decision = "ask_user" -priority = 70 +priority = 50 modes = ["plan"] interactive = true @@ -93,45 +101,21 @@ toolName = "*" mcpName = "*" toolAnnotations = { readOnlyHint = true } decision = "deny" -priority = 70 +priority = 50 modes = ["plan"] interactive = false -[[rule]] -toolName = [ - "glob", - "grep_search", - "list_directory", - "read_file", - "google_web_search", - "activate_skill", - "codebase_investigator", - "cli_help", - "get_internal_docs", - "complete_task" -] -decision = "allow" -priority = 70 -modes = ["plan"] - -# Topic grouping tool is innocuous and used for UI organization. -[[rule]] -toolName = "update_topic" -decision = "allow" -priority = 70 -modes = ["plan"] - [[rule]] toolName = ["ask_user", "save_memory", "web_fetch"] decision = "ask_user" -priority = 70 +priority = 50 modes = ["plan"] interactive = true [[rule]] toolName = ["ask_user", "save_memory", "web_fetch"] decision = "deny" -priority = 70 +priority = 50 modes = ["plan"] interactive = false diff --git a/packages/core/src/policy/policies/read-only.toml b/packages/core/src/policy/policies/read-only.toml index c56984b522..0a8b465fe8 100644 --- a/packages/core/src/policy/policies/read-only.toml +++ b/packages/core/src/policy/policies/read-only.toml @@ -28,43 +28,26 @@ # 999: YOLO mode allow-all (becomes 1.999 in default tier) [[rule]] -toolName = "glob" +toolName = [ + "glob", + "grep_search", + "list_directory", + "read_file", + "google_web_search", + "codebase_investigator", + "cli_help", + "get_internal_docs", + # Tracker tools for task management (safe as they only modify internal state) + "tracker_create_task", + "tracker_update_task", + "tracker_get_task", + "tracker_list_tasks", + "tracker_add_dependency", + "tracker_visualize", + # Topic grouping tool is innocuous and used for UI organization. + "update_topic", + # Core agent lifecycle tool + "complete_task" +] decision = "allow" priority = 50 - -[[rule]] -toolName = "grep_search" -decision = "allow" -priority = 50 - -[[rule]] -toolName = "list_directory" -decision = "allow" -priority = 50 - -[[rule]] -toolName = "read_file" -decision = "allow" -priority = 50 - -[[rule]] -toolName = "google_web_search" -decision = "allow" -priority = 50 - -[[rule]] -toolName = ["codebase_investigator", "cli_help", "get_internal_docs"] -decision = "allow" -priority = 50 - -# Topic grouping tool is innocuous and used for UI organization. -[[rule]] -toolName = "update_topic" -decision = "allow" -priority = 50 - -# Core agent lifecycle tool -[[rule]] -toolName = "complete_task" -decision = "allow" -priority = 50 \ No newline at end of file diff --git a/packages/core/src/policy/policies/tracker.toml b/packages/core/src/policy/policies/tracker.toml deleted file mode 100644 index e17c4fc387..0000000000 --- a/packages/core/src/policy/policies/tracker.toml +++ /dev/null @@ -1,34 +0,0 @@ -# Priority system for policy rules: -# - Higher priority numbers win over lower priority numbers -# - When multiple rules match, the highest priority rule is applied -# - Rules are evaluated in order of priority (highest first) -# -# Priority bands (tiers): -# - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100) -# - Extension policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100) -# - Workspace policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100) -# - User policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100) -# - Admin policies (TOML): 5 + priority/1000 (e.g., priority 100 → 5.100) -# -# Settings-based and dynamic rules (all in user tier 4.x): -# 4.95: Tools that the user has selected as "Always Allow" in the interactive UI -# 4.9: MCP servers excluded list (security: persistent server blocks) -# 4.4: Command line flag --exclude-tools (explicit temporary blocks) -# 4.3: Command line flag --allowed-tools (explicit temporary allows) -# 4.2: MCP servers with trust=true (persistent trusted servers) -# 4.1: MCP servers allowed list (persistent general server allows) - -# Allow tracker tools to execute without asking the user. -# These tools are only registered when the tracker feature is enabled, -# so this rule is a no-op when the feature is disabled. -[[rule]] -toolName = [ - "tracker_create_task", - "tracker_update_task", - "tracker_get_task", - "tracker_list_tasks", - "tracker_add_dependency", - "tracker_visualize" -] -decision = "allow" -priority = 50 diff --git a/packages/core/src/policy/policy-engine.test.ts b/packages/core/src/policy/policy-engine.test.ts index 0299000f73..1d27107ee2 100644 --- a/packages/core/src/policy/policy-engine.test.ts +++ b/packages/core/src/policy/policy-engine.test.ts @@ -1715,13 +1715,13 @@ describe('PolicyEngine', () => { describe('Plan Mode vs Subagent Priority (Regression)', () => { it('should DENY subagents in Plan Mode despite dynamic allow rules', async () => { - // Plan Mode Deny (1.06) > Subagent Allow (1.05) + // Plan Mode Deny (1.04) > Subagent Allow (1.03) const fixedRules: PolicyRule[] = [ { toolName: '*', decision: PolicyDecision.DENY, - priority: 1.06, + priority: 1.04, modes: [ApprovalMode.PLAN], }, { diff --git a/packages/core/src/policy/toml-loader.test.ts b/packages/core/src/policy/toml-loader.test.ts index 6835e200b4..9c1e424c60 100644 --- a/packages/core/src/policy/toml-loader.test.ts +++ b/packages/core/src/policy/toml-loader.test.ts @@ -890,8 +890,8 @@ priority = 100 readOnlyHint: true, }); expect(annotationRule!.decision).toBe(PolicyDecision.ASK_USER); - // Priority 70 in tier 1 => 1.070 - expect(annotationRule!.priority).toBe(1.07); + // Priority 50 in tier 1 => 1.050 + expect(annotationRule!.priority).toBe(1.05); // Verify deny rule was loaded correctly const denyRule = result.rules.find( @@ -904,8 +904,8 @@ priority = 100 denyRule, 'Should have loaded the catch-all deny rule', ).toBeDefined(); - // Priority 60 in tier 1 => 1.060 - expect(denyRule!.priority).toBe(1.06); + // Priority 40 in tier 1 => 1.040 + expect(denyRule!.priority).toBe(1.04); // 2. Initialize Policy Engine in Plan Mode const engine = new PolicyEngine({ @@ -974,12 +974,23 @@ priority = 100 it('should override default subagent rules when in Plan Mode for unknown subagents', async () => { const planTomlPath = path.resolve(__dirname, 'policies', 'plan.toml'); - const fileContent = await fs.readFile(planTomlPath, 'utf-8'); + const readOnlyTomlPath = path.resolve( + __dirname, + 'policies', + 'read-only.toml', + ); + const planContent = await fs.readFile(planTomlPath, 'utf-8'); + const readOnlyContent = await fs.readFile(readOnlyTomlPath, 'utf-8'); + const tempPolicyDir = await fs.mkdtemp( path.join(os.tmpdir(), 'plan-policy-test-'), ); try { - await fs.writeFile(path.join(tempPolicyDir, 'plan.toml'), fileContent); + await fs.writeFile(path.join(tempPolicyDir, 'plan.toml'), planContent); + await fs.writeFile( + path.join(tempPolicyDir, 'read-only.toml'), + readOnlyContent, + ); const getPolicyTier = () => 1; // Default tier // 1. Load the actual Plan Mode policies @@ -1004,6 +1015,7 @@ priority = 100 // 4. Verify Behavior: // The Plan Mode "Catch-All Deny" (from plan.toml) should override the Subagent Allow + // Plan Mode Deny (1.04) > Subagent Allow (1.03) const checkResult = await engine.check( { name: 'unknown_subagent' }, undefined, @@ -1015,7 +1027,7 @@ priority = 100 ).toBe(PolicyDecision.DENY); // 5. Verify Explicit Allows still work - // e.g. 'read_file' should be allowed because its priority in plan.toml (70) is higher than the deny (60) + // e.g. 'read_file' should be allowed because its priority in read-only.toml (50) is higher than the deny (40) const readResult = await engine.check({ name: 'read_file' }, undefined); expect( readResult.decision, @@ -1023,6 +1035,7 @@ priority = 100 ).toBe(PolicyDecision.ALLOW); // 6. Verify Built-in Research Subagents are ALLOWED + // codebase_investigator is priority 50 in read-only.toml const codebaseResult = await engine.check( { name: 'codebase_investigator' }, undefined, diff --git a/packages/core/src/policy/types.ts b/packages/core/src/policy/types.ts index 622cde0abd..b843129c99 100644 --- a/packages/core/src/policy/types.ts +++ b/packages/core/src/policy/types.ts @@ -354,9 +354,11 @@ export interface CheckResult { /** * Priority for subagent tools (registered dynamically). - * Effective priority matching Tier 1 (Default) read-only tools. + * Effective priority matching Tier 1 (Default) at priority 30. + * This ensures they are blocked by Plan Mode (priority 40) while + * remaining above directive write tools (priority 10). */ -export const PRIORITY_SUBAGENT_TOOL = 1.05; +export const PRIORITY_SUBAGENT_TOOL = 1.03; /** * The fractional priority of "Always allow" rules (e.g., 950/1000).