refactor(core): replace manual syncPlanModeTools with declarative policy rules (#20596)

2026-05-13 05:12:55 -07:00 · 2026-03-02 17:30:50 -05:00
parent e43b1cff58
commit d05ba11a31
11 changed files with 198 additions and 226 deletions
@@ -182,10 +182,7 @@ describe('Plan Mode', () => {
        'I want to perform a complex refactoring. Please enter plan mode so we can design it first.',
    });
-    const enterPlanCallFound = await rig.waitForToolCall(
+    const enterPlanCallFound = await rig.waitForToolCall('enter_plan_mode');
      'enter_plan_mode',
      10000,
    );
    expect(enterPlanCallFound, 'Expected enter_plan_mode to be called').toBe(
      true,
    );
@@ -223,8 +223,6 @@ import type {
  ModelConfigService,
  ModelConfigServiceConfig,
 } from '../services/modelConfigService.js';
 import { ExitPlanModeTool } from '../tools/exit-plan-mode.js';
 import { EnterPlanModeTool } from '../tools/enter-plan-mode.js';
 import { LocalLiteRtLmClient } from '../core/localLiteRtLmClient.js';
 vi.mock('../core/baseLlmClient.js');
@@ -1204,6 +1202,28 @@ describe('Server Config (config.ts)', () => {
      expect(SubAgentToolMock).not.toHaveBeenCalled();
    });
    it('should register EnterPlanModeTool and ExitPlanModeTool when plan is enabled', async () => {
      // Build and initialize a Config with the plan feature switched on.
      const config = new Config({ ...baseParams, plan: true });
      await config.initialize();

      // Reach the mocked ToolRegistry prototype to see what was registered.
      const mockedRegistryModule = (await vi.importMock(
        '../tools/tool-registry',
      )) as {
        ToolRegistry: { prototype: { registerTool: Mock } };
      };
      const recordedCalls =
        mockedRegistryModule.ToolRegistry.prototype.registerTool.mock.calls;

      // Reduce every registerTool call to the class name of its first argument.
      const registeredTools = recordedCalls.map(
        ([tool]) => tool.constructor.name,
      );

      for (const expectedName of ['EnterPlanModeTool', 'ExitPlanModeTool']) {
        expect(registeredTools).toContain(expectedName);
      }
    });
    describe('with minified tool class names', () => {
      beforeEach(() => {
        Object.defineProperty(
@@ -2961,131 +2981,6 @@ describe('Plans Directory Initialization', () => {
    expect(fs.promises.mkdir).not.toHaveBeenCalledWith(plansDir, {
      recursive: true,
    });
    const context = config.getWorkspaceContext();
    expect(context.getDirectories()).not.toContain(plansDir);
  });
 });
 // Suite for Config.syncPlanModeTools(). Each case constructs a Config,
 // substitutes a spied ToolRegistry via getToolRegistry, invokes
 // syncPlanModeTools(), and then inspects the recorded spy calls.
 describe('syncPlanModeTools', () => {
  // Minimal constructor parameters shared by every case; individual tests
  // spread this object and add approvalMode / plan as needed.
  const baseParams: ConfigParameters = {
    sessionId: 'test-session',
    targetDir: '.',
    debugMode: false,
    model: 'test-model',
    cwd: '.',
  };
  it('should register ExitPlanModeTool and unregister EnterPlanModeTool when in PLAN mode', async () => {
    const config = new Config({
      ...baseParams,
      approvalMode: ApprovalMode.PLAN,
    });
    // Hand the Config a fresh registry so the spies below observe only the
    // calls made by syncPlanModeTools().
    const registry = new ToolRegistry(config, config.getMessageBus());
    vi.spyOn(config, 'getToolRegistry').mockReturnValue(registry);
    const registerSpy = vi.spyOn(registry, 'registerTool');
    const unregisterSpy = vi.spyOn(registry, 'unregisterTool');
    const getToolSpy = vi.spyOn(registry, 'getTool');
    // Simulate a registry that already holds enter_plan_mode and nothing else.
    getToolSpy.mockImplementation((name) => {
      if (name === 'enter_plan_mode')
        return new EnterPlanModeTool(config, config.getMessageBus());
      return undefined;
    });
    config.syncPlanModeTools();
    expect(unregisterSpy).toHaveBeenCalledWith('enter_plan_mode');
    expect(registerSpy).toHaveBeenCalledWith(expect.anything());
    // First argument of the first recorded registerTool call.
    const registeredTool = registerSpy.mock.calls[0][0];
    const { ExitPlanModeTool } = await import('../tools/exit-plan-mode.js');
    expect(registeredTool).toBeInstanceOf(ExitPlanModeTool);
  });
  it('should register EnterPlanModeTool and unregister ExitPlanModeTool when NOT in PLAN mode and experimental.plan is enabled', async () => {
    const config = new Config({
      ...baseParams,
      approvalMode: ApprovalMode.DEFAULT,
      plan: true,
    });
    const registry = new ToolRegistry(config, config.getMessageBus());
    vi.spyOn(config, 'getToolRegistry').mockReturnValue(registry);
    const registerSpy = vi.spyOn(registry, 'registerTool');
    const unregisterSpy = vi.spyOn(registry, 'unregisterTool');
    const getToolSpy = vi.spyOn(registry, 'getTool');
    // Mirror image of the PLAN-mode case: only exit_plan_mode is reported as
    // present in the registry.
    getToolSpy.mockImplementation((name) => {
      if (name === 'exit_plan_mode')
        return new ExitPlanModeTool(config, config.getMessageBus());
      return undefined;
    });
    config.syncPlanModeTools();
    expect(unregisterSpy).toHaveBeenCalledWith('exit_plan_mode');
    expect(registerSpy).toHaveBeenCalledWith(expect.anything());
    // First argument of the first recorded registerTool call.
    const registeredTool = registerSpy.mock.calls[0][0];
    const { EnterPlanModeTool } = await import('../tools/enter-plan-mode.js');
    expect(registeredTool).toBeInstanceOf(EnterPlanModeTool);
  });
  it('should NOT register EnterPlanModeTool when experimental.plan is disabled', async () => {
    const config = new Config({
      ...baseParams,
      approvalMode: ApprovalMode.DEFAULT,
      plan: false,
    });
    const registry = new ToolRegistry(config, config.getMessageBus());
    vi.spyOn(config, 'getToolRegistry').mockReturnValue(registry);
    const registerSpy = vi.spyOn(registry, 'registerTool');
    // Registry reports every tool as absent.
    vi.spyOn(registry, 'getTool').mockReturnValue(undefined);
    config.syncPlanModeTools();
    const { EnterPlanModeTool } = await import('../tools/enter-plan-mode.js');
    // Scan all registerTool calls for an EnterPlanModeTool instance; none is
    // expected.
    const registeredTool = registerSpy.mock.calls.find(
      (call) => call[0] instanceof EnterPlanModeTool,
    );
    expect(registeredTool).toBeUndefined();
  });
  it('should NOT register EnterPlanModeTool when in YOLO mode, even if plan is enabled', async () => {
    const config = new Config({
      ...baseParams,
      approvalMode: ApprovalMode.YOLO,
      plan: true,
    });
    const registry = new ToolRegistry(config, config.getMessageBus());
    vi.spyOn(config, 'getToolRegistry').mockReturnValue(registry);
    const registerSpy = vi.spyOn(registry, 'registerTool');
    // Registry reports every tool as absent.
    vi.spyOn(registry, 'getTool').mockReturnValue(undefined);
    config.syncPlanModeTools();
    const { EnterPlanModeTool } = await import('../tools/enter-plan-mode.js');
    // Scan all registerTool calls for an EnterPlanModeTool instance; none is
    // expected.
    const registeredTool = registerSpy.mock.calls.find(
      (call) => call[0] instanceof EnterPlanModeTool,
    );
    expect(registeredTool).toBeUndefined();
  });
  it('should call geminiClient.setTools if initialized', async () => {
    const config = new Config(baseParams);
    const registry = new ToolRegistry(config, config.getMessageBus());
    vi.spyOn(config, 'getToolRegistry').mockReturnValue(registry);
    const client = config.getGeminiClient();
    // Force the client to report itself as initialized and stub setTools so
    // the call can be observed without running the real implementation.
    vi.spyOn(client, 'isInitialized').mockReturnValue(true);
    const setToolsSpy = vi
      .spyOn(client, 'setTools')
      .mockResolvedValue(undefined);
    config.syncPlanModeTools();
    expect(setToolsSpy).toHaveBeenCalled();
  });
 });
@@ -370,10 +370,6 @@ import { McpClientManager } from '../tools/mcp-client-manager.js';
 import { type McpContext } from '../tools/mcp-client.js';
 import type { EnvironmentSanitizationConfig } from '../services/environmentSanitization.js';
 import { getErrorMessage } from '../utils/errors.js';
 import {
  ENTER_PLAN_MODE_TOOL_NAME,
  EXIT_PLAN_MODE_TOOL_NAME,
 } from '../tools/tool-names.js';
 export type { FileFilteringOptions };
 export {
@@ -1172,7 +1168,6 @@ export class Config implements McpContext {
    }
    await this.geminiClient.initialize();
    this.syncPlanModeTools();
    this.initialized = true;
  }
@@ -1998,52 +1993,15 @@ export class Config implements McpContext {
      (currentMode === ApprovalMode.YOLO || mode === ApprovalMode.YOLO);
    if (isPlanModeTransition || isYoloModeTransition) {
-      this.syncPlanModeTools();
+      if (this.geminiClient?.isInitialized()) {
        this.geminiClient.setTools().catch((err) => {
          debugLogger.error('Failed to update tools', err);
        });
      }
      this.updateSystemInstructionIfInitialized();
    }
  }
  /**
   * Brings the registered enter/exit plan mode tools in line with the
   * current approval mode, then asks an initialized Gemini client to refresh
   * its tool list.
   *
   * - PLAN mode: `enter_plan_mode` is removed and `exit_plan_mode` is added.
   * - Any other mode: `exit_plan_mode` is removed; `enter_plan_mode` is added
   *   only when planning is enabled and the mode is not YOLO, otherwise it is
   *   removed.
   *
   * Does nothing when no tool registry is available.
   */
  syncPlanModeTools(): void {
    const toolRegistry = this.getToolRegistry();
    if (!toolRegistry) {
      return;
    }

    // Unregister a tool only if the registry currently holds it.
    const dropIfPresent = (toolName: string): void => {
      if (toolRegistry.getTool(toolName)) {
        toolRegistry.unregisterTool(toolName);
      }
    };

    const mode = this.getApprovalMode();
    if (mode === ApprovalMode.PLAN) {
      dropIfPresent(ENTER_PLAN_MODE_TOOL_NAME);
      if (!toolRegistry.getTool(EXIT_PLAN_MODE_TOOL_NAME)) {
        toolRegistry.registerTool(new ExitPlanModeTool(this, this.messageBus));
      }
    } else {
      dropIfPresent(EXIT_PLAN_MODE_TOOL_NAME);
      const mayEnterPlanMode = this.planEnabled && mode !== ApprovalMode.YOLO;
      if (!mayEnterPlanMode) {
        dropIfPresent(ENTER_PLAN_MODE_TOOL_NAME);
      } else if (!toolRegistry.getTool(ENTER_PLAN_MODE_TOOL_NAME)) {
        toolRegistry.registerTool(new EnterPlanModeTool(this, this.messageBus));
      }
    }

    // Fire-and-forget refresh; a failure is logged rather than thrown.
    const client = this.geminiClient;
    if (client?.isInitialized()) {
      client.setTools().catch((err) => {
        debugLogger.error('Failed to update tools', err);
      });
    }
  }
  /**
   * Logs the duration of the current approval mode.
   */
@@ -5,20 +5,21 @@
 #
 # Priority bands (tiers):
 # - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100)
-# - Workspace policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
+# - Extension policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
-# - User policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
+# - Workspace policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
-# - Admin policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
+# - User policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
 # - Admin policies (TOML): 5 + priority/1000 (e.g., priority 100 → 5.100)
 #
-# This ensures Admin > User > Workspace > Default hierarchy is always preserved,
+# This ensures Admin > User > Workspace > Extension > Default hierarchy is always preserved,
 # while allowing user-specified priorities to work within each tier.
 #
-# Settings-based and dynamic rules (all in user tier 3.x):
+# Settings-based and dynamic rules (all in user tier 4.x):
-#   3.95: Tools that the user has selected as "Always Allow" in the interactive UI
+#   4.95: Tools that the user has selected as "Always Allow" in the interactive UI
-#   3.9:  MCP servers excluded list (security: persistent server blocks)
+#   4.9:  MCP servers excluded list (security: persistent server blocks)
-#   3.4:  Command line flag --exclude-tools (explicit temporary blocks)
+#   4.4:  Command line flag --exclude-tools (explicit temporary blocks)
-#   3.3:  Command line flag --allowed-tools (explicit temporary allows)
+#   4.3:  Command line flag --allowed-tools (explicit temporary allows)
-#   3.2:  MCP servers with trust=true (persistent trusted servers)
+#   4.2:  MCP servers with trust=true (persistent trusted servers)
-#   3.1:  MCP servers allowed list (persistent general server allows)
+#   4.1:  MCP servers allowed list (persistent general server allows)
 #
 # TOML policy priorities (before transformation):
 #   10: Write tools default to ASK_USER (becomes 1.010 in default tier)
@@ -26,6 +27,33 @@
 #   70: Plan mode explicit ALLOW override (becomes 1.070 in default tier)
 #   999: YOLO mode allow-all (becomes 1.999 in default tier)
 # Mode Transitions (into/out of Plan Mode)
 [[rule]]
 toolName = "enter_plan_mode"
 decision = "ask_user"
 priority = 50
 [[rule]]
 toolName = "enter_plan_mode"
 decision = "deny"
 priority = 70
 modes = ["plan"]
 deny_message = "You are already in Plan Mode."
 [[rule]]
 toolName = "exit_plan_mode"
 decision = "ask_user"
 priority = 70
 modes = ["plan"]
 [[rule]]
 toolName = "exit_plan_mode"
 decision = "deny"
 priority = 50
 deny_message = "You are not currently in Plan Mode. Use enter_plan_mode first to design a plan."
 # Catch-All: Deny everything by default in Plan mode.
 [[rule]]
@@ -50,7 +78,7 @@ priority = 70
 modes = ["plan"]
 [[rule]]
-toolName = ["ask_user", "exit_plan_mode", "save_memory"]
+toolName = ["ask_user", "save_memory"]
 decision = "ask_user"
 priority = 70
 modes = ["plan"]
@@ -5,20 +5,21 @@
 #
 # Priority bands (tiers):
 # - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100)
-# - Workspace policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
+# - Extension policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
-# - User policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
+# - Workspace policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
-# - Admin policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
+# - User policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
 # - Admin policies (TOML): 5 + priority/1000 (e.g., priority 100 → 5.100)
 #
-# This ensures Admin > User > Workspace > Default hierarchy is always preserved,
+# This ensures Admin > User > Workspace > Extension > Default hierarchy is always preserved,
 # while allowing user-specified priorities to work within each tier.
 #
-# Settings-based and dynamic rules (all in user tier 3.x):
+# Settings-based and dynamic rules (all in user tier 4.x):
-#   3.95: Tools that the user has selected as "Always Allow" in the interactive UI
+#   4.95: Tools that the user has selected as "Always Allow" in the interactive UI
-#   3.9:  MCP servers excluded list (security: persistent server blocks)
+#   4.9:  MCP servers excluded list (security: persistent server blocks)
-#   3.4:  Command line flag --exclude-tools (explicit temporary blocks)
+#   4.4:  Command line flag --exclude-tools (explicit temporary blocks)
-#   3.3:  Command line flag --allowed-tools (explicit temporary allows)
+#   4.3:  Command line flag --allowed-tools (explicit temporary allows)
-#   3.2:  MCP servers with trust=true (persistent trusted servers)
+#   4.2:  MCP servers with trust=true (persistent trusted servers)
-#   3.1:  MCP servers allowed list (persistent general server allows)
+#   4.1:  MCP servers allowed list (persistent general server allows)
 #
 # TOML policy priorities (before transformation):
 #   10: Write tools default to ASK_USER (becomes 1.010 in default tier)
@@ -5,20 +5,21 @@
 #
 # Priority bands (tiers):
 # - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100)
-# - Workspace policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
+# - Extension policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
-# - User policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
+# - Workspace policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
-# - Admin policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
+# - User policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
 # - Admin policies (TOML): 5 + priority/1000 (e.g., priority 100 → 5.100)
 #
-# This ensures Admin > User > Workspace > Default hierarchy is always preserved,
+# This ensures Admin > User > Workspace > Extension > Default hierarchy is always preserved,
 # while allowing user-specified priorities to work within each tier.
 #
-# Settings-based and dynamic rules (all in user tier 3.x):
+# Settings-based and dynamic rules (all in user tier 4.x):
-#   3.95: Tools that the user has selected as "Always Allow" in the interactive UI
+#   4.95: Tools that the user has selected as "Always Allow" in the interactive UI
-#   3.9:  MCP servers excluded list (security: persistent server blocks)
+#   4.9:  MCP servers excluded list (security: persistent server blocks)
-#   3.4:  Command line flag --exclude-tools (explicit temporary blocks)
+#   4.4:  Command line flag --exclude-tools (explicit temporary blocks)
-#   3.3:  Command line flag --allowed-tools (explicit temporary allows)
+#   4.3:  Command line flag --allowed-tools (explicit temporary allows)
-#   3.2:  MCP servers with trust=true (persistent trusted servers)
+#   4.2:  MCP servers with trust=true (persistent trusted servers)
-#   3.1:  MCP servers allowed list (persistent general server allows)
+#   4.1:  MCP servers allowed list (persistent general server allows)
 #
 # TOML policy priorities (before transformation):
 #   10: Write tools default to ASK_USER (becomes 1.010 in default tier)
@@ -5,20 +5,21 @@
 #
 # Priority bands (tiers):
 # - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100)
-# - Workspace policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
+# - Extension policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
-# - User policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
+# - Workspace policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
-# - Admin policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
+# - User policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
 # - Admin policies (TOML): 5 + priority/1000 (e.g., priority 100 → 5.100)
 #
-# This ensures Admin > User > Workspace > Default hierarchy is always preserved,
+# This ensures Admin > User > Workspace > Extension > Default hierarchy is always preserved,
 # while allowing user-specified priorities to work within each tier.
 #
-# Settings-based and dynamic rules (all in user tier 3.x):
+# Settings-based and dynamic rules (all in user tier 4.x):
-#   3.95: Tools that the user has selected as "Always Allow" in the interactive UI
+#   4.95: Tools that the user has selected as "Always Allow" in the interactive UI
-#   3.9:  MCP servers excluded list (security: persistent server blocks)
+#   4.9:  MCP servers excluded list (security: persistent server blocks)
-#   3.4:  Command line flag --exclude-tools (explicit temporary blocks)
+#   4.4:  Command line flag --exclude-tools (explicit temporary blocks)
-#   3.3:  Command line flag --allowed-tools (explicit temporary allows)
+#   4.3:  Command line flag --allowed-tools (explicit temporary allows)
-#   3.2:  MCP servers with trust=true (persistent trusted servers)
+#   4.2:  MCP servers with trust=true (persistent trusted servers)
-#   3.1:  MCP servers allowed list (persistent general server allows)
+#   4.1:  MCP servers allowed list (persistent general server allows)
 #
 # TOML policy priorities (before transformation):
 #   10: Write tools default to ASK_USER (becomes 1.010 in default tier)
@@ -36,6 +37,15 @@ decision = "ask_user"
 priority = 999
 modes = ["yolo"]
 # Plan mode transitions are blocked in YOLO mode to maintain state consistency
 # and because planning currently requires human interaction (plan approval),
 # which conflicts with YOLO's autonomous nature.
 [[rule]]
 toolName = ["enter_plan_mode", "exit_plan_mode"]
 decision = "deny"
 priority = 999
 modes = ["yolo"]
 # Allow everything else in YOLO mode
 [[rule]]
 decision = "allow"
@@ -2808,6 +2808,82 @@ describe('PolicyEngine', () => {
        'Execution of scripts (including those from skills) is blocked',
      );
    });
    it('should deny enter_plan_mode when already in PLAN mode', async () => {
      // A single PLAN-scoped DENY rule for enter_plan_mode.
      const expectedMessage = 'You are already in Plan Mode.';
      const alreadyPlanningRule: PolicyRule = {
        toolName: 'enter_plan_mode',
        decision: PolicyDecision.DENY,
        priority: 70,
        modes: [ApprovalMode.PLAN],
        denyMessage: expectedMessage,
      };
      engine = new PolicyEngine({
        rules: [alreadyPlanningRule],
        approvalMode: ApprovalMode.PLAN,
      });

      const { decision, rule } = await engine.check(
        { name: 'enter_plan_mode' },
        undefined,
      );

      // The engine must deny and surface the rule carrying the message.
      expect(decision).toBe(PolicyDecision.DENY);
      expect(rule?.denyMessage).toBe(expectedMessage);
    });
    it('should deny exit_plan_mode when in DEFAULT mode', async () => {
      // A single DEFAULT-scoped DENY rule for exit_plan_mode.
      const expectedMessage = 'You are not in Plan Mode.';
      const notPlanningRule: PolicyRule = {
        toolName: 'exit_plan_mode',
        decision: PolicyDecision.DENY,
        priority: 10,
        modes: [ApprovalMode.DEFAULT],
        denyMessage: expectedMessage,
      };
      engine = new PolicyEngine({
        rules: [notPlanningRule],
        approvalMode: ApprovalMode.DEFAULT,
      });

      const { decision, rule } = await engine.check(
        { name: 'exit_plan_mode' },
        undefined,
      );

      // The engine must deny and surface the rule carrying the message.
      expect(decision).toBe(PolicyDecision.DENY);
      expect(rule?.denyMessage).toBe(expectedMessage);
    });
    it('should deny both plan tools in YOLO mode', async () => {
      // One YOLO-scoped DENY rule per plan-mode transition tool.
      const planTools = ['enter_plan_mode', 'exit_plan_mode'];
      const yoloDenyRules: PolicyRule[] = planTools.map((toolName) => ({
        toolName,
        decision: PolicyDecision.DENY,
        priority: 999,
        modes: [ApprovalMode.YOLO],
      }));
      engine = new PolicyEngine({
        rules: yoloDenyRules,
        approvalMode: ApprovalMode.YOLO,
      });

      // Check enter first, then exit, one after the other.
      for (const name of planTools) {
        const { decision } = await engine.check({ name }, undefined);
        expect(decision).toBe(PolicyDecision.DENY);
      }
    });
  });
  describe('removeRulesByTier', () => {
@@ -28,9 +28,11 @@ export class AskUserTool extends BaseDeclarativeTool<
  AskUserParams,
  ToolResult
 > {
  static readonly Name = ASK_USER_TOOL_NAME;
  constructor(messageBus: MessageBus) {
    super(
-      ASK_USER_TOOL_NAME,
+      AskUserTool.Name,
      ASK_USER_DISPLAY_NAME,
      ASK_USER_DEFINITION.base.description!,
      Kind.Communicate,
@@ -27,12 +27,14 @@ export class EnterPlanModeTool extends BaseDeclarativeTool<
  EnterPlanModeParams,
  ToolResult
 > {
  static readonly Name = ENTER_PLAN_MODE_TOOL_NAME;
  constructor(
    private config: Config,
    messageBus: MessageBus,
  ) {
    super(
-      ENTER_PLAN_MODE_TOOL_NAME,
+      EnterPlanModeTool.Name,
      'Enter Plan Mode',
      ENTER_PLAN_MODE_DEFINITION.base.description!,
      Kind.Plan,
@@ -35,6 +35,8 @@ export class ExitPlanModeTool extends BaseDeclarativeTool<
  ExitPlanModeParams,
  ToolResult
 > {
  static readonly Name = EXIT_PLAN_MODE_TOOL_NAME;
  constructor(
    private config: Config,
    messageBus: MessageBus,
@@ -42,7 +44,7 @@ export class ExitPlanModeTool extends BaseDeclarativeTool<
    const plansDir = config.storage.getPlansDir();
    const definition = getExitPlanModeDefinition(plansDir);
    super(
-      EXIT_PLAN_MODE_TOOL_NAME,
+      ExitPlanModeTool.Name,
      'Exit Plan Mode',
      definition.base.description!,
      Kind.Plan,