refactor(core): replace manual syncPlanModeTools with declarative policy rules (#20596)

This commit is contained in:
Jerop Kipruto
2026-03-02 17:30:50 -05:00
committed by GitHub
parent e43b1cff58
commit d05ba11a31
11 changed files with 198 additions and 226 deletions
+22 -127
View File
@@ -223,8 +223,6 @@ import type {
ModelConfigService,
ModelConfigServiceConfig,
} from '../services/modelConfigService.js';
import { ExitPlanModeTool } from '../tools/exit-plan-mode.js';
import { EnterPlanModeTool } from '../tools/enter-plan-mode.js';
import { LocalLiteRtLmClient } from '../core/localLiteRtLmClient.js';
vi.mock('../core/baseLlmClient.js');
@@ -1204,6 +1202,28 @@ describe('Server Config (config.ts)', () => {
expect(SubAgentToolMock).not.toHaveBeenCalled();
});
// With the `plan` flag on, both plan-mode tools must show up among the tools
// passed to the mocked registry once the config is constructed and initialized.
it('should register EnterPlanModeTool and ExitPlanModeTool when plan is enabled', async () => {
  const planEnabledConfig = new Config({ ...baseParams, plan: true });
  await planEnabledConfig.initialize();

  // Reach into the mocked tool-registry module to inspect registerTool calls.
  const mockedRegistryModule = (await vi.importMock(
    '../tools/tool-registry',
  )) as {
    ToolRegistry: { prototype: { registerTool: Mock } };
  };
  const registerToolCalls =
    mockedRegistryModule.ToolRegistry.prototype.registerTool.mock.calls;

  // Each call's first argument is the tool instance; compare by class name.
  const registeredClassNames = registerToolCalls.map(
    ([tool]) => tool.constructor.name,
  );

  for (const expectedName of ['EnterPlanModeTool', 'ExitPlanModeTool']) {
    expect(registeredClassNames).toContain(expectedName);
  }
});
describe('with minified tool class names', () => {
beforeEach(() => {
Object.defineProperty(
@@ -2961,131 +2981,6 @@ describe('Plans Directory Initialization', () => {
expect(fs.promises.mkdir).not.toHaveBeenCalledWith(plansDir, {
recursive: true,
});
const context = config.getWorkspaceContext();
expect(context.getDirectories()).not.toContain(plansDir);
});
});
// Test suite for Config.syncPlanModeTools(): checks which plan-mode tools are
// registered/unregistered on the tool registry for each approval mode, and that
// the Gemini client's tool list is refreshed when the client is initialized.
describe('syncPlanModeTools', () => {
// Minimal constructor parameters shared by every test in this suite; individual
// tests spread this and override approvalMode / plan as needed.
const baseParams: ConfigParameters = {
sessionId: 'test-session',
targetDir: '.',
debugMode: false,
model: 'test-model',
cwd: '.',
};
it('should register ExitPlanModeTool and unregister EnterPlanModeTool when in PLAN mode', async () => {
const config = new Config({
...baseParams,
approvalMode: ApprovalMode.PLAN,
});
// Substitute a registry created here so register/unregister calls can be spied on.
const registry = new ToolRegistry(config, config.getMessageBus());
vi.spyOn(config, 'getToolRegistry').mockReturnValue(registry);
const registerSpy = vi.spyOn(registry, 'registerTool');
const unregisterSpy = vi.spyOn(registry, 'unregisterTool');
const getToolSpy = vi.spyOn(registry, 'getTool');
// Stub lookups: only enter_plan_mode is reported as present; every other
// name (including exit_plan_mode) is reported as missing.
getToolSpy.mockImplementation((name) => {
if (name === 'enter_plan_mode')
return new EnterPlanModeTool(config, config.getMessageBus());
return undefined;
});
config.syncPlanModeTools();
expect(unregisterSpy).toHaveBeenCalledWith('enter_plan_mode');
expect(registerSpy).toHaveBeenCalledWith(expect.anything());
// The first argument of the first registerTool call must be the exit tool.
const registeredTool = registerSpy.mock.calls[0][0];
const { ExitPlanModeTool } = await import('../tools/exit-plan-mode.js');
expect(registeredTool).toBeInstanceOf(ExitPlanModeTool);
});
it('should register EnterPlanModeTool and unregister ExitPlanModeTool when NOT in PLAN mode and experimental.plan is enabled', async () => {
const config = new Config({
...baseParams,
approvalMode: ApprovalMode.DEFAULT,
plan: true,
});
// Substitute a registry created here so register/unregister calls can be spied on.
const registry = new ToolRegistry(config, config.getMessageBus());
vi.spyOn(config, 'getToolRegistry').mockReturnValue(registry);
const registerSpy = vi.spyOn(registry, 'registerTool');
const unregisterSpy = vi.spyOn(registry, 'unregisterTool');
const getToolSpy = vi.spyOn(registry, 'getTool');
// Stub lookups: only exit_plan_mode is reported as present; every other
// name (including enter_plan_mode) is reported as missing.
getToolSpy.mockImplementation((name) => {
if (name === 'exit_plan_mode')
return new ExitPlanModeTool(config, config.getMessageBus());
return undefined;
});
config.syncPlanModeTools();
expect(unregisterSpy).toHaveBeenCalledWith('exit_plan_mode');
expect(registerSpy).toHaveBeenCalledWith(expect.anything());
// The first argument of the first registerTool call must be the enter tool.
const registeredTool = registerSpy.mock.calls[0][0];
const { EnterPlanModeTool } = await import('../tools/enter-plan-mode.js');
expect(registeredTool).toBeInstanceOf(EnterPlanModeTool);
});
it('should NOT register EnterPlanModeTool when experimental.plan is disabled', async () => {
const config = new Config({
...baseParams,
approvalMode: ApprovalMode.DEFAULT,
plan: false,
});
const registry = new ToolRegistry(config, config.getMessageBus());
vi.spyOn(config, 'getToolRegistry').mockReturnValue(registry);
const registerSpy = vi.spyOn(registry, 'registerTool');
// Every lookup reports "not registered".
vi.spyOn(registry, 'getTool').mockReturnValue(undefined);
config.syncPlanModeTools();
const { EnterPlanModeTool } = await import('../tools/enter-plan-mode.js');
// Scan all registerTool calls: none may carry an EnterPlanModeTool instance.
const registeredTool = registerSpy.mock.calls.find(
(call) => call[0] instanceof EnterPlanModeTool,
);
expect(registeredTool).toBeUndefined();
});
it('should NOT register EnterPlanModeTool when in YOLO mode, even if plan is enabled', async () => {
const config = new Config({
...baseParams,
approvalMode: ApprovalMode.YOLO,
plan: true,
});
const registry = new ToolRegistry(config, config.getMessageBus());
vi.spyOn(config, 'getToolRegistry').mockReturnValue(registry);
const registerSpy = vi.spyOn(registry, 'registerTool');
// Every lookup reports "not registered".
vi.spyOn(registry, 'getTool').mockReturnValue(undefined);
config.syncPlanModeTools();
const { EnterPlanModeTool } = await import('../tools/enter-plan-mode.js');
// Scan all registerTool calls: none may carry an EnterPlanModeTool instance.
const registeredTool = registerSpy.mock.calls.find(
(call) => call[0] instanceof EnterPlanModeTool,
);
expect(registeredTool).toBeUndefined();
});
it('should call geminiClient.setTools if initialized', async () => {
const config = new Config(baseParams);
const registry = new ToolRegistry(config, config.getMessageBus());
vi.spyOn(config, 'getToolRegistry').mockReturnValue(registry);
const client = config.getGeminiClient();
// Force the client to report itself as initialized and replace setTools
// with a stub that resolves immediately.
vi.spyOn(client, 'isInitialized').mockReturnValue(true);
const setToolsSpy = vi
.spyOn(client, 'setTools')
.mockResolvedValue(undefined);
config.syncPlanModeTools();
expect(setToolsSpy).toHaveBeenCalled();
});
});
+5 -47
View File
@@ -370,10 +370,6 @@ import { McpClientManager } from '../tools/mcp-client-manager.js';
import { type McpContext } from '../tools/mcp-client.js';
import type { EnvironmentSanitizationConfig } from '../services/environmentSanitization.js';
import { getErrorMessage } from '../utils/errors.js';
import {
ENTER_PLAN_MODE_TOOL_NAME,
EXIT_PLAN_MODE_TOOL_NAME,
} from '../tools/tool-names.js';
export type { FileFilteringOptions };
export {
@@ -1172,7 +1168,6 @@ export class Config implements McpContext {
}
await this.geminiClient.initialize();
this.syncPlanModeTools();
this.initialized = true;
}
@@ -1998,52 +1993,15 @@ export class Config implements McpContext {
(currentMode === ApprovalMode.YOLO || mode === ApprovalMode.YOLO);
if (isPlanModeTransition || isYoloModeTransition) {
this.syncPlanModeTools();
if (this.geminiClient?.isInitialized()) {
this.geminiClient.setTools().catch((err) => {
debugLogger.error('Failed to update tools', err);
});
}
this.updateSystemInstructionIfInitialized();
}
}
/**
 * Reconciles the registry's enter/exit plan-mode tools with the current
 * approval mode, then asks an initialized Gemini client to refresh its tools.
 *
 * - PLAN mode: `enter_plan_mode` is removed and `exit_plan_mode` is ensured.
 * - Any other mode: `exit_plan_mode` is removed; `enter_plan_mode` is ensured
 *   only when planning is enabled and the mode is not YOLO, otherwise removed.
 *
 * Does nothing when no tool registry is available.
 */
syncPlanModeTools(): void {
  const toolRegistry = this.getToolRegistry();
  if (!toolRegistry) {
    return;
  }

  const mode = this.getApprovalMode();

  // Unregister a tool only if the registry currently knows about it.
  const dropIfPresent = (toolName: string): void => {
    if (toolRegistry.getTool(toolName)) {
      toolRegistry.unregisterTool(toolName);
    }
  };

  if (mode === ApprovalMode.PLAN) {
    dropIfPresent(ENTER_PLAN_MODE_TOOL_NAME);
    if (!toolRegistry.getTool(EXIT_PLAN_MODE_TOOL_NAME)) {
      toolRegistry.registerTool(new ExitPlanModeTool(this, this.messageBus));
    }
  } else {
    dropIfPresent(EXIT_PLAN_MODE_TOOL_NAME);

    const shouldOfferEnterTool =
      this.planEnabled && mode !== ApprovalMode.YOLO;
    if (!shouldOfferEnterTool) {
      dropIfPresent(ENTER_PLAN_MODE_TOOL_NAME);
    } else if (!toolRegistry.getTool(ENTER_PLAN_MODE_TOOL_NAME)) {
      toolRegistry.registerTool(new EnterPlanModeTool(this, this.messageBus));
    }
  }

  // Fire-and-forget refresh; a failure is logged rather than thrown.
  if (this.geminiClient?.isInitialized()) {
    this.geminiClient.setTools().catch((err) => {
      debugLogger.error('Failed to update tools', err);
    });
  }
}
/**
* Logs the duration of the current approval mode.
*/
+40 -12
View File
@@ -5,20 +5,21 @@
#
# Priority bands (tiers):
# - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100)
# - Workspace policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
# - User policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
# - Admin policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
# - Extension policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
# - Workspace policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
# - User policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
# - Admin policies (TOML): 5 + priority/1000 (e.g., priority 100 → 5.100)
#
# This ensures Admin > User > Workspace > Default hierarchy is always preserved,
# This ensures Admin > User > Workspace > Extension > Default hierarchy is always preserved,
# while allowing user-specified priorities to work within each tier.
#
# Settings-based and dynamic rules (all in user tier 3.x):
# 3.95: Tools that the user has selected as "Always Allow" in the interactive UI
# 3.9: MCP servers excluded list (security: persistent server blocks)
# 3.4: Command line flag --exclude-tools (explicit temporary blocks)
# 3.3: Command line flag --allowed-tools (explicit temporary allows)
# 3.2: MCP servers with trust=true (persistent trusted servers)
# 3.1: MCP servers allowed list (persistent general server allows)
# Settings-based and dynamic rules (all in user tier 4.x):
# 4.95: Tools that the user has selected as "Always Allow" in the interactive UI
# 4.9: MCP servers excluded list (security: persistent server blocks)
# 4.4: Command line flag --exclude-tools (explicit temporary blocks)
# 4.3: Command line flag --allowed-tools (explicit temporary allows)
# 4.2: MCP servers with trust=true (persistent trusted servers)
# 4.1: MCP servers allowed list (persistent general server allows)
#
# TOML policy priorities (before transformation):
# 10: Write tools default to ASK_USER (becomes 1.010 in default tier)
@@ -26,6 +27,33 @@
# 70: Plan mode explicit ALLOW override (becomes 1.070 in default tier)
# 999: YOLO mode allow-all (becomes 1.999 in default tier)
# Mode Transitions (into/out of Plan Mode)
#
# Each transition tool gets two rules: a baseline rule with no `modes` filter
# (priority 50) and a Plan-mode-only rule (priority 70).
# NOTE(review): presumably the higher-priority rule takes precedence while in
# Plan mode — confirm against the policy engine's precedence handling.

# Baseline for enter_plan_mode (no `modes` filter): ask the user.
[[rule]]
toolName = "enter_plan_mode"
decision = "ask_user"
priority = 50
# Plan mode only: deny enter_plan_mode with an explanatory message.
[[rule]]
toolName = "enter_plan_mode"
decision = "deny"
priority = 70
modes = ["plan"]
deny_message = "You are already in Plan Mode."
# Plan mode only: ask the user before exit_plan_mode runs.
[[rule]]
toolName = "exit_plan_mode"
decision = "ask_user"
priority = 70
modes = ["plan"]
# Baseline for exit_plan_mode (no `modes` filter): deny with a hint to enter
# Plan Mode first.
[[rule]]
toolName = "exit_plan_mode"
decision = "deny"
priority = 50
deny_message = "You are not currently in Plan Mode. Use enter_plan_mode first to design a plan."
# Catch-All: Deny everything by default in Plan mode.
[[rule]]
@@ -50,7 +78,7 @@ priority = 70
modes = ["plan"]
[[rule]]
toolName = ["ask_user", "exit_plan_mode", "save_memory"]
toolName = ["ask_user", "save_memory"]
decision = "ask_user"
priority = 70
modes = ["plan"]
@@ -5,20 +5,21 @@
#
# Priority bands (tiers):
# - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100)
# - Workspace policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
# - User policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
# - Admin policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
# - Extension policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
# - Workspace policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
# - User policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
# - Admin policies (TOML): 5 + priority/1000 (e.g., priority 100 → 5.100)
#
# This ensures Admin > User > Workspace > Default hierarchy is always preserved,
# This ensures Admin > User > Workspace > Extension > Default hierarchy is always preserved,
# while allowing user-specified priorities to work within each tier.
#
# Settings-based and dynamic rules (all in user tier 3.x):
# 3.95: Tools that the user has selected as "Always Allow" in the interactive UI
# 3.9: MCP servers excluded list (security: persistent server blocks)
# 3.4: Command line flag --exclude-tools (explicit temporary blocks)
# 3.3: Command line flag --allowed-tools (explicit temporary allows)
# 3.2: MCP servers with trust=true (persistent trusted servers)
# 3.1: MCP servers allowed list (persistent general server allows)
# Settings-based and dynamic rules (all in user tier 4.x):
# 4.95: Tools that the user has selected as "Always Allow" in the interactive UI
# 4.9: MCP servers excluded list (security: persistent server blocks)
# 4.4: Command line flag --exclude-tools (explicit temporary blocks)
# 4.3: Command line flag --allowed-tools (explicit temporary allows)
# 4.2: MCP servers with trust=true (persistent trusted servers)
# 4.1: MCP servers allowed list (persistent general server allows)
#
# TOML policy priorities (before transformation):
# 10: Write tools default to ASK_USER (becomes 1.010 in default tier)
+12 -11
View File
@@ -5,20 +5,21 @@
#
# Priority bands (tiers):
# - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100)
# - Workspace policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
# - User policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
# - Admin policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
# - Extension policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
# - Workspace policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
# - User policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
# - Admin policies (TOML): 5 + priority/1000 (e.g., priority 100 → 5.100)
#
# This ensures Admin > User > Workspace > Default hierarchy is always preserved,
# This ensures Admin > User > Workspace > Extension > Default hierarchy is always preserved,
# while allowing user-specified priorities to work within each tier.
#
# Settings-based and dynamic rules (all in user tier 3.x):
# 3.95: Tools that the user has selected as "Always Allow" in the interactive UI
# 3.9: MCP servers excluded list (security: persistent server blocks)
# 3.4: Command line flag --exclude-tools (explicit temporary blocks)
# 3.3: Command line flag --allowed-tools (explicit temporary allows)
# 3.2: MCP servers with trust=true (persistent trusted servers)
# 3.1: MCP servers allowed list (persistent general server allows)
# Settings-based and dynamic rules (all in user tier 4.x):
# 4.95: Tools that the user has selected as "Always Allow" in the interactive UI
# 4.9: MCP servers excluded list (security: persistent server blocks)
# 4.4: Command line flag --exclude-tools (explicit temporary blocks)
# 4.3: Command line flag --allowed-tools (explicit temporary allows)
# 4.2: MCP servers with trust=true (persistent trusted servers)
# 4.1: MCP servers allowed list (persistent general server allows)
#
# TOML policy priorities (before transformation):
# 10: Write tools default to ASK_USER (becomes 1.010 in default tier)
+21 -11
View File
@@ -5,20 +5,21 @@
#
# Priority bands (tiers):
# - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100)
# - Workspace policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
# - User policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
# - Admin policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
# - Extension policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
# - Workspace policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
# - User policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
# - Admin policies (TOML): 5 + priority/1000 (e.g., priority 100 → 5.100)
#
# This ensures Admin > User > Workspace > Default hierarchy is always preserved,
# This ensures Admin > User > Workspace > Extension > Default hierarchy is always preserved,
# while allowing user-specified priorities to work within each tier.
#
# Settings-based and dynamic rules (all in user tier 3.x):
# 3.95: Tools that the user has selected as "Always Allow" in the interactive UI
# 3.9: MCP servers excluded list (security: persistent server blocks)
# 3.4: Command line flag --exclude-tools (explicit temporary blocks)
# 3.3: Command line flag --allowed-tools (explicit temporary allows)
# 3.2: MCP servers with trust=true (persistent trusted servers)
# 3.1: MCP servers allowed list (persistent general server allows)
# Settings-based and dynamic rules (all in user tier 4.x):
# 4.95: Tools that the user has selected as "Always Allow" in the interactive UI
# 4.9: MCP servers excluded list (security: persistent server blocks)
# 4.4: Command line flag --exclude-tools (explicit temporary blocks)
# 4.3: Command line flag --allowed-tools (explicit temporary allows)
# 4.2: MCP servers with trust=true (persistent trusted servers)
# 4.1: MCP servers allowed list (persistent general server allows)
#
# TOML policy priorities (before transformation):
# 10: Write tools default to ASK_USER (becomes 1.010 in default tier)
@@ -36,6 +37,15 @@ decision = "ask_user"
priority = 999
modes = ["yolo"]
# Plan mode transitions are blocked in YOLO mode to maintain state consistency
# and because planning currently requires human interaction (plan approval),
# which conflicts with YOLO's autonomous nature.
[[rule]]
toolName = ["enter_plan_mode", "exit_plan_mode"]
decision = "deny"
priority = 999
modes = ["yolo"]
# Allow everything else in YOLO mode
[[rule]]
decision = "allow"
@@ -2808,6 +2808,82 @@ describe('PolicyEngine', () => {
'Execution of scripts (including those from skills) is blocked',
);
});
// A PLAN-scoped DENY rule must match while the engine is in PLAN mode and
// expose its custom deny message on the returned rule.
it('should deny enter_plan_mode when already in PLAN mode', async () => {
  const alreadyPlanningMessage = 'You are already in Plan Mode.';
  const denyEnterWhilePlanning: PolicyRule = {
    toolName: 'enter_plan_mode',
    decision: PolicyDecision.DENY,
    priority: 70,
    modes: [ApprovalMode.PLAN],
    denyMessage: alreadyPlanningMessage,
  };

  engine = new PolicyEngine({
    rules: [denyEnterWhilePlanning],
    approvalMode: ApprovalMode.PLAN,
  });

  const outcome = await engine.check({ name: 'enter_plan_mode' }, undefined);

  expect(outcome.decision).toBe(PolicyDecision.DENY);
  expect(outcome.rule?.denyMessage).toBe(alreadyPlanningMessage);
});
// A DEFAULT-scoped DENY rule must match while the engine is in DEFAULT mode
// and expose its custom deny message on the returned rule.
it('should deny exit_plan_mode when in DEFAULT mode', async () => {
  const notPlanningMessage = 'You are not in Plan Mode.';
  const denyExitOutsidePlan: PolicyRule = {
    toolName: 'exit_plan_mode',
    decision: PolicyDecision.DENY,
    priority: 10,
    modes: [ApprovalMode.DEFAULT],
    denyMessage: notPlanningMessage,
  };

  engine = new PolicyEngine({
    rules: [denyExitOutsidePlan],
    approvalMode: ApprovalMode.DEFAULT,
  });

  const outcome = await engine.check({ name: 'exit_plan_mode' }, undefined);

  expect(outcome.decision).toBe(PolicyDecision.DENY);
  expect(outcome.rule?.denyMessage).toBe(notPlanningMessage);
});
// With one YOLO-scoped DENY rule per plan-transition tool, both tools must be
// denied while the engine is in YOLO mode.
it('should deny both plan tools in YOLO mode', async () => {
  const planTransitionTools = ['enter_plan_mode', 'exit_plan_mode'];

  // Same rule shape for each tool, in the order enter → exit.
  const yoloDenyRules: PolicyRule[] = planTransitionTools.map((toolName) => ({
    toolName,
    decision: PolicyDecision.DENY,
    priority: 999,
    modes: [ApprovalMode.YOLO],
  }));

  engine = new PolicyEngine({
    rules: yoloDenyRules,
    approvalMode: ApprovalMode.YOLO,
  });

  // Check sequentially, enter first and then exit.
  for (const name of planTransitionTools) {
    const verdict = await engine.check({ name }, undefined);
    expect(verdict.decision).toBe(PolicyDecision.DENY);
  }
});
});
describe('removeRulesByTier', () => {
+3 -1
View File
@@ -28,9 +28,11 @@ export class AskUserTool extends BaseDeclarativeTool<
AskUserParams,
ToolResult
> {
static readonly Name = ASK_USER_TOOL_NAME;
constructor(messageBus: MessageBus) {
super(
ASK_USER_TOOL_NAME,
AskUserTool.Name,
ASK_USER_DISPLAY_NAME,
ASK_USER_DEFINITION.base.description!,
Kind.Communicate,
+3 -1
View File
@@ -27,12 +27,14 @@ export class EnterPlanModeTool extends BaseDeclarativeTool<
EnterPlanModeParams,
ToolResult
> {
static readonly Name = ENTER_PLAN_MODE_TOOL_NAME;
constructor(
private config: Config,
messageBus: MessageBus,
) {
super(
ENTER_PLAN_MODE_TOOL_NAME,
EnterPlanModeTool.Name,
'Enter Plan Mode',
ENTER_PLAN_MODE_DEFINITION.base.description!,
Kind.Plan,
+3 -1
View File
@@ -35,6 +35,8 @@ export class ExitPlanModeTool extends BaseDeclarativeTool<
ExitPlanModeParams,
ToolResult
> {
static readonly Name = EXIT_PLAN_MODE_TOOL_NAME;
constructor(
private config: Config,
messageBus: MessageBus,
@@ -42,7 +44,7 @@ export class ExitPlanModeTool extends BaseDeclarativeTool<
const plansDir = config.storage.getPlansDir();
const definition = getExitPlanModeDefinition(plansDir);
super(
EXIT_PLAN_MODE_TOOL_NAME,
ExitPlanModeTool.Name,
'Exit Plan Mode',
definition.base.description!,
Kind.Plan,