From f5bd474e519a3f45706642da170e32ea44c46bbf Mon Sep 17 00:00:00 2001 From: Allen Hutchison Date: Wed, 5 Nov 2025 10:10:23 -0800 Subject: [PATCH] fix(core): prevent server name spoofing in policy engine (#12511) --- .../config/policy-engine.integration.test.ts | 134 ++++++---- .../core/src/confirmation-bus/message-bus.ts | 5 +- packages/core/src/confirmation-bus/types.ts | 1 + .../core/src/policy/policy-engine.test.ts | 234 +++++++++++++----- packages/core/src/policy/policy-engine.ts | 16 +- .../src/tools/base-tool-invocation.test.ts | 131 ++++++++++ packages/core/src/tools/mcp-tool.ts | 9 +- packages/core/src/tools/tools.ts | 2 + 8 files changed, 411 insertions(+), 121 deletions(-) create mode 100644 packages/core/src/tools/base-tool-invocation.test.ts diff --git a/packages/cli/src/config/policy-engine.integration.test.ts b/packages/cli/src/config/policy-engine.integration.test.ts index 9b8457bc33..0c22cfeba9 100644 --- a/packages/cli/src/config/policy-engine.integration.test.ts +++ b/packages/cli/src/config/policy-engine.integration.test.ts @@ -30,18 +30,22 @@ describe('Policy Engine Integration Tests', () => { const engine = new PolicyEngine(config); // Allowed tool should be allowed - expect(engine.check({ name: 'run_shell_command' })).toBe( + expect(engine.check({ name: 'run_shell_command' }, undefined)).toBe( PolicyDecision.ALLOW, ); // Excluded tool should be denied - expect(engine.check({ name: 'write_file' })).toBe(PolicyDecision.DENY); + expect(engine.check({ name: 'write_file' }, undefined)).toBe( + PolicyDecision.DENY, + ); // Other write tools should ask user - expect(engine.check({ name: 'replace' })).toBe(PolicyDecision.ASK_USER); + expect(engine.check({ name: 'replace' }, undefined)).toBe( + PolicyDecision.ASK_USER, + ); // Unknown tools should use default - expect(engine.check({ name: 'unknown_tool' })).toBe( + expect(engine.check({ name: 'unknown_tool' }, undefined)).toBe( PolicyDecision.ASK_USER, ); }); @@ -68,31 +72,31 @@ describe('Policy Engine Integration Tests', () => { const engine = new PolicyEngine(config); // Tools from allowed server should be allowed - expect(engine.check({ name: 'allowed-server__tool1' })).toBe( - PolicyDecision.ALLOW, - ); - expect(engine.check({ name: 'allowed-server__another_tool' })).toBe( + expect(engine.check({ name: 'allowed-server__tool1' }, undefined)).toBe( PolicyDecision.ALLOW, ); + expect( + engine.check({ name: 'allowed-server__another_tool' }, undefined), + ).toBe(PolicyDecision.ALLOW); // Tools from trusted server should be allowed - expect(engine.check({ name: 'trusted-server__tool1' })).toBe( - PolicyDecision.ALLOW, - ); - expect(engine.check({ name: 'trusted-server__special_tool' })).toBe( + expect(engine.check({ name: 'trusted-server__tool1' }, undefined)).toBe( PolicyDecision.ALLOW, ); + expect( + engine.check({ name: 'trusted-server__special_tool' }, undefined), + ).toBe(PolicyDecision.ALLOW); // Tools from blocked server should be denied - expect(engine.check({ name: 'blocked-server__tool1' })).toBe( - PolicyDecision.DENY, - ); - expect(engine.check({ name: 'blocked-server__any_tool' })).toBe( + expect(engine.check({ name: 'blocked-server__tool1' }, undefined)).toBe( PolicyDecision.DENY, ); + expect( + engine.check({ name: 'blocked-server__any_tool' }, undefined), + ).toBe(PolicyDecision.DENY); // Tools from unknown servers should use default - expect(engine.check({ name: 'unknown-server__tool' })).toBe( + expect(engine.check({ name: 'unknown-server__tool' }, undefined)).toBe( PolicyDecision.ASK_USER, ); }); @@ -114,13 +118,13 @@ describe('Policy Engine Integration Tests', () => { const engine = new PolicyEngine(config); // MCP server allowed (priority 2.1) provides general allow for server - expect(engine.check({ name: 'my-server__safe-tool' })).toBe( + expect(engine.check({ name: 'my-server__safe-tool' }, undefined)).toBe( PolicyDecision.ALLOW, ); // But specific tool exclude (priority 2.4) wins over server allow - expect(engine.check({ name: 'my-server__dangerous-tool' })).toBe( - PolicyDecision.DENY, - ); + expect( + engine.check({ name: 'my-server__dangerous-tool' }, undefined), + ).toBe(PolicyDecision.DENY); }); it('should handle complex mixed configurations', async () => { @@ -150,36 +154,44 @@ describe('Policy Engine Integration Tests', () => { const engine = new PolicyEngine(config); // Read-only tools should be allowed (autoAccept) - expect(engine.check({ name: 'read_file' })).toBe(PolicyDecision.ALLOW); - expect(engine.check({ name: 'list_directory' })).toBe( + expect(engine.check({ name: 'read_file' }, undefined)).toBe( + PolicyDecision.ALLOW, + ); + expect(engine.check({ name: 'list_directory' }, undefined)).toBe( PolicyDecision.ALLOW, ); // But glob is explicitly excluded, so it should be denied - expect(engine.check({ name: 'glob' })).toBe(PolicyDecision.DENY); + expect(engine.check({ name: 'glob' }, undefined)).toBe( + PolicyDecision.DENY, + ); // Replace should ask user (normal write tool behavior) - expect(engine.check({ name: 'replace' })).toBe(PolicyDecision.ASK_USER); + expect(engine.check({ name: 'replace' }, undefined)).toBe( + PolicyDecision.ASK_USER, + ); // Explicitly allowed tools - expect(engine.check({ name: 'custom-tool' })).toBe(PolicyDecision.ALLOW); - expect(engine.check({ name: 'my-server__special-tool' })).toBe( + expect(engine.check({ name: 'custom-tool' }, undefined)).toBe( + PolicyDecision.ALLOW, + ); + expect(engine.check({ name: 'my-server__special-tool' }, undefined)).toBe( PolicyDecision.ALLOW, ); // MCP server tools - expect(engine.check({ name: 'allowed-server__tool' })).toBe( + expect(engine.check({ name: 'allowed-server__tool' }, undefined)).toBe( PolicyDecision.ALLOW, ); - expect(engine.check({ name: 'trusted-server__tool' })).toBe( + expect(engine.check({ name: 'trusted-server__tool' }, undefined)).toBe( PolicyDecision.ALLOW, ); - expect(engine.check({ name: 'blocked-server__tool' })).toBe( + expect(engine.check({ name: 'blocked-server__tool' }, undefined)).toBe( PolicyDecision.DENY, ); // Write tools should ask by default - expect(engine.check({ name: 'write_file' })).toBe( + expect(engine.check({ name: 'write_file' }, undefined)).toBe( PolicyDecision.ASK_USER, ); }); @@ -198,14 +210,18 @@ describe('Policy Engine Integration Tests', () => { const engine = new PolicyEngine(config); // Most tools should be allowed in YOLO mode - expect(engine.check({ name: 'run_shell_command' })).toBe( + expect(engine.check({ name: 'run_shell_command' }, undefined)).toBe( + PolicyDecision.ALLOW, + ); + expect(engine.check({ name: 'write_file' }, undefined)).toBe( + PolicyDecision.ALLOW, + ); + expect(engine.check({ name: 'unknown_tool' }, undefined)).toBe( PolicyDecision.ALLOW, ); - expect(engine.check({ name: 'write_file' })).toBe(PolicyDecision.ALLOW); - expect(engine.check({ name: 'unknown_tool' })).toBe(PolicyDecision.ALLOW); // But explicitly excluded tools should still be denied - expect(engine.check({ name: 'dangerous-tool' })).toBe( + expect(engine.check({ name: 'dangerous-tool' }, undefined)).toBe( PolicyDecision.DENY, ); }); @@ -220,11 +236,15 @@ describe('Policy Engine Integration Tests', () => { const engine = new PolicyEngine(config); // Edit tools should be allowed in AUTO_EDIT mode - expect(engine.check({ name: 'replace' })).toBe(PolicyDecision.ALLOW); - expect(engine.check({ name: 'write_file' })).toBe(PolicyDecision.ALLOW); + expect(engine.check({ name: 'replace' }, undefined)).toBe( + PolicyDecision.ALLOW, + ); + expect(engine.check({ name: 'write_file' }, undefined)).toBe( + PolicyDecision.ALLOW, + ); // Other tools should follow normal rules - expect(engine.check({ name: 'run_shell_command' })).toBe( + expect(engine.check({ name: 'run_shell_command' }, undefined)).toBe( PolicyDecision.ASK_USER, ); }); @@ -285,20 +305,24 @@ describe('Policy Engine Integration Tests', () => { expect(readOnlyToolRule?.priority).toBeCloseTo(1.05, 5); // Verify the engine applies these priorities correctly - expect(engine.check({ name: 'blocked-tool' })).toBe(PolicyDecision.DENY); - expect(engine.check({ name: 'blocked-server__any' })).toBe( + expect(engine.check({ name: 'blocked-tool' }, undefined)).toBe( PolicyDecision.DENY, ); - expect(engine.check({ name: 'specific-tool' })).toBe( + expect(engine.check({ name: 'blocked-server__any' }, undefined)).toBe( + PolicyDecision.DENY, + ); + expect(engine.check({ name: 'specific-tool' }, undefined)).toBe( PolicyDecision.ALLOW, ); - expect(engine.check({ name: 'trusted-server__any' })).toBe( + expect(engine.check({ name: 'trusted-server__any' }, undefined)).toBe( PolicyDecision.ALLOW, ); - expect(engine.check({ name: 'mcp-server__any' })).toBe( + expect(engine.check({ name: 'mcp-server__any' }, undefined)).toBe( + PolicyDecision.ALLOW, + ); + expect(engine.check({ name: 'glob' }, undefined)).toBe( PolicyDecision.ALLOW, ); - expect(engine.check({ name: 'glob' })).toBe(PolicyDecision.ALLOW); }); it('should handle edge case: MCP server with both trust and exclusion', async () => { @@ -322,7 +346,7 @@ describe('Policy Engine Integration Tests', () => { const engine = new PolicyEngine(config); // Exclusion (195) should win over trust (90) - expect(engine.check({ name: 'conflicted-server__tool' })).toBe( + expect(engine.check({ name: 'conflicted-server__tool' }, undefined)).toBe( PolicyDecision.DENY, ); }); @@ -345,10 +369,10 @@ describe('Policy Engine Integration Tests', () => { // Server exclusion (195) wins over specific tool allow (100) // This might be counterintuitive but follows the priority system - expect(engine.check({ name: 'my-server__special-tool' })).toBe( + expect(engine.check({ name: 'my-server__special-tool' }, undefined)).toBe( PolicyDecision.DENY, ); - expect(engine.check({ name: 'my-server__other-tool' })).toBe( + expect(engine.check({ name: 'my-server__other-tool' }, undefined)).toBe( PolicyDecision.DENY, ); }); @@ -365,8 +389,10 @@ describe('Policy Engine Integration Tests', () => { const engine = new PolicyEngine(engineConfig); // ASK_USER should become DENY in non-interactive mode - expect(engine.check({ name: 'unknown_tool' })).toBe(PolicyDecision.DENY); - expect(engine.check({ name: 'run_shell_command' })).toBe( + expect(engine.check({ name: 'unknown_tool' }, undefined)).toBe( + PolicyDecision.DENY, + ); + expect(engine.check({ name: 'run_shell_command' }, undefined)).toBe( PolicyDecision.DENY, ); }); @@ -381,13 +407,17 @@ describe('Policy Engine Integration Tests', () => { const engine = new PolicyEngine(config); // Should have default rules for write tools - expect(engine.check({ name: 'write_file' })).toBe( + expect(engine.check({ name: 'write_file' }, undefined)).toBe( + PolicyDecision.ASK_USER, + ); + expect(engine.check({ name: 'replace' }, undefined)).toBe( PolicyDecision.ASK_USER, ); - expect(engine.check({ name: 'replace' })).toBe(PolicyDecision.ASK_USER); // Unknown tools should use default - expect(engine.check({ name: 'unknown' })).toBe(PolicyDecision.ASK_USER); + expect(engine.check({ name: 'unknown' }, undefined)).toBe( + PolicyDecision.ASK_USER, + ); }); it('should verify rules are created with correct priorities', async () => { diff --git a/packages/core/src/confirmation-bus/message-bus.ts b/packages/core/src/confirmation-bus/message-bus.ts index b48129b412..cd293a72a6 100644 --- a/packages/core/src/confirmation-bus/message-bus.ts +++ b/packages/core/src/confirmation-bus/message-bus.ts @@ -50,7 +50,10 @@ export class MessageBus extends EventEmitter { } if (message.type === MessageBusType.TOOL_CONFIRMATION_REQUEST) { - const decision = this.policyEngine.check(message.toolCall); + const decision = this.policyEngine.check( + message.toolCall, + message.serverName, + ); switch (decision) { case PolicyDecision.ALLOW: diff --git a/packages/core/src/confirmation-bus/types.ts b/packages/core/src/confirmation-bus/types.ts index 2b4bcf5685..52d7bd2e9f 100644 --- a/packages/core/src/confirmation-bus/types.ts +++ b/packages/core/src/confirmation-bus/types.ts @@ -19,6 +19,7 @@ export interface ToolConfirmationRequest { type: MessageBusType.TOOL_CONFIRMATION_REQUEST; toolCall: FunctionCall; correlationId: string; + serverName?: string; } export interface ToolConfirmationResponse { diff --git a/packages/core/src/policy/policy-engine.test.ts b/packages/core/src/policy/policy-engine.test.ts index 03caf2524e..fd3a4b62b2 100644 --- a/packages/core/src/policy/policy-engine.test.ts +++ b/packages/core/src/policy/policy-engine.test.ts @@ -22,13 +22,13 @@ describe('PolicyEngine', () => { describe('constructor', () => { it('should use default config when none provided', () => { - const decision = engine.check({ name: 'test' }); + const decision = engine.check({ name: 'test' }, undefined); expect(decision).toBe(PolicyDecision.ASK_USER); }); it('should respect custom default decision', () => { engine = new PolicyEngine({ defaultDecision: PolicyDecision.DENY }); - const decision = engine.check({ name: 'test' }); + const decision = engine.check({ name: 'test' }, undefined); expect(decision).toBe(PolicyDecision.DENY); }); @@ -57,9 +57,15 @@ describe('PolicyEngine', () => { engine = new PolicyEngine({ rules }); - expect(engine.check({ name: 'shell' })).toBe(PolicyDecision.ALLOW); - expect(engine.check({ name: 'edit' })).toBe(PolicyDecision.DENY); - expect(engine.check({ name: 'other' })).toBe(PolicyDecision.ASK_USER); + expect(engine.check({ name: 'shell' }, undefined)).toBe( + PolicyDecision.ALLOW, + ); + expect(engine.check({ name: 'edit' }, undefined)).toBe( + PolicyDecision.DENY, + ); + expect(engine.check({ name: 'other' }, undefined)).toBe( + PolicyDecision.ASK_USER, + ); }); it('should match by args pattern', () => { @@ -87,8 +93,8 @@ describe('PolicyEngine', () => { args: { command: 'ls -la' }, }; - expect(engine.check(dangerousCall)).toBe(PolicyDecision.DENY); - expect(engine.check(safeCall)).toBe(PolicyDecision.ALLOW); + expect(engine.check(dangerousCall, undefined)).toBe(PolicyDecision.DENY); + expect(engine.check(safeCall, undefined)).toBe(PolicyDecision.ALLOW); }); it('should apply rules by priority', () => { @@ -100,7 +106,9 @@ describe('PolicyEngine', () => { engine = new PolicyEngine({ rules }); // Higher priority rule (ALLOW) should win - expect(engine.check({ name: 'shell' })).toBe(PolicyDecision.ALLOW); + expect(engine.check({ name: 'shell' }, undefined)).toBe( + PolicyDecision.ALLOW, + ); }); it('should apply wildcard rules (no toolName)', () => { @@ -111,8 +119,10 @@ describe('PolicyEngine', () => { engine = new PolicyEngine({ rules }); - expect(engine.check({ name: 'safe-tool' })).toBe(PolicyDecision.ALLOW); - expect(engine.check({ name: 'any-other-tool' })).toBe( + expect(engine.check({ name: 'safe-tool' }, undefined)).toBe( + PolicyDecision.ALLOW, + ); + expect(engine.check({ name: 'any-other-tool' }, undefined)).toBe( PolicyDecision.DENY, ); }); @@ -129,13 +139,17 @@ describe('PolicyEngine', () => { engine = new PolicyEngine(config); // ASK_USER should become DENY in non-interactive mode - expect(engine.check({ name: 'interactive-tool' })).toBe( + expect(engine.check({ name: 'interactive-tool' }, undefined)).toBe( PolicyDecision.DENY, ); // ALLOW should remain ALLOW - expect(engine.check({ name: 'allowed-tool' })).toBe(PolicyDecision.ALLOW); + expect(engine.check({ name: 'allowed-tool' }, undefined)).toBe( + PolicyDecision.ALLOW, + ); // Default ASK_USER should also become DENY - expect(engine.check({ name: 'unknown-tool' })).toBe(PolicyDecision.DENY); + expect(engine.check({ name: 'unknown-tool' }, undefined)).toBe( + PolicyDecision.DENY, + ); }); }); @@ -165,11 +179,15 @@ describe('PolicyEngine', () => { }); it('should apply newly added rules', () => { - expect(engine.check({ name: 'new-tool' })).toBe(PolicyDecision.ASK_USER); + expect(engine.check({ name: 'new-tool' }, undefined)).toBe( + PolicyDecision.ASK_USER, + ); engine.addRule({ toolName: 'new-tool', decision: PolicyDecision.ALLOW }); - expect(engine.check({ name: 'new-tool' })).toBe(PolicyDecision.ALLOW); + expect(engine.check({ name: 'new-tool' }, undefined)).toBe( + PolicyDecision.ALLOW, + ); }); }); @@ -235,29 +253,31 @@ describe('PolicyEngine', () => { engine = new PolicyEngine({ rules }); // Should match my-server tools - expect(engine.check({ name: 'my-server__tool1' })).toBe( + expect(engine.check({ name: 'my-server__tool1' }, undefined)).toBe( PolicyDecision.ALLOW, ); - expect(engine.check({ name: 'my-server__another_tool' })).toBe( + expect(engine.check({ name: 'my-server__another_tool' }, undefined)).toBe( PolicyDecision.ALLOW, ); // Should match blocked-server tools - expect(engine.check({ name: 'blocked-server__tool1' })).toBe( - PolicyDecision.DENY, - ); - expect(engine.check({ name: 'blocked-server__dangerous' })).toBe( + expect(engine.check({ name: 'blocked-server__tool1' }, undefined)).toBe( PolicyDecision.DENY, ); + expect( + engine.check({ name: 'blocked-server__dangerous' }, undefined), + ).toBe(PolicyDecision.DENY); // Should not match other patterns - expect(engine.check({ name: 'other-server__tool' })).toBe( + expect(engine.check({ name: 'other-server__tool' }, undefined)).toBe( PolicyDecision.ASK_USER, ); - expect(engine.check({ name: 'my-server-tool' })).toBe( + expect(engine.check({ name: 'my-server-tool' }, undefined)).toBe( PolicyDecision.ASK_USER, ); // No __ separator - expect(engine.check({ name: 'my-server' })).toBe(PolicyDecision.ASK_USER); // No tool name + expect(engine.check({ name: 'my-server' }, undefined)).toBe( + PolicyDecision.ASK_USER, + ); // No tool name }); it('should prioritize specific tool rules over server wildcards', () => { @@ -277,10 +297,62 @@ describe('PolicyEngine', () => { engine = new PolicyEngine({ rules }); // Specific tool deny should override server allow - expect(engine.check({ name: 'my-server__dangerous-tool' })).toBe( - PolicyDecision.DENY, + expect( + engine.check({ name: 'my-server__dangerous-tool' }, undefined), + ).toBe(PolicyDecision.DENY); + expect(engine.check({ name: 'my-server__safe-tool' }, undefined)).toBe( + PolicyDecision.ALLOW, ); - expect(engine.check({ name: 'my-server__safe-tool' })).toBe( + }); + + it('should NOT match spoofed server names when using wildcards', () => { + // Vulnerability: A rule for 'prefix__*' matches 'prefix__suffix__tool' + // effectively allowing a server named 'prefix__suffix' to spoof 'prefix'. + const rules: PolicyRule[] = [ + { + toolName: 'safe_server__*', + decision: PolicyDecision.ALLOW, + }, + ]; + engine = new PolicyEngine({ rules }); + + // A tool from a different server 'safe_server__malicious' + const spoofedToolCall = { name: 'safe_server__malicious__tool' }; + + // CURRENT BEHAVIOR (FIXED): Matches because it starts with 'safe_server__' BUT serverName doesn't match 'safe_server' + // We expect this to FAIL matching the ALLOW rule, thus falling back to default (ASK_USER) + expect(engine.check(spoofedToolCall, 'safe_server__malicious')).toBe( + PolicyDecision.ASK_USER, + ); + }); + + it('should verify tool name prefix even if serverName matches', () => { + const rules: PolicyRule[] = [ + { + toolName: 'safe_server__*', + decision: PolicyDecision.ALLOW, + }, + ]; + engine = new PolicyEngine({ rules }); + + // serverName matches, but tool name does not start with prefix + const invalidToolCall = { name: 'other_server__tool' }; + expect(engine.check(invalidToolCall, 'safe_server')).toBe( + PolicyDecision.ASK_USER, + ); + }); + + it('should allow when both serverName and tool name prefix match', () => { + const rules: PolicyRule[] = [ + { + toolName: 'safe_server__*', + decision: PolicyDecision.ALLOW, + }, + ]; + engine = new PolicyEngine({ rules }); + + const validToolCall = { name: 'safe_server__tool' }; + expect(engine.check(validToolCall, 'safe_server')).toBe( PolicyDecision.ALLOW, ); }); @@ -302,17 +374,19 @@ describe('PolicyEngine', () => { engine = new PolicyEngine({ rules }); // Matches highest priority rule (ls command) - expect(engine.check({ name: 'shell', args: { command: 'ls -la' } })).toBe( - PolicyDecision.ALLOW, - ); + expect( + engine.check({ name: 'shell', args: { command: 'ls -la' } }, undefined), + ).toBe(PolicyDecision.ALLOW); // Matches middle priority rule (shell without ls) - expect(engine.check({ name: 'shell', args: { command: 'pwd' } })).toBe( - PolicyDecision.ASK_USER, - ); + expect( + engine.check({ name: 'shell', args: { command: 'pwd' } }, undefined), + ).toBe(PolicyDecision.ASK_USER); // Matches lowest priority rule (not shell) - expect(engine.check({ name: 'edit' })).toBe(PolicyDecision.DENY); + expect(engine.check({ name: 'edit' }, undefined)).toBe( + PolicyDecision.DENY, + ); }); it('should handle tools with no args', () => { @@ -327,17 +401,19 @@ describe('PolicyEngine', () => { engine = new PolicyEngine({ rules }); // Tool call without args should not match pattern - expect(engine.check({ name: 'read' })).toBe(PolicyDecision.ASK_USER); - - // Tool call with args not matching pattern - expect(engine.check({ name: 'read', args: { file: 'public.txt' } })).toBe( + expect(engine.check({ name: 'read' }, undefined)).toBe( PolicyDecision.ASK_USER, ); + // Tool call with args not matching pattern + expect( + engine.check({ name: 'read', args: { file: 'public.txt' } }, undefined), + ).toBe(PolicyDecision.ASK_USER); + // Tool call with args matching pattern - expect(engine.check({ name: 'read', args: { file: 'secret.txt' } })).toBe( - PolicyDecision.DENY, - ); + expect( + engine.check({ name: 'read', args: { file: 'secret.txt' } }, undefined), + ).toBe(PolicyDecision.DENY); }); it('should match args pattern regardless of property order', () => { @@ -356,16 +432,16 @@ describe('PolicyEngine', () => { const args1 = { command: 'rm -rf /', path: '/home' }; const args2 = { path: '/home', command: 'rm -rf /' }; - expect(engine.check({ name: 'shell', args: args1 })).toBe( + expect(engine.check({ name: 'shell', args: args1 }, undefined)).toBe( PolicyDecision.DENY, ); - expect(engine.check({ name: 'shell', args: args2 })).toBe( + expect(engine.check({ name: 'shell', args: args2 }, undefined)).toBe( PolicyDecision.DENY, ); // Verify safe command doesn't match const safeArgs = { command: 'ls -la', path: '/home' }; - expect(engine.check({ name: 'shell', args: safeArgs })).toBe( + expect(engine.check({ name: 'shell', args: safeArgs }, undefined)).toBe( PolicyDecision.ASK_USER, ); }); @@ -391,10 +467,10 @@ describe('PolicyEngine', () => { data: { value: 'secret', sensitive: true }, }; - expect(engine.check({ name: 'api', args: args1 })).toBe( + expect(engine.check({ name: 'api', args: args1 }, undefined)).toBe( PolicyDecision.DENY, ); - expect(engine.check({ name: 'api', args: args2 })).toBe( + expect(engine.check({ name: 'api', args: args2 }, undefined)).toBe( PolicyDecision.DENY, ); }); @@ -424,17 +500,17 @@ describe('PolicyEngine', () => { // Should not throw stack overflow error expect(() => - engine.check({ name: 'test', args: circularArgs }), + engine.check({ name: 'test', args: circularArgs }, undefined), ).not.toThrow(); // Should detect the circular reference pattern - expect(engine.check({ name: 'test', args: circularArgs })).toBe( - PolicyDecision.DENY, - ); + expect( + engine.check({ name: 'test', args: circularArgs }, undefined), + ).toBe(PolicyDecision.DENY); // Non-circular object should not match const normalArgs = { name: 'test', data: { value: 'normal' } }; - expect(engine.check({ name: 'test', args: normalArgs })).toBe( + expect(engine.check({ name: 'test', args: normalArgs }, undefined)).toBe( PolicyDecision.ASK_USER, ); }); @@ -471,13 +547,13 @@ describe('PolicyEngine', () => { // Should handle without stack overflow expect(() => - engine.check({ name: 'deep', args: deepCircular }), + engine.check({ name: 'deep', args: deepCircular }, undefined), ).not.toThrow(); // Should detect the circular reference - expect(engine.check({ name: 'deep', args: deepCircular })).toBe( - PolicyDecision.DENY, - ); + expect( + engine.check({ name: 'deep', args: deepCircular }, undefined), + ).toBe(PolicyDecision.DENY); }); it('should handle repeated non-circular objects correctly', () => { @@ -506,7 +582,9 @@ describe('PolicyEngine', () => { }; // Should NOT mark repeated objects as circular, and should match the shared value pattern - expect(engine.check({ name: 'test', args })).toBe(PolicyDecision.ALLOW); + expect(engine.check({ name: 'test', args }, undefined)).toBe( + PolicyDecision.ALLOW, + ); }); it('should omit undefined and function values from objects', () => { @@ -528,7 +606,9 @@ describe('PolicyEngine', () => { }; // Should match pattern with defined value, undefined and functions omitted - expect(engine.check({ name: 'test', args })).toBe(PolicyDecision.ALLOW); + expect(engine.check({ name: 'test', args }, undefined)).toBe( + PolicyDecision.ALLOW, + ); // Check that the pattern would NOT match if undefined was included const rulesWithUndefined: PolicyRule[] = [ @@ -539,7 +619,7 @@ describe('PolicyEngine', () => { }, ]; engine = new PolicyEngine({ rules: rulesWithUndefined }); - expect(engine.check({ name: 'test', args })).toBe( + expect(engine.check({ name: 'test', args }, undefined)).toBe( PolicyDecision.ASK_USER, ); @@ -552,7 +632,7 @@ describe('PolicyEngine', () => { }, ]; engine = new PolicyEngine({ rules: rulesWithFunction }); - expect(engine.check({ name: 'test', args })).toBe( + expect(engine.check({ name: 'test', args }, undefined)).toBe( PolicyDecision.ASK_USER, ); }); @@ -573,7 +653,9 @@ describe('PolicyEngine', () => { }; // Should match pattern with undefined and functions converted to null - expect(engine.check({ name: 'test', args })).toBe(PolicyDecision.ALLOW); + expect(engine.check({ name: 'test', args }, undefined)).toBe( + PolicyDecision.ALLOW, + ); }); it('should produce valid JSON for all inputs', () => { @@ -607,10 +689,12 @@ describe('PolicyEngine', () => { engine = new PolicyEngine({ rules }); // Should not throw when checking (which internally uses stableStringify) - expect(() => engine.check({ name: 'test', args: input })).not.toThrow(); + expect(() => + engine.check({ name: 'test', args: input }, undefined), + ).not.toThrow(); // The check should succeed - expect(engine.check({ name: 'test', args: input })).toBe( + expect(engine.check({ name: 'test', args: input }, undefined)).toBe( PolicyDecision.ALLOW, ); } @@ -641,7 +725,9 @@ describe('PolicyEngine', () => { }; // Should match the sanitized pattern, not the dangerous one - expect(engine.check({ name: 'test', args })).toBe(PolicyDecision.ALLOW); + expect(engine.check({ name: 'test', args }, undefined)).toBe( + PolicyDecision.ALLOW, + ); }); it('should handle toJSON that returns primitives', () => { @@ -663,7 +749,9 @@ describe('PolicyEngine', () => { }; // toJSON returns a string, which should be properly stringified - expect(engine.check({ name: 'test', args })).toBe(PolicyDecision.ALLOW); + expect(engine.check({ name: 'test', args }, undefined)).toBe( + PolicyDecision.ALLOW, + ); }); it('should handle toJSON that throws an error', () => { @@ -687,7 +775,23 @@ describe('PolicyEngine', () => { }; // Should fall back to regular object serialization when toJSON throws - expect(engine.check({ name: 'test', args })).toBe(PolicyDecision.ALLOW); + expect(engine.check({ name: 'test', args }, undefined)).toBe( + PolicyDecision.ALLOW, + ); + }); + }); + + describe('serverName requirement', () => { + it('should require serverName for checks', () => { + // @ts-expect-error - intentionally testing missing serverName + expect(engine.check({ name: 'test' })).toBe(PolicyDecision.ASK_USER); + // When serverName is provided (even undefined), it should work + expect(engine.check({ name: 'test' }, undefined)).toBe( + PolicyDecision.ASK_USER, + ); + expect(engine.check({ name: 'test' }, 'some-server')).toBe( + PolicyDecision.ASK_USER, + ); }); }); }); diff --git a/packages/core/src/policy/policy-engine.ts b/packages/core/src/policy/policy-engine.ts index cc98cc5966..034cce2c8b 100644 --- a/packages/core/src/policy/policy-engine.ts +++ b/packages/core/src/policy/policy-engine.ts @@ -17,12 +17,21 @@ function ruleMatches( rule: PolicyRule, toolCall: FunctionCall, stringifiedArgs: string | undefined, + serverName: string | undefined, ): boolean { // Check tool name if specified if (rule.toolName) { // Support wildcard patterns: "serverName__*" matches "serverName__anyTool" if (rule.toolName.endsWith('__*')) { const prefix = rule.toolName.slice(0, -3); // Remove "__*" + if (serverName !== undefined) { + // Robust check: if serverName is provided, it MUST match the prefix exactly. + // This prevents "malicious-server" from spoofing "trusted-server" by naming itself "trusted-server__malicious". + if (serverName !== prefix) { + return false; + } + } + // Always verify the prefix, even if serverName matched if (!toolCall.name || !toolCall.name.startsWith(prefix + '__')) { return false; } @@ -65,7 +74,10 @@ export class PolicyEngine { /** * Check if a tool call is allowed based on the configured policies. */ - check(toolCall: FunctionCall): PolicyDecision { + check( + toolCall: FunctionCall, + serverName: string | undefined, + ): PolicyDecision { let stringifiedArgs: string | undefined; // Compute stringified args once before the loop if (toolCall.args && this.rules.some((rule) => rule.argsPattern)) { @@ -78,7 +90,7 @@ export class PolicyEngine { // Find the first matching rule (already sorted by priority) for (const rule of this.rules) { - if (ruleMatches(rule, toolCall, stringifiedArgs)) { + if (ruleMatches(rule, toolCall, stringifiedArgs, serverName)) { debugLogger.debug( `[PolicyEngine.check] MATCHED rule: toolName=${rule.toolName}, decision=${rule.decision}, priority=${rule.priority}, argsPattern=${rule.argsPattern?.source || 'none'}`, ); diff --git a/packages/core/src/tools/base-tool-invocation.test.ts b/packages/core/src/tools/base-tool-invocation.test.ts new file mode 100644 index 0000000000..38d651f076 --- /dev/null +++ b/packages/core/src/tools/base-tool-invocation.test.ts @@ -0,0 +1,131 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { BaseToolInvocation, type ToolResult } from './tools.js'; +import type { MessageBus } from '../confirmation-bus/message-bus.js'; +import { + type Message, + MessageBusType, + type ToolConfirmationRequest, + type ToolConfirmationResponse, +} from '../confirmation-bus/types.js'; + +class TestBaseToolInvocation extends BaseToolInvocation { + getDescription(): string { + return 'test description'; + } + async execute(): Promise { + return { llmContent: [], returnDisplay: '' }; + } +} + +describe('BaseToolInvocation', () => { + let messageBus: MessageBus; + let abortController: AbortController; + + beforeEach(() => { + messageBus = { + publish: vi.fn(), + subscribe: vi.fn(), + unsubscribe: vi.fn(), + } as unknown as MessageBus; + abortController = new AbortController(); + }); + + it('should propagate serverName to ToolConfirmationRequest', async () => { + const serverName = 'test-server'; + const tool = new TestBaseToolInvocation( + {}, + messageBus, + 'test-tool', + 'Test Tool', + serverName, + ); + + let capturedRequest: ToolConfirmationRequest | undefined; + vi.mocked(messageBus.publish).mockImplementation((request: Message) => { + if (request.type === MessageBusType.TOOL_CONFIRMATION_REQUEST) { + capturedRequest = request; + } + }); + + let responseHandler: + | ((response: ToolConfirmationResponse) => void) + | undefined; + vi.mocked(messageBus.subscribe).mockImplementation( + (type: MessageBusType, handler: (message: Message) => void) => { + if (type === MessageBusType.TOOL_CONFIRMATION_RESPONSE) { + responseHandler = handler as ( + response: ToolConfirmationResponse, + ) => void; + } + }, + ); + + const confirmationPromise = tool.shouldConfirmExecute( + abortController.signal, + ); + + // Wait for microtasks to ensure publish is called + await new Promise((resolve) => setTimeout(resolve, 0)); + + expect(messageBus.publish).toHaveBeenCalledTimes(1); + expect(capturedRequest).toBeDefined(); + expect(capturedRequest?.type).toBe( + MessageBusType.TOOL_CONFIRMATION_REQUEST, + ); + expect(capturedRequest?.serverName).toBe(serverName); + + // Simulate response to finish the promise cleanly + if (responseHandler && capturedRequest) { + responseHandler({ + type: MessageBusType.TOOL_CONFIRMATION_RESPONSE, + correlationId: capturedRequest.correlationId, + confirmed: true, + }); + } + + await confirmationPromise; + }); + + it('should NOT propagate serverName if not provided', async () => { + const tool = new TestBaseToolInvocation( + {}, + messageBus, + 'test-tool', + 'Test Tool', + // no serverName + ); + + let capturedRequest: ToolConfirmationRequest | undefined; + vi.mocked(messageBus.publish).mockImplementation((request: Message) => { + if (request.type === MessageBusType.TOOL_CONFIRMATION_REQUEST) { + capturedRequest = request; + } + }); + + // We need to mock subscribe to avoid hanging if we want to await the promise, + // but for this test we just need to check publish. + // We'll abort to clean up. + const confirmationPromise = tool.shouldConfirmExecute( + abortController.signal, + ); + + await new Promise((resolve) => setTimeout(resolve, 0)); + + expect(messageBus.publish).toHaveBeenCalledTimes(1); + expect(capturedRequest).toBeDefined(); + expect(capturedRequest?.serverName).toBeUndefined(); + + abortController.abort(); + try { + await confirmationPromise; + } catch { + // ignore abort error + } + }); +}); diff --git a/packages/core/src/tools/mcp-tool.ts b/packages/core/src/tools/mcp-tool.ts index 5949bdf5b2..f5c9205e9d 100644 --- a/packages/core/src/tools/mcp-tool.ts +++ b/packages/core/src/tools/mcp-tool.ts @@ -77,7 +77,14 @@ class DiscoveredMCPToolInvocation extends BaseToolInvocation< // Use composite format for policy checks: serverName__toolName // This enables server wildcards (e.g., "google-workspace__*") // while still allowing specific tool rules - super(params, messageBus, `${serverName}__${serverToolName}`, displayName); + + super( + params, + messageBus, + `${serverName}__${serverToolName}`, + displayName, + serverName, + ); } protected override async getConfirmationDetails( diff --git a/packages/core/src/tools/tools.ts b/packages/core/src/tools/tools.ts index 98605aea9b..59d1ef7baf 100644 --- a/packages/core/src/tools/tools.ts +++ b/packages/core/src/tools/tools.ts @@ -78,6 +78,7 @@ export abstract class BaseToolInvocation< protected readonly messageBus?: MessageBus, readonly _toolName?: string, readonly _toolDisplayName?: string, + readonly _serverName?: string, ) {} abstract getDescription(): string; @@ -215,6 +216,7 @@ export abstract class BaseToolInvocation< type: MessageBusType.TOOL_CONFIRMATION_REQUEST, toolCall, correlationId, + serverName: this._serverName, }; try {