From f5bd474e519a3f45706642da170e32ea44c46bbf Mon Sep 17 00:00:00 2001
From: Allen Hutchison <adh@google.com>
Date: Wed, 5 Nov 2025 10:10:23 -0800
Subject: [PATCH] fix(core): prevent server name spoofing in policy engine
 (#12511)

---
 .../config/policy-engine.integration.test.ts  | 134 ++++++----
 .../core/src/confirmation-bus/message-bus.ts  |   5 +-
 packages/core/src/confirmation-bus/types.ts   |   1 +
 .../core/src/policy/policy-engine.test.ts     | 234 +++++++++++++-----
 packages/core/src/policy/policy-engine.ts     |  16 +-
 .../src/tools/base-tool-invocation.test.ts    | 131 ++++++++++
 packages/core/src/tools/mcp-tool.ts           |   9 +-
 packages/core/src/tools/tools.ts              |   2 +
 8 files changed, 411 insertions(+), 121 deletions(-)
 create mode 100644 packages/core/src/tools/base-tool-invocation.test.ts

diff --git a/packages/cli/src/config/policy-engine.integration.test.ts b/packages/cli/src/config/policy-engine.integration.test.ts
index 9b8457bc33..0c22cfeba9 100644
--- a/packages/cli/src/config/policy-engine.integration.test.ts
+++ b/packages/cli/src/config/policy-engine.integration.test.ts
@@ -30,18 +30,22 @@ describe('Policy Engine Integration Tests', () => {
       const engine = new PolicyEngine(config);
 
       // Allowed tool should be allowed
-      expect(engine.check({ name: 'run_shell_command' })).toBe(
+      expect(engine.check({ name: 'run_shell_command' }, undefined)).toBe(
         PolicyDecision.ALLOW,
       );
 
       // Excluded tool should be denied
-      expect(engine.check({ name: 'write_file' })).toBe(PolicyDecision.DENY);
+      expect(engine.check({ name: 'write_file' }, undefined)).toBe(
+        PolicyDecision.DENY,
+      );
 
       // Other write tools should ask user
-      expect(engine.check({ name: 'replace' })).toBe(PolicyDecision.ASK_USER);
+      expect(engine.check({ name: 'replace' }, undefined)).toBe(
+        PolicyDecision.ASK_USER,
+      );
 
       // Unknown tools should use default
-      expect(engine.check({ name: 'unknown_tool' })).toBe(
+      expect(engine.check({ name: 'unknown_tool' }, undefined)).toBe(
         PolicyDecision.ASK_USER,
       );
     });
@@ -68,31 +72,31 @@ describe('Policy Engine Integration Tests', () => {
       const engine = new PolicyEngine(config);
 
       // Tools from allowed server should be allowed
-      expect(engine.check({ name: 'allowed-server__tool1' })).toBe(
-        PolicyDecision.ALLOW,
-      );
-      expect(engine.check({ name: 'allowed-server__another_tool' })).toBe(
+      expect(engine.check({ name: 'allowed-server__tool1' }, undefined)).toBe(
         PolicyDecision.ALLOW,
       );
+      expect(
+        engine.check({ name: 'allowed-server__another_tool' }, undefined),
+      ).toBe(PolicyDecision.ALLOW);
 
       // Tools from trusted server should be allowed
-      expect(engine.check({ name: 'trusted-server__tool1' })).toBe(
-        PolicyDecision.ALLOW,
-      );
-      expect(engine.check({ name: 'trusted-server__special_tool' })).toBe(
+      expect(engine.check({ name: 'trusted-server__tool1' }, undefined)).toBe(
         PolicyDecision.ALLOW,
       );
+      expect(
+        engine.check({ name: 'trusted-server__special_tool' }, undefined),
+      ).toBe(PolicyDecision.ALLOW);
 
       // Tools from blocked server should be denied
-      expect(engine.check({ name: 'blocked-server__tool1' })).toBe(
-        PolicyDecision.DENY,
-      );
-      expect(engine.check({ name: 'blocked-server__any_tool' })).toBe(
+      expect(engine.check({ name: 'blocked-server__tool1' }, undefined)).toBe(
         PolicyDecision.DENY,
       );
+      expect(
+        engine.check({ name: 'blocked-server__any_tool' }, undefined),
+      ).toBe(PolicyDecision.DENY);
 
       // Tools from unknown servers should use default
-      expect(engine.check({ name: 'unknown-server__tool' })).toBe(
+      expect(engine.check({ name: 'unknown-server__tool' }, undefined)).toBe(
         PolicyDecision.ASK_USER,
       );
     });
@@ -114,13 +118,13 @@ describe('Policy Engine Integration Tests', () => {
       const engine = new PolicyEngine(config);
 
       // MCP server allowed (priority 2.1) provides general allow for server
-      expect(engine.check({ name: 'my-server__safe-tool' })).toBe(
+      expect(engine.check({ name: 'my-server__safe-tool' }, undefined)).toBe(
         PolicyDecision.ALLOW,
       );
       // But specific tool exclude (priority 2.4) wins over server allow
-      expect(engine.check({ name: 'my-server__dangerous-tool' })).toBe(
-        PolicyDecision.DENY,
-      );
+      expect(
+        engine.check({ name: 'my-server__dangerous-tool' }, undefined),
+      ).toBe(PolicyDecision.DENY);
     });
 
     it('should handle complex mixed configurations', async () => {
@@ -150,36 +154,44 @@ describe('Policy Engine Integration Tests', () => {
       const engine = new PolicyEngine(config);
 
       // Read-only tools should be allowed (autoAccept)
-      expect(engine.check({ name: 'read_file' })).toBe(PolicyDecision.ALLOW);
-      expect(engine.check({ name: 'list_directory' })).toBe(
+      expect(engine.check({ name: 'read_file' }, undefined)).toBe(
+        PolicyDecision.ALLOW,
+      );
+      expect(engine.check({ name: 'list_directory' }, undefined)).toBe(
         PolicyDecision.ALLOW,
       );
 
       // But glob is explicitly excluded, so it should be denied
-      expect(engine.check({ name: 'glob' })).toBe(PolicyDecision.DENY);
+      expect(engine.check({ name: 'glob' }, undefined)).toBe(
+        PolicyDecision.DENY,
+      );
 
       // Replace should ask user (normal write tool behavior)
-      expect(engine.check({ name: 'replace' })).toBe(PolicyDecision.ASK_USER);
+      expect(engine.check({ name: 'replace' }, undefined)).toBe(
+        PolicyDecision.ASK_USER,
+      );
 
       // Explicitly allowed tools
-      expect(engine.check({ name: 'custom-tool' })).toBe(PolicyDecision.ALLOW);
-      expect(engine.check({ name: 'my-server__special-tool' })).toBe(
+      expect(engine.check({ name: 'custom-tool' }, undefined)).toBe(
+        PolicyDecision.ALLOW,
+      );
+      expect(engine.check({ name: 'my-server__special-tool' }, undefined)).toBe(
         PolicyDecision.ALLOW,
       );
 
       // MCP server tools
-      expect(engine.check({ name: 'allowed-server__tool' })).toBe(
+      expect(engine.check({ name: 'allowed-server__tool' }, undefined)).toBe(
         PolicyDecision.ALLOW,
       );
-      expect(engine.check({ name: 'trusted-server__tool' })).toBe(
+      expect(engine.check({ name: 'trusted-server__tool' }, undefined)).toBe(
         PolicyDecision.ALLOW,
       );
-      expect(engine.check({ name: 'blocked-server__tool' })).toBe(
+      expect(engine.check({ name: 'blocked-server__tool' }, undefined)).toBe(
         PolicyDecision.DENY,
       );
 
       // Write tools should ask by default
-      expect(engine.check({ name: 'write_file' })).toBe(
+      expect(engine.check({ name: 'write_file' }, undefined)).toBe(
         PolicyDecision.ASK_USER,
       );
     });
@@ -198,14 +210,18 @@ describe('Policy Engine Integration Tests', () => {
       const engine = new PolicyEngine(config);
 
       // Most tools should be allowed in YOLO mode
-      expect(engine.check({ name: 'run_shell_command' })).toBe(
+      expect(engine.check({ name: 'run_shell_command' }, undefined)).toBe(
+        PolicyDecision.ALLOW,
+      );
+      expect(engine.check({ name: 'write_file' }, undefined)).toBe(
+        PolicyDecision.ALLOW,
+      );
+      expect(engine.check({ name: 'unknown_tool' }, undefined)).toBe(
         PolicyDecision.ALLOW,
       );
-      expect(engine.check({ name: 'write_file' })).toBe(PolicyDecision.ALLOW);
-      expect(engine.check({ name: 'unknown_tool' })).toBe(PolicyDecision.ALLOW);
 
       // But explicitly excluded tools should still be denied
-      expect(engine.check({ name: 'dangerous-tool' })).toBe(
+      expect(engine.check({ name: 'dangerous-tool' }, undefined)).toBe(
         PolicyDecision.DENY,
       );
     });
@@ -220,11 +236,15 @@ describe('Policy Engine Integration Tests', () => {
       const engine = new PolicyEngine(config);
 
       // Edit tools should be allowed in AUTO_EDIT mode
-      expect(engine.check({ name: 'replace' })).toBe(PolicyDecision.ALLOW);
-      expect(engine.check({ name: 'write_file' })).toBe(PolicyDecision.ALLOW);
+      expect(engine.check({ name: 'replace' }, undefined)).toBe(
+        PolicyDecision.ALLOW,
+      );
+      expect(engine.check({ name: 'write_file' }, undefined)).toBe(
+        PolicyDecision.ALLOW,
+      );
 
       // Other tools should follow normal rules
-      expect(engine.check({ name: 'run_shell_command' })).toBe(
+      expect(engine.check({ name: 'run_shell_command' }, undefined)).toBe(
         PolicyDecision.ASK_USER,
       );
     });
@@ -285,20 +305,24 @@ describe('Policy Engine Integration Tests', () => {
       expect(readOnlyToolRule?.priority).toBeCloseTo(1.05, 5);
 
       // Verify the engine applies these priorities correctly
-      expect(engine.check({ name: 'blocked-tool' })).toBe(PolicyDecision.DENY);
-      expect(engine.check({ name: 'blocked-server__any' })).toBe(
+      expect(engine.check({ name: 'blocked-tool' }, undefined)).toBe(
         PolicyDecision.DENY,
       );
-      expect(engine.check({ name: 'specific-tool' })).toBe(
+      expect(engine.check({ name: 'blocked-server__any' }, undefined)).toBe(
+        PolicyDecision.DENY,
+      );
+      expect(engine.check({ name: 'specific-tool' }, undefined)).toBe(
         PolicyDecision.ALLOW,
       );
-      expect(engine.check({ name: 'trusted-server__any' })).toBe(
+      expect(engine.check({ name: 'trusted-server__any' }, undefined)).toBe(
         PolicyDecision.ALLOW,
       );
-      expect(engine.check({ name: 'mcp-server__any' })).toBe(
+      expect(engine.check({ name: 'mcp-server__any' }, undefined)).toBe(
+        PolicyDecision.ALLOW,
+      );
+      expect(engine.check({ name: 'glob' }, undefined)).toBe(
         PolicyDecision.ALLOW,
       );
-      expect(engine.check({ name: 'glob' })).toBe(PolicyDecision.ALLOW);
     });
 
     it('should handle edge case: MCP server with both trust and exclusion', async () => {
@@ -322,7 +346,7 @@ describe('Policy Engine Integration Tests', () => {
       const engine = new PolicyEngine(config);
 
       // Exclusion (195) should win over trust (90)
-      expect(engine.check({ name: 'conflicted-server__tool' })).toBe(
+      expect(engine.check({ name: 'conflicted-server__tool' }, undefined)).toBe(
         PolicyDecision.DENY,
       );
     });
@@ -345,10 +369,10 @@ describe('Policy Engine Integration Tests', () => {
 
       // Server exclusion (195) wins over specific tool allow (100)
       // This might be counterintuitive but follows the priority system
-      expect(engine.check({ name: 'my-server__special-tool' })).toBe(
+      expect(engine.check({ name: 'my-server__special-tool' }, undefined)).toBe(
         PolicyDecision.DENY,
       );
-      expect(engine.check({ name: 'my-server__other-tool' })).toBe(
+      expect(engine.check({ name: 'my-server__other-tool' }, undefined)).toBe(
         PolicyDecision.DENY,
       );
     });
@@ -365,8 +389,10 @@ describe('Policy Engine Integration Tests', () => {
       const engine = new PolicyEngine(engineConfig);
 
       // ASK_USER should become DENY in non-interactive mode
-      expect(engine.check({ name: 'unknown_tool' })).toBe(PolicyDecision.DENY);
-      expect(engine.check({ name: 'run_shell_command' })).toBe(
+      expect(engine.check({ name: 'unknown_tool' }, undefined)).toBe(
+        PolicyDecision.DENY,
+      );
+      expect(engine.check({ name: 'run_shell_command' }, undefined)).toBe(
         PolicyDecision.DENY,
       );
     });
@@ -381,13 +407,17 @@ describe('Policy Engine Integration Tests', () => {
       const engine = new PolicyEngine(config);
 
       // Should have default rules for write tools
-      expect(engine.check({ name: 'write_file' })).toBe(
+      expect(engine.check({ name: 'write_file' }, undefined)).toBe(
+        PolicyDecision.ASK_USER,
+      );
+      expect(engine.check({ name: 'replace' }, undefined)).toBe(
         PolicyDecision.ASK_USER,
       );
-      expect(engine.check({ name: 'replace' })).toBe(PolicyDecision.ASK_USER);
 
       // Unknown tools should use default
-      expect(engine.check({ name: 'unknown' })).toBe(PolicyDecision.ASK_USER);
+      expect(engine.check({ name: 'unknown' }, undefined)).toBe(
+        PolicyDecision.ASK_USER,
+      );
     });
 
     it('should verify rules are created with correct priorities', async () => {
diff --git a/packages/core/src/confirmation-bus/message-bus.ts b/packages/core/src/confirmation-bus/message-bus.ts
index b48129b412..cd293a72a6 100644
--- a/packages/core/src/confirmation-bus/message-bus.ts
+++ b/packages/core/src/confirmation-bus/message-bus.ts
@@ -50,7 +50,10 @@ export class MessageBus extends EventEmitter {
       }
 
       if (message.type === MessageBusType.TOOL_CONFIRMATION_REQUEST) {
-        const decision = this.policyEngine.check(message.toolCall);
+        const decision = this.policyEngine.check(
+          message.toolCall,
+          message.serverName,
+        );
 
         switch (decision) {
           case PolicyDecision.ALLOW:
diff --git a/packages/core/src/confirmation-bus/types.ts b/packages/core/src/confirmation-bus/types.ts
index 2b4bcf5685..52d7bd2e9f 100644
--- a/packages/core/src/confirmation-bus/types.ts
+++ b/packages/core/src/confirmation-bus/types.ts
@@ -19,6 +19,7 @@ export interface ToolConfirmationRequest {
   type: MessageBusType.TOOL_CONFIRMATION_REQUEST;
   toolCall: FunctionCall;
   correlationId: string;
+  serverName?: string;
 }
 
 export interface ToolConfirmationResponse {
diff --git a/packages/core/src/policy/policy-engine.test.ts b/packages/core/src/policy/policy-engine.test.ts
index 03caf2524e..fd3a4b62b2 100644
--- a/packages/core/src/policy/policy-engine.test.ts
+++ b/packages/core/src/policy/policy-engine.test.ts
@@ -22,13 +22,13 @@ describe('PolicyEngine', () => {
 
   describe('constructor', () => {
     it('should use default config when none provided', () => {
-      const decision = engine.check({ name: 'test' });
+      const decision = engine.check({ name: 'test' }, undefined);
       expect(decision).toBe(PolicyDecision.ASK_USER);
     });
 
     it('should respect custom default decision', () => {
       engine = new PolicyEngine({ defaultDecision: PolicyDecision.DENY });
-      const decision = engine.check({ name: 'test' });
+      const decision = engine.check({ name: 'test' }, undefined);
       expect(decision).toBe(PolicyDecision.DENY);
     });
 
@@ -57,9 +57,15 @@ describe('PolicyEngine', () => {
 
       engine = new PolicyEngine({ rules });
 
-      expect(engine.check({ name: 'shell' })).toBe(PolicyDecision.ALLOW);
-      expect(engine.check({ name: 'edit' })).toBe(PolicyDecision.DENY);
-      expect(engine.check({ name: 'other' })).toBe(PolicyDecision.ASK_USER);
+      expect(engine.check({ name: 'shell' }, undefined)).toBe(
+        PolicyDecision.ALLOW,
+      );
+      expect(engine.check({ name: 'edit' }, undefined)).toBe(
+        PolicyDecision.DENY,
+      );
+      expect(engine.check({ name: 'other' }, undefined)).toBe(
+        PolicyDecision.ASK_USER,
+      );
     });
 
     it('should match by args pattern', () => {
@@ -87,8 +93,8 @@ describe('PolicyEngine', () => {
         args: { command: 'ls -la' },
       };
 
-      expect(engine.check(dangerousCall)).toBe(PolicyDecision.DENY);
-      expect(engine.check(safeCall)).toBe(PolicyDecision.ALLOW);
+      expect(engine.check(dangerousCall, undefined)).toBe(PolicyDecision.DENY);
+      expect(engine.check(safeCall, undefined)).toBe(PolicyDecision.ALLOW);
     });
 
     it('should apply rules by priority', () => {
@@ -100,7 +106,9 @@ describe('PolicyEngine', () => {
       engine = new PolicyEngine({ rules });
 
       // Higher priority rule (ALLOW) should win
-      expect(engine.check({ name: 'shell' })).toBe(PolicyDecision.ALLOW);
+      expect(engine.check({ name: 'shell' }, undefined)).toBe(
+        PolicyDecision.ALLOW,
+      );
     });
 
     it('should apply wildcard rules (no toolName)', () => {
@@ -111,8 +119,10 @@ describe('PolicyEngine', () => {
 
       engine = new PolicyEngine({ rules });
 
-      expect(engine.check({ name: 'safe-tool' })).toBe(PolicyDecision.ALLOW);
-      expect(engine.check({ name: 'any-other-tool' })).toBe(
+      expect(engine.check({ name: 'safe-tool' }, undefined)).toBe(
+        PolicyDecision.ALLOW,
+      );
+      expect(engine.check({ name: 'any-other-tool' }, undefined)).toBe(
         PolicyDecision.DENY,
       );
     });
@@ -129,13 +139,17 @@ describe('PolicyEngine', () => {
       engine = new PolicyEngine(config);
 
       // ASK_USER should become DENY in non-interactive mode
-      expect(engine.check({ name: 'interactive-tool' })).toBe(
+      expect(engine.check({ name: 'interactive-tool' }, undefined)).toBe(
         PolicyDecision.DENY,
       );
       // ALLOW should remain ALLOW
-      expect(engine.check({ name: 'allowed-tool' })).toBe(PolicyDecision.ALLOW);
+      expect(engine.check({ name: 'allowed-tool' }, undefined)).toBe(
+        PolicyDecision.ALLOW,
+      );
       // Default ASK_USER should also become DENY
-      expect(engine.check({ name: 'unknown-tool' })).toBe(PolicyDecision.DENY);
+      expect(engine.check({ name: 'unknown-tool' }, undefined)).toBe(
+        PolicyDecision.DENY,
+      );
     });
   });
 
@@ -165,11 +179,15 @@ describe('PolicyEngine', () => {
     });
 
     it('should apply newly added rules', () => {
-      expect(engine.check({ name: 'new-tool' })).toBe(PolicyDecision.ASK_USER);
+      expect(engine.check({ name: 'new-tool' }, undefined)).toBe(
+        PolicyDecision.ASK_USER,
+      );
 
       engine.addRule({ toolName: 'new-tool', decision: PolicyDecision.ALLOW });
 
-      expect(engine.check({ name: 'new-tool' })).toBe(PolicyDecision.ALLOW);
+      expect(engine.check({ name: 'new-tool' }, undefined)).toBe(
+        PolicyDecision.ALLOW,
+      );
     });
   });
 
@@ -235,29 +253,31 @@ describe('PolicyEngine', () => {
       engine = new PolicyEngine({ rules });
 
       // Should match my-server tools
-      expect(engine.check({ name: 'my-server__tool1' })).toBe(
+      expect(engine.check({ name: 'my-server__tool1' }, undefined)).toBe(
         PolicyDecision.ALLOW,
       );
-      expect(engine.check({ name: 'my-server__another_tool' })).toBe(
+      expect(engine.check({ name: 'my-server__another_tool' }, undefined)).toBe(
         PolicyDecision.ALLOW,
       );
 
       // Should match blocked-server tools
-      expect(engine.check({ name: 'blocked-server__tool1' })).toBe(
-        PolicyDecision.DENY,
-      );
-      expect(engine.check({ name: 'blocked-server__dangerous' })).toBe(
+      expect(engine.check({ name: 'blocked-server__tool1' }, undefined)).toBe(
         PolicyDecision.DENY,
       );
+      expect(
+        engine.check({ name: 'blocked-server__dangerous' }, undefined),
+      ).toBe(PolicyDecision.DENY);
 
       // Should not match other patterns
-      expect(engine.check({ name: 'other-server__tool' })).toBe(
+      expect(engine.check({ name: 'other-server__tool' }, undefined)).toBe(
         PolicyDecision.ASK_USER,
       );
-      expect(engine.check({ name: 'my-server-tool' })).toBe(
+      expect(engine.check({ name: 'my-server-tool' }, undefined)).toBe(
         PolicyDecision.ASK_USER,
       ); // No __ separator
-      expect(engine.check({ name: 'my-server' })).toBe(PolicyDecision.ASK_USER); // No tool name
+      expect(engine.check({ name: 'my-server' }, undefined)).toBe(
+        PolicyDecision.ASK_USER,
+      ); // No tool name
     });
 
     it('should prioritize specific tool rules over server wildcards', () => {
@@ -277,10 +297,62 @@ describe('PolicyEngine', () => {
       engine = new PolicyEngine({ rules });
 
       // Specific tool deny should override server allow
-      expect(engine.check({ name: 'my-server__dangerous-tool' })).toBe(
-        PolicyDecision.DENY,
+      expect(
+        engine.check({ name: 'my-server__dangerous-tool' }, undefined),
+      ).toBe(PolicyDecision.DENY);
+      expect(engine.check({ name: 'my-server__safe-tool' }, undefined)).toBe(
+        PolicyDecision.ALLOW,
       );
-      expect(engine.check({ name: 'my-server__safe-tool' })).toBe(
+    });
+
+    it('should NOT match spoofed server names when using wildcards', () => {
+      // Vulnerability: A rule for 'prefix__*' matches 'prefix__suffix__tool'
+      // effectively allowing a server named 'prefix__suffix' to spoof 'prefix'.
+      const rules: PolicyRule[] = [
+        {
+          toolName: 'safe_server__*',
+          decision: PolicyDecision.ALLOW,
+        },
+      ];
+      engine = new PolicyEngine({ rules });
+
+      // A tool from a different server 'safe_server__malicious'
+      const spoofedToolCall = { name: 'safe_server__malicious__tool' };
+
+      // CURRENT BEHAVIOR (FIXED): Matches because it starts with 'safe_server__' BUT serverName doesn't match 'safe_server'
+      // We expect this to FAIL matching the ALLOW rule, thus falling back to default (ASK_USER)
+      expect(engine.check(spoofedToolCall, 'safe_server__malicious')).toBe(
+        PolicyDecision.ASK_USER,
+      );
+    });
+
+    it('should verify tool name prefix even if serverName matches', () => {
+      const rules: PolicyRule[] = [
+        {
+          toolName: 'safe_server__*',
+          decision: PolicyDecision.ALLOW,
+        },
+      ];
+      engine = new PolicyEngine({ rules });
+
+      // serverName matches, but tool name does not start with prefix
+      const invalidToolCall = { name: 'other_server__tool' };
+      expect(engine.check(invalidToolCall, 'safe_server')).toBe(
+        PolicyDecision.ASK_USER,
+      );
+    });
+
+    it('should allow when both serverName and tool name prefix match', () => {
+      const rules: PolicyRule[] = [
+        {
+          toolName: 'safe_server__*',
+          decision: PolicyDecision.ALLOW,
+        },
+      ];
+      engine = new PolicyEngine({ rules });
+
+      const validToolCall = { name: 'safe_server__tool' };
+      expect(engine.check(validToolCall, 'safe_server')).toBe(
         PolicyDecision.ALLOW,
       );
     });
@@ -302,17 +374,19 @@ describe('PolicyEngine', () => {
       engine = new PolicyEngine({ rules });
 
       // Matches highest priority rule (ls command)
-      expect(engine.check({ name: 'shell', args: { command: 'ls -la' } })).toBe(
-        PolicyDecision.ALLOW,
-      );
+      expect(
+        engine.check({ name: 'shell', args: { command: 'ls -la' } }, undefined),
+      ).toBe(PolicyDecision.ALLOW);
 
       // Matches middle priority rule (shell without ls)
-      expect(engine.check({ name: 'shell', args: { command: 'pwd' } })).toBe(
-        PolicyDecision.ASK_USER,
-      );
+      expect(
+        engine.check({ name: 'shell', args: { command: 'pwd' } }, undefined),
+      ).toBe(PolicyDecision.ASK_USER);
 
       // Matches lowest priority rule (not shell)
-      expect(engine.check({ name: 'edit' })).toBe(PolicyDecision.DENY);
+      expect(engine.check({ name: 'edit' }, undefined)).toBe(
+        PolicyDecision.DENY,
+      );
     });
 
     it('should handle tools with no args', () => {
@@ -327,17 +401,19 @@ describe('PolicyEngine', () => {
       engine = new PolicyEngine({ rules });
 
       // Tool call without args should not match pattern
-      expect(engine.check({ name: 'read' })).toBe(PolicyDecision.ASK_USER);
-
-      // Tool call with args not matching pattern
-      expect(engine.check({ name: 'read', args: { file: 'public.txt' } })).toBe(
+      expect(engine.check({ name: 'read' }, undefined)).toBe(
         PolicyDecision.ASK_USER,
       );
 
+      // Tool call with args not matching pattern
+      expect(
+        engine.check({ name: 'read', args: { file: 'public.txt' } }, undefined),
+      ).toBe(PolicyDecision.ASK_USER);
+
       // Tool call with args matching pattern
-      expect(engine.check({ name: 'read', args: { file: 'secret.txt' } })).toBe(
-        PolicyDecision.DENY,
-      );
+      expect(
+        engine.check({ name: 'read', args: { file: 'secret.txt' } }, undefined),
+      ).toBe(PolicyDecision.DENY);
     });
 
     it('should match args pattern regardless of property order', () => {
@@ -356,16 +432,16 @@ describe('PolicyEngine', () => {
       const args1 = { command: 'rm -rf /', path: '/home' };
       const args2 = { path: '/home', command: 'rm -rf /' };
 
-      expect(engine.check({ name: 'shell', args: args1 })).toBe(
+      expect(engine.check({ name: 'shell', args: args1 }, undefined)).toBe(
         PolicyDecision.DENY,
       );
-      expect(engine.check({ name: 'shell', args: args2 })).toBe(
+      expect(engine.check({ name: 'shell', args: args2 }, undefined)).toBe(
         PolicyDecision.DENY,
       );
 
       // Verify safe command doesn't match
       const safeArgs = { command: 'ls -la', path: '/home' };
-      expect(engine.check({ name: 'shell', args: safeArgs })).toBe(
+      expect(engine.check({ name: 'shell', args: safeArgs }, undefined)).toBe(
         PolicyDecision.ASK_USER,
       );
     });
@@ -391,10 +467,10 @@ describe('PolicyEngine', () => {
         data: { value: 'secret', sensitive: true },
       };
 
-      expect(engine.check({ name: 'api', args: args1 })).toBe(
+      expect(engine.check({ name: 'api', args: args1 }, undefined)).toBe(
         PolicyDecision.DENY,
       );
-      expect(engine.check({ name: 'api', args: args2 })).toBe(
+      expect(engine.check({ name: 'api', args: args2 }, undefined)).toBe(
         PolicyDecision.DENY,
       );
     });
@@ -424,17 +500,17 @@ describe('PolicyEngine', () => {
 
       // Should not throw stack overflow error
       expect(() =>
-        engine.check({ name: 'test', args: circularArgs }),
+        engine.check({ name: 'test', args: circularArgs }, undefined),
       ).not.toThrow();
 
       // Should detect the circular reference pattern
-      expect(engine.check({ name: 'test', args: circularArgs })).toBe(
-        PolicyDecision.DENY,
-      );
+      expect(
+        engine.check({ name: 'test', args: circularArgs }, undefined),
+      ).toBe(PolicyDecision.DENY);
 
       // Non-circular object should not match
       const normalArgs = { name: 'test', data: { value: 'normal' } };
-      expect(engine.check({ name: 'test', args: normalArgs })).toBe(
+      expect(engine.check({ name: 'test', args: normalArgs }, undefined)).toBe(
         PolicyDecision.ASK_USER,
       );
     });
@@ -471,13 +547,13 @@ describe('PolicyEngine', () => {
 
       // Should handle without stack overflow
       expect(() =>
-        engine.check({ name: 'deep', args: deepCircular }),
+        engine.check({ name: 'deep', args: deepCircular }, undefined),
       ).not.toThrow();
 
       // Should detect the circular reference
-      expect(engine.check({ name: 'deep', args: deepCircular })).toBe(
-        PolicyDecision.DENY,
-      );
+      expect(
+        engine.check({ name: 'deep', args: deepCircular }, undefined),
+      ).toBe(PolicyDecision.DENY);
     });
 
     it('should handle repeated non-circular objects correctly', () => {
@@ -506,7 +582,9 @@ describe('PolicyEngine', () => {
       };
 
       // Should NOT mark repeated objects as circular, and should match the shared value pattern
-      expect(engine.check({ name: 'test', args })).toBe(PolicyDecision.ALLOW);
+      expect(engine.check({ name: 'test', args }, undefined)).toBe(
+        PolicyDecision.ALLOW,
+      );
     });
 
     it('should omit undefined and function values from objects', () => {
@@ -528,7 +606,9 @@ describe('PolicyEngine', () => {
       };
 
       // Should match pattern with defined value, undefined and functions omitted
-      expect(engine.check({ name: 'test', args })).toBe(PolicyDecision.ALLOW);
+      expect(engine.check({ name: 'test', args }, undefined)).toBe(
+        PolicyDecision.ALLOW,
+      );
 
       // Check that the pattern would NOT match if undefined was included
       const rulesWithUndefined: PolicyRule[] = [
@@ -539,7 +619,7 @@ describe('PolicyEngine', () => {
         },
       ];
       engine = new PolicyEngine({ rules: rulesWithUndefined });
-      expect(engine.check({ name: 'test', args })).toBe(
+      expect(engine.check({ name: 'test', args }, undefined)).toBe(
         PolicyDecision.ASK_USER,
       );
 
@@ -552,7 +632,7 @@ describe('PolicyEngine', () => {
         },
       ];
       engine = new PolicyEngine({ rules: rulesWithFunction });
-      expect(engine.check({ name: 'test', args })).toBe(
+      expect(engine.check({ name: 'test', args }, undefined)).toBe(
         PolicyDecision.ASK_USER,
       );
     });
@@ -573,7 +653,9 @@ describe('PolicyEngine', () => {
       };
 
       // Should match pattern with undefined and functions converted to null
-      expect(engine.check({ name: 'test', args })).toBe(PolicyDecision.ALLOW);
+      expect(engine.check({ name: 'test', args }, undefined)).toBe(
+        PolicyDecision.ALLOW,
+      );
     });
 
     it('should produce valid JSON for all inputs', () => {
@@ -607,10 +689,12 @@ describe('PolicyEngine', () => {
         engine = new PolicyEngine({ rules });
 
         // Should not throw when checking (which internally uses stableStringify)
-        expect(() => engine.check({ name: 'test', args: input })).not.toThrow();
+        expect(() =>
+          engine.check({ name: 'test', args: input }, undefined),
+        ).not.toThrow();
 
         // The check should succeed
-        expect(engine.check({ name: 'test', args: input })).toBe(
+        expect(engine.check({ name: 'test', args: input }, undefined)).toBe(
           PolicyDecision.ALLOW,
         );
       }
@@ -641,7 +725,9 @@ describe('PolicyEngine', () => {
       };
 
       // Should match the sanitized pattern, not the dangerous one
-      expect(engine.check({ name: 'test', args })).toBe(PolicyDecision.ALLOW);
+      expect(engine.check({ name: 'test', args }, undefined)).toBe(
+        PolicyDecision.ALLOW,
+      );
     });
 
     it('should handle toJSON that returns primitives', () => {
@@ -663,7 +749,9 @@ describe('PolicyEngine', () => {
       };
 
       // toJSON returns a string, which should be properly stringified
-      expect(engine.check({ name: 'test', args })).toBe(PolicyDecision.ALLOW);
+      expect(engine.check({ name: 'test', args }, undefined)).toBe(
+        PolicyDecision.ALLOW,
+      );
     });
 
     it('should handle toJSON that throws an error', () => {
@@ -687,7 +775,23 @@ describe('PolicyEngine', () => {
       };
 
       // Should fall back to regular object serialization when toJSON throws
-      expect(engine.check({ name: 'test', args })).toBe(PolicyDecision.ALLOW);
+      expect(engine.check({ name: 'test', args }, undefined)).toBe(
+        PolicyDecision.ALLOW,
+      );
+    });
+  });
+
+  describe('serverName requirement', () => {
+    it('should require serverName for checks', () => {
+      // @ts-expect-error - intentionally testing missing serverName
+      expect(engine.check({ name: 'test' })).toBe(PolicyDecision.ASK_USER);
+      // When serverName is provided (even undefined), it should work
+      expect(engine.check({ name: 'test' }, undefined)).toBe(
+        PolicyDecision.ASK_USER,
+      );
+      expect(engine.check({ name: 'test' }, 'some-server')).toBe(
+        PolicyDecision.ASK_USER,
+      );
     });
   });
 });
diff --git a/packages/core/src/policy/policy-engine.ts b/packages/core/src/policy/policy-engine.ts
index cc98cc5966..034cce2c8b 100644
--- a/packages/core/src/policy/policy-engine.ts
+++ b/packages/core/src/policy/policy-engine.ts
@@ -17,12 +17,21 @@ function ruleMatches(
   rule: PolicyRule,
   toolCall: FunctionCall,
   stringifiedArgs: string | undefined,
+  serverName: string | undefined,
 ): boolean {
   // Check tool name if specified
   if (rule.toolName) {
     // Support wildcard patterns: "serverName__*" matches "serverName__anyTool"
     if (rule.toolName.endsWith('__*')) {
       const prefix = rule.toolName.slice(0, -3); // Remove "__*"
+      if (serverName !== undefined) {
+        // Robust check: if serverName is provided, it MUST match the prefix exactly.
+        // This prevents "malicious-server" from spoofing "trusted-server" by naming itself "trusted-server__malicious".
+        if (serverName !== prefix) {
+          return false;
+        }
+      }
+      // Always verify the prefix, even if serverName matched
       if (!toolCall.name || !toolCall.name.startsWith(prefix + '__')) {
         return false;
       }
@@ -65,7 +74,10 @@ export class PolicyEngine {
   /**
    * Check if a tool call is allowed based on the configured policies.
    */
-  check(toolCall: FunctionCall): PolicyDecision {
+  check(
+    toolCall: FunctionCall,
+    serverName: string | undefined,
+  ): PolicyDecision {
     let stringifiedArgs: string | undefined;
     // Compute stringified args once before the loop
     if (toolCall.args && this.rules.some((rule) => rule.argsPattern)) {
@@ -78,7 +90,7 @@ export class PolicyEngine {
 
     // Find the first matching rule (already sorted by priority)
     for (const rule of this.rules) {
-      if (ruleMatches(rule, toolCall, stringifiedArgs)) {
+      if (ruleMatches(rule, toolCall, stringifiedArgs, serverName)) {
         debugLogger.debug(
           `[PolicyEngine.check] MATCHED rule: toolName=${rule.toolName}, decision=${rule.decision}, priority=${rule.priority}, argsPattern=${rule.argsPattern?.source || 'none'}`,
         );
diff --git a/packages/core/src/tools/base-tool-invocation.test.ts b/packages/core/src/tools/base-tool-invocation.test.ts
new file mode 100644
index 0000000000..38d651f076
--- /dev/null
+++ b/packages/core/src/tools/base-tool-invocation.test.ts
@@ -0,0 +1,131 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { BaseToolInvocation, type ToolResult } from './tools.js';
+import type { MessageBus } from '../confirmation-bus/message-bus.js';
+import {
+  type Message,
+  MessageBusType,
+  type ToolConfirmationRequest,
+  type ToolConfirmationResponse,
+} from '../confirmation-bus/types.js';
+
+class TestBaseToolInvocation extends BaseToolInvocation<object, ToolResult> {
+  getDescription(): string {
+    return 'test description';
+  }
+  async execute(): Promise<ToolResult> {
+    return { llmContent: [], returnDisplay: '' };
+  }
+}
+
+describe('BaseToolInvocation', () => {
+  let messageBus: MessageBus;
+  let abortController: AbortController;
+
+  beforeEach(() => {
+    messageBus = {
+      publish: vi.fn(),
+      subscribe: vi.fn(),
+      unsubscribe: vi.fn(),
+    } as unknown as MessageBus;
+    abortController = new AbortController();
+  });
+
+  it('should propagate serverName to ToolConfirmationRequest', async () => {
+    const serverName = 'test-server';
+    const tool = new TestBaseToolInvocation(
+      {},
+      messageBus,
+      'test-tool',
+      'Test Tool',
+      serverName,
+    );
+
+    let capturedRequest: ToolConfirmationRequest | undefined;
+    vi.mocked(messageBus.publish).mockImplementation((request: Message) => {
+      if (request.type === MessageBusType.TOOL_CONFIRMATION_REQUEST) {
+        capturedRequest = request;
+      }
+    });
+
+    let responseHandler:
+      | ((response: ToolConfirmationResponse) => void)
+      | undefined;
+    vi.mocked(messageBus.subscribe).mockImplementation(
+      (type: MessageBusType, handler: (message: Message) => void) => {
+        if (type === MessageBusType.TOOL_CONFIRMATION_RESPONSE) {
+          responseHandler = handler as (
+            response: ToolConfirmationResponse,
+          ) => void;
+        }
+      },
+    );
+
+    const confirmationPromise = tool.shouldConfirmExecute(
+      abortController.signal,
+    );
+
+    // Wait for microtasks to ensure publish is called
+    await new Promise((resolve) => setTimeout(resolve, 0));
+
+    expect(messageBus.publish).toHaveBeenCalledTimes(1);
+    expect(capturedRequest).toBeDefined();
+    expect(capturedRequest?.type).toBe(
+      MessageBusType.TOOL_CONFIRMATION_REQUEST,
+    );
+    expect(capturedRequest?.serverName).toBe(serverName);
+
+    // Simulate response to finish the promise cleanly
+    if (responseHandler && capturedRequest) {
+      responseHandler({
+        type: MessageBusType.TOOL_CONFIRMATION_RESPONSE,
+        correlationId: capturedRequest.correlationId,
+        confirmed: true,
+      });
+    }
+
+    await confirmationPromise;
+  });
+
+  it('should NOT propagate serverName if not provided', async () => {
+    const tool = new TestBaseToolInvocation(
+      {},
+      messageBus,
+      'test-tool',
+      'Test Tool',
+      // no serverName
+    );
+
+    let capturedRequest: ToolConfirmationRequest | undefined;
+    vi.mocked(messageBus.publish).mockImplementation((request: Message) => {
+      if (request.type === MessageBusType.TOOL_CONFIRMATION_REQUEST) {
+        capturedRequest = request;
+      }
+    });
+
+    // We need to mock subscribe to avoid hanging if we want to await the promise,
+    // but for this test we just need to check publish.
+    // We'll abort to clean up.
+    const confirmationPromise = tool.shouldConfirmExecute(
+      abortController.signal,
+    );
+
+    await new Promise((resolve) => setTimeout(resolve, 0));
+
+    expect(messageBus.publish).toHaveBeenCalledTimes(1);
+    expect(capturedRequest).toBeDefined();
+    expect(capturedRequest?.serverName).toBeUndefined();
+
+    abortController.abort();
+    try {
+      await confirmationPromise;
+    } catch {
+      // ignore abort error
+    }
+  });
+});
diff --git a/packages/core/src/tools/mcp-tool.ts b/packages/core/src/tools/mcp-tool.ts
index 5949bdf5b2..f5c9205e9d 100644
--- a/packages/core/src/tools/mcp-tool.ts
+++ b/packages/core/src/tools/mcp-tool.ts
@@ -77,7 +77,14 @@ class DiscoveredMCPToolInvocation extends BaseToolInvocation<
     // Use composite format for policy checks: serverName__toolName
     // This enables server wildcards (e.g., "google-workspace__*")
     // while still allowing specific tool rules
-    super(params, messageBus, `${serverName}__${serverToolName}`, displayName);
+
+    super(
+      params,
+      messageBus,
+      `${serverName}__${serverToolName}`,
+      displayName,
+      serverName,
+    );
   }
 
   protected override async getConfirmationDetails(
diff --git a/packages/core/src/tools/tools.ts b/packages/core/src/tools/tools.ts
index 98605aea9b..59d1ef7baf 100644
--- a/packages/core/src/tools/tools.ts
+++ b/packages/core/src/tools/tools.ts
@@ -78,6 +78,7 @@ export abstract class BaseToolInvocation<
     protected readonly messageBus?: MessageBus,
     readonly _toolName?: string,
     readonly _toolDisplayName?: string,
+    readonly _serverName?: string,
   ) {}
 
   abstract getDescription(): string;
@@ -215,6 +216,7 @@ export abstract class BaseToolInvocation<
         type: MessageBusType.TOOL_CONFIRMATION_REQUEST,
         toolCall,
         correlationId,
+        serverName: this._serverName,
       };
 
       try {