From a6256cdabf8c2229fe52bfc8d857d76b9735d772 Mon Sep 17 00:00:00 2001 From: gemini-cli-robot Date: Fri, 6 Mar 2026 19:29:28 -0500 Subject: [PATCH] fix(patch): cherry-pick 931e668 to release/v0.33.0-preview.4-pr-21425 [CONFLICTS] (#21478) Co-authored-by: Abhi <43648792+abhipatel12@users.noreply.github.com> Co-authored-by: Abhi --- .github/workflows/ci.yml | 4 +- .github/workflows/links.yml | 4 +- integration-tests/extensions-reload.test.ts | 4 +- .../config/policy-engine.integration.test.ts | 73 ++-- .../core/src/agents/local-executor.test.ts | 7 +- .../core/__snapshots__/prompts.test.ts.snap | 6 +- .../src/core/loggingContentGenerator.test.ts | 19 +- packages/core/src/core/prompts.test.ts | 16 +- packages/core/src/policy/config.test.ts | 44 +-- packages/core/src/policy/config.ts | 18 +- .../core/src/policy/policy-engine.test.ts | 342 +++++++++++------- packages/core/src/policy/policy-engine.ts | 298 ++++++--------- packages/core/src/policy/toml-loader.test.ts | 58 ++- packages/core/src/policy/toml-loader.ts | 53 ++- packages/core/src/policy/types.ts | 12 + .../core/src/prompts/promptProvider.test.ts | 2 +- packages/core/src/tools/mcp-tool.test.ts | 69 +++- packages/core/src/tools/mcp-tool.ts | 109 +++++- packages/core/src/tools/tool-registry.test.ts | 64 ++-- packages/core/src/tools/tool-registry.ts | 16 +- 20 files changed, 704 insertions(+), 514 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a358ad8b07..b10cdd6045 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -124,7 +124,9 @@ jobs: - name: 'Link Checker' uses: 'lycheeverse/lychee-action@885c65f3dc543b57c898c8099f4e08c8afd178a2' # ratchet: lycheeverse/lychee-action@v2.6.1 with: - args: '--verbose --accept 200,503 ./**/*.md' + # Exclude GEMINI.md because the absolute GitHub URL in CONTRIBUTING.md (which is symlinked) + # causes intermittent 429 Too Many Requests errors from GitHub API rate limits. + args: '--verbose --accept 200,503 --exclude "GEMINI\\.md" ./**/*.md' fail: true test_linux: name: 'Test (Linux) - ${{ matrix.node-version }}, ${{ matrix.shard }}' diff --git a/.github/workflows/links.yml b/.github/workflows/links.yml index 1ed45019f9..8ac79614db 100644 --- a/.github/workflows/links.yml +++ b/.github/workflows/links.yml @@ -22,4 +22,6 @@ jobs: id: 'lychee' uses: 'lycheeverse/lychee-action@885c65f3dc543b57c898c8099f4e08c8afd178a2' # ratchet: lycheeverse/lychee-action@v2.6.1 with: - args: '--verbose --no-progress --accept 200,503 ./**/*.md' + # Exclude GEMINI.md because the absolute GitHub URL in CONTRIBUTING.md (which is symlinked) + # causes intermittent 429 Too Many Requests errors from GitHub API rate limits. + args: '--verbose --no-progress --accept 200,503 --exclude "GEMINI\\.md" ./**/*.md' diff --git a/integration-tests/extensions-reload.test.ts b/integration-tests/extensions-reload.test.ts index 520076d7c6..9d451cedcf 100644 --- a/integration-tests/extensions-reload.test.ts +++ b/integration-tests/extensions-reload.test.ts @@ -104,7 +104,7 @@ describe('extension reloading', () => { return ( output.includes( 'test-server (from test-extension) - Ready (1 tool)', - ) && output.includes('- hello') + ) && output.includes('- mcp_test-server_hello') ); }, 30000, // 30s timeout @@ -148,7 +148,7 @@ describe('extension reloading', () => { return ( output.includes( 'test-server (from test-extension) - Ready (1 tool)', - ) && output.includes('- goodbye') + ) && output.includes('- mcp_test-server_goodbye') ); }, 30000, diff --git a/packages/cli/src/config/policy-engine.integration.test.ts b/packages/cli/src/config/policy-engine.integration.test.ts index 02515815d0..312a55216d 100644 --- a/packages/cli/src/config/policy-engine.integration.test.ts +++ b/packages/cli/src/config/policy-engine.integration.test.ts @@ -89,13 +89,13 @@ describe('Policy Engine Integration Tests', () => { // Tools from allowed server should be allowed // Tools from allowed server should be allowed expect( - (await engine.check({ name: 'allowed-server__tool1' }, undefined)) + (await engine.check({ name: 'mcp_allowed-server_tool1' }, undefined)) .decision, ).toBe(PolicyDecision.ALLOW); expect( ( await engine.check( - { name: 'allowed-server__another_tool' }, + { name: 'mcp_allowed-server_another_tool' }, undefined, ) ).decision, @@ -103,13 +103,13 @@ describe('Policy Engine Integration Tests', () => { // Tools from trusted server should be allowed expect( - (await engine.check({ name: 'trusted-server__tool1' }, undefined)) + (await engine.check({ name: 'mcp_trusted-server_tool1' }, undefined)) .decision, ).toBe(PolicyDecision.ALLOW); expect( ( await engine.check( - { name: 'trusted-server__special_tool' }, + { name: 'mcp_trusted-server_special_tool' }, undefined, ) ).decision, @@ -117,17 +117,17 @@ describe('Policy Engine Integration Tests', () => { // Tools from blocked server should be denied expect( - (await engine.check({ name: 'blocked-server__tool1' }, undefined)) + (await engine.check({ name: 'mcp_blocked-server_tool1' }, undefined)) .decision, ).toBe(PolicyDecision.DENY); expect( - (await engine.check({ name: 'blocked-server__any_tool' }, undefined)) + (await engine.check({ name: 'mcp_blocked-server_any_tool' }, undefined)) .decision, ).toBe(PolicyDecision.DENY); // Tools from unknown servers should use default expect( - (await engine.check({ name: 'unknown-server__tool' }, undefined)) + (await engine.check({ name: 'mcp_unknown-server_tool' }, undefined)) .decision, ).toBe(PolicyDecision.ASK_USER); }); @@ -147,12 +147,16 @@ describe('Policy Engine Integration Tests', () => { // ANY tool with a server name should be allowed expect( - (await engine.check({ name: 'mcp-server__tool' }, 'mcp-server')) + (await engine.check({ name: 'mcp_mcp-server_tool' }, 'mcp-server')) .decision, ).toBe(PolicyDecision.ALLOW); expect( - (await engine.check({ name: 'another-server__tool' }, 'another-server')) - .decision, + ( + await engine.check( + { name: 'mcp_another-server_tool' }, + 'another-server', + ) + ).decision, ).toBe(PolicyDecision.ALLOW); // Built-in tools should NOT be allowed by the MCP wildcard @@ -167,7 +171,7 @@ describe('Policy Engine Integration Tests', () => { allowed: ['my-server'], }, tools: { - exclude: ['my-server__dangerous-tool'], + exclude: ['mcp_my-server_dangerous-tool'], }, }; @@ -180,20 +184,24 @@ describe('Policy Engine Integration Tests', () => { // MCP server allowed (priority 4.1) provides general allow for server // MCP server allowed (priority 4.1) provides general allow for server expect( - (await engine.check({ name: 'my-server__safe-tool' }, undefined)) + (await engine.check({ name: 'mcp_my-server_safe-tool' }, undefined)) .decision, ).toBe(PolicyDecision.ALLOW); // But specific tool exclude (priority 4.4) wins over server allow expect( - (await engine.check({ name: 'my-server__dangerous-tool' }, undefined)) - .decision, + ( + await engine.check( + { name: 'mcp_my-server_dangerous-tool' }, + undefined, + ) + ).decision, ).toBe(PolicyDecision.DENY); }); it('should handle complex mixed configurations', async () => { const settings: Settings = { tools: { - allowed: ['custom-tool', 'my-server__special-tool'], + allowed: ['custom-tool', 'mcp_my-server_special-tool'], exclude: ['glob', 'dangerous-tool'], }, mcp: { @@ -238,21 +246,21 @@ describe('Policy Engine Integration Tests', () => { (await engine.check({ name: 'custom-tool' }, undefined)).decision, ).toBe(PolicyDecision.ALLOW); expect( - (await engine.check({ name: 'my-server__special-tool' }, undefined)) + (await engine.check({ name: 'mcp_my-server_special-tool' }, undefined)) .decision, ).toBe(PolicyDecision.ALLOW); // MCP server tools expect( - (await engine.check({ name: 'allowed-server__tool' }, undefined)) + (await engine.check({ name: 'mcp_allowed-server_tool' }, undefined)) .decision, ).toBe(PolicyDecision.ALLOW); expect( - (await engine.check({ name: 'trusted-server__tool' }, undefined)) + (await engine.check({ name: 'mcp_trusted-server_tool' }, undefined)) .decision, ).toBe(PolicyDecision.ALLOW); expect( - (await engine.check({ name: 'blocked-server__tool' }, undefined)) + (await engine.check({ name: 'mcp_blocked-server_tool' }, undefined)) .decision, ).toBe(PolicyDecision.DENY); @@ -479,7 +487,7 @@ describe('Policy Engine Integration Tests', () => { expect(blockedToolRule?.priority).toBe(4.4); // Command line exclude const blockedServerRule = rules.find( - (r) => r.toolName === 'blocked-server__*', + (r) => r.toolName === 'mcp_blocked-server_*', ); expect(blockedServerRule?.priority).toBe(4.9); // MCP server exclude @@ -489,11 +497,13 @@ describe('Policy Engine Integration Tests', () => { expect(specificToolRule?.priority).toBe(4.3); // Command line allow const trustedServerRule = rules.find( - (r) => r.toolName === 'trusted-server__*', + (r) => r.toolName === 'mcp_trusted-server_*', ); expect(trustedServerRule?.priority).toBe(4.2); // MCP trusted server - const mcpServerRule = rules.find((r) => r.toolName === 'mcp-server__*'); + const mcpServerRule = rules.find( + (r) => r.toolName === 'mcp_mcp-server_*', + ); expect(mcpServerRule?.priority).toBe(4.1); // MCP allowed server const readOnlyToolRule = rules.find((r) => r.toolName === 'glob'); @@ -505,18 +515,19 @@ describe('Policy Engine Integration Tests', () => { (await engine.check({ name: 'blocked-tool' }, undefined)).decision, ).toBe(PolicyDecision.DENY); expect( - (await engine.check({ name: 'blocked-server__any' }, undefined)) + (await engine.check({ name: 'mcp_blocked-server_any' }, undefined)) .decision, ).toBe(PolicyDecision.DENY); expect( (await engine.check({ name: 'specific-tool' }, undefined)).decision, ).toBe(PolicyDecision.ALLOW); expect( - (await engine.check({ name: 'trusted-server__any' }, undefined)) + (await engine.check({ name: 'mcp_trusted-server_any' }, undefined)) .decision, ).toBe(PolicyDecision.ALLOW); expect( - (await engine.check({ name: 'mcp-server__any' }, undefined)).decision, + (await engine.check({ name: 'mcp_mcp-server_any' }, undefined)) + .decision, ).toBe(PolicyDecision.ALLOW); expect((await engine.check({ name: 'glob' }, undefined)).decision).toBe( PolicyDecision.ALLOW, @@ -545,7 +556,7 @@ describe('Policy Engine Integration Tests', () => { // Exclusion (195) should win over trust (90) expect( - (await engine.check({ name: 'conflicted-server__tool' }, undefined)) + (await engine.check({ name: 'mcp_conflicted-server_tool' }, undefined)) .decision, ).toBe(PolicyDecision.DENY); }); @@ -556,7 +567,7 @@ describe('Policy Engine Integration Tests', () => { excluded: ['my-server'], // Priority 195 - DENY }, tools: { - allowed: ['my-server__special-tool'], // Priority 100 - ALLOW + allowed: ['mcp_my-server_special-tool'], // Priority 100 - ALLOW }, }; @@ -569,11 +580,11 @@ describe('Policy Engine Integration Tests', () => { // Server exclusion (195) wins over specific tool allow (100) // This might be counterintuitive but follows the priority system expect( - (await engine.check({ name: 'my-server__special-tool' }, undefined)) + (await engine.check({ name: 'mcp_my-server_special-tool' }, undefined)) .decision, ).toBe(PolicyDecision.DENY); expect( - (await engine.check({ name: 'my-server__other-tool' }, undefined)) + (await engine.check({ name: 'mcp_my-server_other-tool' }, undefined)) .decision, ).toBe(PolicyDecision.DENY); }); @@ -643,13 +654,13 @@ describe('Policy Engine Integration Tests', () => { const tool3Rule = rules.find((r) => r.toolName === 'tool3'); expect(tool3Rule?.priority).toBe(4.4); // Excluded tools (user tier) - const server2Rule = rules.find((r) => r.toolName === 'server2__*'); + const server2Rule = rules.find((r) => r.toolName === 'mcp_server2_*'); expect(server2Rule?.priority).toBe(4.9); // Excluded servers (user tier) const tool1Rule = rules.find((r) => r.toolName === 'tool1'); expect(tool1Rule?.priority).toBe(4.3); // Allowed tools (user tier) - const server1Rule = rules.find((r) => r.toolName === 'server1__*'); + const server1Rule = rules.find((r) => r.toolName === 'mcp_server1_*'); expect(server1Rule?.priority).toBe(4.1); // Allowed servers (user tier) const globRule = rules.find((r) => r.toolName === 'glob'); diff --git a/packages/core/src/agents/local-executor.test.ts b/packages/core/src/agents/local-executor.test.ts index 5fb28d0e8a..1c53248156 100644 --- a/packages/core/src/agents/local-executor.test.ts +++ b/packages/core/src/agents/local-executor.test.ts @@ -17,10 +17,7 @@ import { debugLogger } from '../utils/debugLogger.js'; import { LocalAgentExecutor, type ActivityCallback } from './local-executor.js'; import { makeFakeConfig } from '../test-utils/config.js'; import { ToolRegistry } from '../tools/tool-registry.js'; -import { - DiscoveredMCPTool, - MCP_QUALIFIED_NAME_SEPARATOR, -} from '../tools/mcp-tool.js'; +import { DiscoveredMCPTool } from '../tools/mcp-tool.js'; import { LSTool } from '../tools/ls.js'; import { LS_TOOL_NAME, READ_FILE_TOOL_NAME } from '../tools/tool-names.js'; import { @@ -504,7 +501,7 @@ describe('LocalAgentExecutor', () => { it('should automatically qualify MCP tools in agent definitions', async () => { const serverName = 'mcp-server'; const toolName = 'mcp-tool'; - const qualifiedName = `${serverName}${MCP_QUALIFIED_NAME_SEPARATOR}${toolName}`; + const qualifiedName = `mcp_${serverName}_${toolName}`; const mockMcpTool = { tool: vi.fn(), diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index 438251ed1f..a739b2a0d4 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -103,7 +103,7 @@ The following tools are available in Plan Mode: \`exit_plan_mode\` \`write_file\` \`replace\` - \`read_data\` (readonly-server) + \`mcp_readonly-server_read_data\` (readonly-server) ## Rules @@ -271,7 +271,7 @@ The following tools are available in Plan Mode: \`exit_plan_mode\` \`write_file\` \`replace\` - \`read_data\` (readonly-server) + \`mcp_readonly-server_read_data\` (readonly-server) ## Rules @@ -558,7 +558,7 @@ The following tools are available in Plan Mode: \`exit_plan_mode\` \`write_file\` \`replace\` - \`read_data\` (readonly-server) + \`mcp_readonly-server_read_data\` (readonly-server) ## Rules diff --git a/packages/core/src/core/loggingContentGenerator.test.ts b/packages/core/src/core/loggingContentGenerator.test.ts index 0e9e83772c..1e8a886f69 100644 --- a/packages/core/src/core/loggingContentGenerator.test.ts +++ b/packages/core/src/core/loggingContentGenerator.test.ts @@ -709,7 +709,7 @@ describe('estimateContextBreakdown', () => { { functionDeclarations: [ { - name: 'myserver__search', + name: 'mcp_myserver_search', description: 'Search via MCP', parameters: {}, }, @@ -747,8 +747,7 @@ describe('estimateContextBreakdown', () => { expect(builtinOnly.mcp_servers).toBe(0); }); - it('should not classify tools with __ in the middle of a segment as MCP', () => { - // "__" at start or end (not a valid server__tool pattern) should not be MCP + it('should not classify tools without mcp_ prefix as MCP', () => { const config = { tools: [ { @@ -842,7 +841,7 @@ describe('estimateContextBreakdown', () => { functionDeclarations: [ { name: 'read_file', description: 'Read', parameters: {} }, { - name: 'myserver__search', + name: 'mcp_myserver_search', description: 'MCP search', parameters: {}, }, @@ -858,7 +857,7 @@ describe('estimateContextBreakdown', () => { expect(result.history).toBeGreaterThan(0); // tool_calls should only contain non-MCP tools expect(result.tool_calls['read_file']).toBeGreaterThan(0); - expect(result.tool_calls['myserver__search']).toBeUndefined(); + expect(result.tool_calls['mcp_myserver_search']).toBeUndefined(); // MCP tokens are only in mcp_servers expect(result.mcp_servers).toBeGreaterThan(0); }); @@ -870,7 +869,7 @@ describe('estimateContextBreakdown', () => { parts: [ { functionCall: { - name: 'myserver__search', + name: 'mcp_myserver_search', args: { query: 'test' }, }, }, @@ -881,7 +880,7 @@ describe('estimateContextBreakdown', () => { parts: [ { functionResponse: { - name: 'myserver__search', + name: 'mcp_myserver_search', response: { results: [] }, }, }, @@ -890,7 +889,7 @@ describe('estimateContextBreakdown', () => { ]; const result = estimateContextBreakdown(contents); // MCP tool calls should NOT appear in tool_calls - expect(result.tool_calls['myserver__search']).toBeUndefined(); + expect(result.tool_calls['mcp_myserver_search']).toBeUndefined(); // MCP call tokens should only be counted in mcp_servers expect(result.mcp_servers).toBeGreaterThan(0); }); @@ -908,7 +907,7 @@ describe('estimateContextBreakdown', () => { }, { functionCall: { - name: 'myserver__search', + name: 'mcp_myserver_search', args: { q: 'hello' }, }, }, @@ -919,7 +918,7 @@ describe('estimateContextBreakdown', () => { // Non-MCP tools should be in tool_calls expect(result.tool_calls['read_file']).toBeGreaterThan(0); // MCP tools should NOT be in tool_calls - expect(result.tool_calls['myserver__search']).toBeUndefined(); + expect(result.tool_calls['mcp_myserver_search']).toBeUndefined(); // MCP tool calls should only be in mcp_servers expect(result.mcp_servers).toBeGreaterThan(0); }); diff --git a/packages/core/src/core/prompts.test.ts b/packages/core/src/core/prompts.test.ts index 6d65596ce4..82ca01e9b3 100644 --- a/packages/core/src/core/prompts.test.ts +++ b/packages/core/src/core/prompts.test.ts @@ -465,9 +465,13 @@ describe('Core System Prompt (prompts.ts)', () => { const prompt = getCoreSystemPrompt(mockConfig); expect(prompt).toContain('# Active Approval Mode: Plan'); // Read-only MCP tool should appear with server name - expect(prompt).toContain('`read_data` (readonly-server)'); + expect(prompt).toContain( + '`mcp_readonly-server_read_data` (readonly-server)', + ); // Non-read-only MCP tool should not appear (excluded by policy) - expect(prompt).not.toContain('`write_data` (nonreadonly-server)'); + expect(prompt).not.toContain( + '`mcp_nonreadonly-server_write_data` (nonreadonly-server)', + ); expect(prompt).toMatchSnapshot(); }); @@ -485,8 +489,12 @@ describe('Core System Prompt (prompts.ts)', () => { const prompt = getCoreSystemPrompt(mockConfig); - expect(prompt).toContain('`read_data` (readonly-server)'); - expect(prompt).not.toContain('`write_data` (nonreadonly-server)'); + expect(prompt).toContain( + '`mcp_readonly-server_read_data` (readonly-server)', + ); + expect(prompt).not.toContain( + '`mcp_nonreadonly-server_write_data` (nonreadonly-server)', + ); }); it('should only list available tools in PLAN mode', () => { diff --git a/packages/core/src/policy/config.test.ts b/packages/core/src/policy/config.test.ts index 3ded361084..2df29cb956 100644 --- a/packages/core/src/policy/config.test.ts +++ b/packages/core/src/policy/config.test.ts @@ -202,8 +202,7 @@ describe('createPolicyEngineConfig', () => { '/tmp/mock/default/policies', ); const rule = config.rules?.find( - (r) => - r.toolName === 'my-server__*' && r.decision === PolicyDecision.ALLOW, + (r) => r.mcpName === 'my-server' && r.decision === PolicyDecision.ALLOW, ); expect(rule).toBeDefined(); expect(rule?.priority).toBe(4.1); // MCP allowed server @@ -220,8 +219,7 @@ describe('createPolicyEngineConfig', () => { '/tmp/mock/default/policies', ); const rule = config.rules?.find( - (r) => - r.toolName === 'my-server__*' && r.decision === PolicyDecision.DENY, + (r) => r.mcpName === 'my-server' && r.decision === PolicyDecision.DENY, ); expect(rule).toBeDefined(); expect(rule?.priority).toBe(4.9); // MCP excluded server @@ -247,8 +245,7 @@ describe('createPolicyEngineConfig', () => { const trustedRule = config.rules?.find( (r) => - r.toolName === 'trusted-server__*' && - r.decision === PolicyDecision.ALLOW, + r.mcpName === 'trusted-server' && r.decision === PolicyDecision.ALLOW, ); expect(trustedRule).toBeDefined(); expect(trustedRule?.priority).toBe(4.2); // MCP trusted server @@ -256,8 +253,7 @@ describe('createPolicyEngineConfig', () => { // Untrusted server should not have an allow rule const untrustedRule = config.rules?.find( (r) => - r.toolName === 'untrusted-server__*' && - r.decision === PolicyDecision.ALLOW, + r.mcpName === 'untrusted-server' && r.decision === PolicyDecision.ALLOW, ); expect(untrustedRule).toBeUndefined(); }); @@ -284,8 +280,7 @@ describe('createPolicyEngineConfig', () => { // Check allowed server const allowedRule = config.rules?.find( (r) => - r.toolName === 'allowed-server__*' && - r.decision === PolicyDecision.ALLOW, + r.mcpName === 'allowed-server' && r.decision === PolicyDecision.ALLOW, ); expect(allowedRule).toBeDefined(); expect(allowedRule?.priority).toBe(4.1); // MCP allowed server @@ -293,8 +288,7 @@ describe('createPolicyEngineConfig', () => { // Check trusted server const trustedRule = config.rules?.find( (r) => - r.toolName === 'trusted-server__*' && - r.decision === PolicyDecision.ALLOW, + r.mcpName === 'trusted-server' && r.decision === PolicyDecision.ALLOW, ); expect(trustedRule).toBeDefined(); expect(trustedRule?.priority).toBe(4.2); // MCP trusted server @@ -302,8 +296,7 @@ describe('createPolicyEngineConfig', () => { // Check excluded server const excludedRule = config.rules?.find( (r) => - r.toolName === 'excluded-server__*' && - r.decision === PolicyDecision.DENY, + r.mcpName === 'excluded-server' && r.decision === PolicyDecision.DENY, ); expect(excludedRule).toBeDefined(); expect(excludedRule?.priority).toBe(4.9); // MCP excluded server @@ -368,7 +361,7 @@ describe('createPolicyEngineConfig', () => { const { createPolicyEngineConfig } = await import('./config.js'); const settings: PolicySettings = { mcp: { excluded: ['my-server'] }, - tools: { allowed: ['my-server__specific-tool'] }, + tools: { allowed: ['mcp_my-server_specific-tool'] }, }; const config = await createPolicyEngineConfig( settings, @@ -377,12 +370,11 @@ describe('createPolicyEngineConfig', () => { ); const serverDenyRule = config.rules?.find( - (r) => - r.toolName === 'my-server__*' && r.decision === PolicyDecision.DENY, + (r) => r.mcpName === 'my-server' && r.decision === PolicyDecision.DENY, ); const toolAllowRule = config.rules?.find( (r) => - r.toolName === 'my-server__specific-tool' && + r.toolName === 'mcp_my-server_specific-tool' && r.decision === PolicyDecision.ALLOW, ); @@ -404,7 +396,7 @@ describe('createPolicyEngineConfig', () => { trust: true, }, }, - tools: { exclude: ['my-server__dangerous-tool'] }, + tools: { exclude: ['mcp_my-server_dangerous-tool'] }, }; const config = await createPolicyEngineConfig( settings, @@ -413,12 +405,11 @@ describe('createPolicyEngineConfig', () => { ); const serverAllowRule = config.rules?.find( - (r) => - r.toolName === 'my-server__*' && r.decision === PolicyDecision.ALLOW, + (r) => r.mcpName === 'my-server' && r.decision === PolicyDecision.ALLOW, ); const toolDenyRule = config.rules?.find( (r) => - r.toolName === 'my-server__dangerous-tool' && + r.toolName === 'mcp_my-server_dangerous-tool' && r.decision === PolicyDecision.DENY, ); @@ -432,8 +423,8 @@ describe('createPolicyEngineConfig', () => { it('should handle complex priority scenarios correctly', async () => { const settings: PolicySettings = { tools: { - allowed: ['my-server__tool1', 'other-tool'], // Priority 4.3 - exclude: ['my-server__tool2', 'glob'], // Priority 4.4 + allowed: ['mcp_trusted-server_tool1', 'other-tool'], // Priority 4.3 + exclude: ['mcp_trusted-server_tool2', 'glob'], // Priority 4.4 }, mcp: { allowed: ['allowed-server'], // Priority 4.1 @@ -564,13 +555,12 @@ describe('createPolicyEngineConfig', () => { // Neither server should have an allow rule const noTrustRule = config.rules?.find( (r) => - r.toolName === 'no-trust-property__*' && + r.mcpName === 'no-trust-property' && r.decision === PolicyDecision.ALLOW, ); const explicitFalseRule = config.rules?.find( (r) => - r.toolName === 'explicit-false__*' && - r.decision === PolicyDecision.ALLOW, + r.mcpName === 'explicit-false' && r.decision === PolicyDecision.ALLOW, ); expect(noTrustRule).toBeUndefined(); diff --git a/packages/core/src/policy/config.ts b/packages/core/src/policy/config.ts index a1e337436e..8437cb9845 100644 --- a/packages/core/src/policy/config.ts +++ b/packages/core/src/policy/config.ts @@ -31,6 +31,7 @@ import { debugLogger } from '../utils/debugLogger.js'; import { SHELL_TOOL_NAMES } from '../utils/shell-utils.js'; import { SHELL_TOOL_NAME } from '../tools/tool-names.js'; import { isNodeError } from '../utils/errors.js'; +import { MCP_TOOL_PREFIX } from '../tools/mcp-tool.js'; import { isDirectorySecure } from '../utils/security.js'; @@ -342,7 +343,11 @@ export async function createPolicyEngineConfig( if (settings.mcp?.excluded) { for (const serverName of settings.mcp.excluded) { rules.push({ - toolName: `${serverName}__*`, + toolName: + serverName === '*' + ? `${MCP_TOOL_PREFIX}*` + : `${MCP_TOOL_PREFIX}${serverName}_*`, + mcpName: serverName, decision: PolicyDecision.DENY, priority: MCP_EXCLUDED_PRIORITY, source: 'Settings (MCP Excluded)', @@ -423,9 +428,10 @@ export async function createPolicyEngineConfig( )) { if (serverConfig.trust) { // Trust all tools from this MCP server - // Using pattern matching for MCP tool names which are formatted as "serverName__toolName" + // Using explicit mcpName metadata and FQN mcp_{serverName}_* rules.push({ - toolName: `${serverName}__*`, + toolName: `${MCP_TOOL_PREFIX}${serverName}_*`, + mcpName: serverName, decision: PolicyDecision.ALLOW, priority: TRUSTED_MCP_SERVER_PRIORITY, source: 'Settings (MCP Trusted)', @@ -439,7 +445,11 @@ export async function createPolicyEngineConfig( if (settings.mcp?.allowed) { for (const serverName of settings.mcp.allowed) { rules.push({ - toolName: `${serverName}__*`, + toolName: + serverName === '*' + ? `${MCP_TOOL_PREFIX}*` + : `${MCP_TOOL_PREFIX}${serverName}_*`, + mcpName: serverName, decision: PolicyDecision.ALLOW, priority: ALLOWED_MCP_SERVER_PRIORITY, source: 'Settings (MCP Allowed)', diff --git a/packages/core/src/policy/policy-engine.test.ts b/packages/core/src/policy/policy-engine.test.ts index b8e6968af9..7f8418fbae 100644 --- a/packages/core/src/policy/policy-engine.test.ts +++ b/packages/core/src/policy/policy-engine.test.ts @@ -150,7 +150,8 @@ describe('PolicyEngine', () => { it('should match unqualified tool names with qualified rules when serverName is provided', async () => { const rules: PolicyRule[] = [ { - toolName: 'my-server__tool', + toolName: 'mcp_my-server_tool', + mcpName: 'my-server', decision: PolicyDecision.ALLOW, }, ]; @@ -159,23 +160,9 @@ describe('PolicyEngine', () => { // Match with qualified name (standard) expect( - (await engine.check({ name: 'my-server__tool' }, 'my-server')).decision, + (await engine.check({ name: 'mcp_my-server_tool' }, 'my-server')) + .decision, ).toBe(PolicyDecision.ALLOW); - - // Match with unqualified name + serverName (the fix) - expect((await engine.check({ name: 'tool' }, 'my-server')).decision).toBe( - PolicyDecision.ALLOW, - ); - - // Should NOT match with unqualified name but NO serverName - expect((await engine.check({ name: 'tool' }, undefined)).decision).toBe( - PolicyDecision.ASK_USER, - ); - - // Should NOT match with unqualified name but WRONG serverName - expect( - (await engine.check({ name: 'tool' }, 'wrong-server')).decision, - ).toBe(PolicyDecision.ASK_USER); }); it('should match by args pattern', async () => { @@ -476,61 +463,41 @@ describe('PolicyEngine', () => { (await engine.check({ name: 'read_file' }, undefined)).decision, ).toBe(PolicyDecision.ALLOW); expect( - (await engine.check({ name: 'my-server__tool' }, 'my-server')).decision, + (await engine.check({ name: 'mcp_my-server_tool' }, 'my-server')) + .decision, ).toBe(PolicyDecision.ALLOW); }); - it('should match any MCP tool when toolName is *__*', async () => { + it('should match any MCP tool when toolName is mcp_*', async () => { engine = new PolicyEngine({ rules: [ - { toolName: '*__*', decision: PolicyDecision.ALLOW, priority: 10 }, + { toolName: 'mcp_*', decision: PolicyDecision.ALLOW, priority: 10 }, ], defaultDecision: PolicyDecision.DENY, }); - expect((await engine.check({ name: 'mcp__tool' }, 'mcp')).decision).toBe( - PolicyDecision.ALLOW, - ); expect( - (await engine.check({ name: 'other__tool' }, 'other')).decision, + (await engine.check({ name: 'mcp_mcp_tool' }, 'mcp')).decision, + ).toBe(PolicyDecision.ALLOW); + expect( + (await engine.check({ name: 'mcp_other_tool' }, 'other')).decision, ).toBe(PolicyDecision.ALLOW); expect( (await engine.check({ name: 'read_file' }, undefined)).decision, ).toBe(PolicyDecision.DENY); }); - it('should match specific tool across all servers when using *__tool', async () => { - engine = new PolicyEngine({ - rules: [ - { - toolName: '*__search', - decision: PolicyDecision.ALLOW, - priority: 10, - }, - ], - defaultDecision: PolicyDecision.DENY, - }); - - expect((await engine.check({ name: 'ws__search' }, 'ws')).decision).toBe( - PolicyDecision.ALLOW, - ); - expect((await engine.check({ name: 'gh__search' }, 'gh')).decision).toBe( - PolicyDecision.ALLOW, - ); - expect((await engine.check({ name: 'gh__list' }, 'gh')).decision).toBe( - PolicyDecision.DENY, - ); - }); - it('should match MCP server wildcard patterns', async () => { const rules: PolicyRule[] = [ { - toolName: 'my-server__*', + toolName: 'mcp_my-server_*', + mcpName: 'my-server', decision: PolicyDecision.ALLOW, priority: 10, }, { - toolName: 'blocked-server__*', + toolName: 'mcp_blocked-server_*', + mcpName: 'blocked-server', decision: PolicyDecision.DENY, priority: 20, }, @@ -540,19 +507,23 @@ describe('PolicyEngine', () => { // Should match my-server tools expect( - (await engine.check({ name: 'my-server__tool1' }, 'my-server')) + (await engine.check({ name: 'mcp_my-server_tool1' }, 'my-server')) .decision, ).toBe(PolicyDecision.ALLOW); expect( - (await engine.check({ name: 'my-server__another_tool' }, 'my-server')) - .decision, + ( + await engine.check( + { name: 'mcp_my-server_another_tool' }, + 'my-server', + ) + ).decision, ).toBe(PolicyDecision.ALLOW); // Should match blocked-server tools expect( ( await engine.check( - { name: 'blocked-server__tool1' }, + { name: 'mcp_blocked-server_tool1' }, 'blocked-server', ) ).decision, @@ -560,7 +531,7 @@ describe('PolicyEngine', () => { expect( ( await engine.check( - { name: 'blocked-server__dangerous' }, + { name: 'mcp_blocked-server_dangerous' }, 'blocked-server', ) ).decision, @@ -568,7 +539,7 @@ describe('PolicyEngine', () => { // Should not match other patterns expect( - (await engine.check({ name: 'other-server__tool' }, 'other-server')) + (await engine.check({ name: 'mcp_other-server_tool' }, 'other-server')) .decision, ).toBe(PolicyDecision.ASK_USER); expect( @@ -582,12 +553,14 @@ describe('PolicyEngine', () => { it('should prioritize specific tool rules over server wildcards', async () => { const rules: PolicyRule[] = [ { - toolName: 'my-server__*', + toolName: 'mcp_my-server_*', + mcpName: 'my-server', decision: PolicyDecision.ALLOW, priority: 10, }, { - toolName: 'my-server__dangerous-tool', + toolName: 'mcp_my-server_dangerous-tool', + mcpName: 'my-server', decision: PolicyDecision.DENY, priority: 20, }, @@ -597,33 +570,38 @@ describe('PolicyEngine', () => { // Specific tool deny should override server allow expect( - (await engine.check({ name: 'my-server__dangerous-tool' }, 'my-server')) - .decision, + ( + await engine.check( + { name: 'mcp_my-server_dangerous-tool' }, + 'my-server', + ) + ).decision, ).toBe(PolicyDecision.DENY); expect( - (await engine.check({ name: 'my-server__safe-tool' }, 'my-server')) + (await engine.check({ name: 'mcp_my-server_safe-tool' }, 'my-server')) .decision, ).toBe(PolicyDecision.ALLOW); }); it('should NOT match spoofed server names when using wildcards', async () => { - // Vulnerability: A rule for 'prefix__*' matches 'prefix__suffix__tool' - // effectively allowing a server named 'prefix__suffix' to spoof 'prefix'. + // Vulnerability: A rule for 'mcp_prefix_*' matches 'mcp_prefix__suffix_tool' + // effectively allowing a server named 'mcp_prefix_suffix' to spoof 'prefix'. const rules: PolicyRule[] = [ { - toolName: 'safe_server__*', + toolName: 'mcp_safe_server_*', + mcpName: 'safe_server', decision: PolicyDecision.ALLOW, }, ]; engine = new PolicyEngine({ rules }); - // A tool from a different server 'safe_server__malicious' - const spoofedToolCall = { name: 'safe_server__malicious__tool' }; + // A tool from a different server 'mcp_safe_server_malicious' + const spoofedToolCall = { name: 'mcp_mcp_safe_server_malicious_tool' }; // CURRENT BEHAVIOR (FIXED): Matches because it starts with 'safe_server__' BUT serverName doesn't match 'safe_server' // We expect this to FAIL matching the ALLOW rule, thus falling back to default (ASK_USER) expect( - (await engine.check(spoofedToolCall, 'safe_server__malicious')) + (await engine.check(spoofedToolCall, 'mcp_safe_server_malicious')) .decision, ).toBe(PolicyDecision.ASK_USER); }); @@ -631,14 +609,15 @@ describe('PolicyEngine', () => { it('should verify tool name prefix even if serverName matches', async () => { const rules: PolicyRule[] = [ { - toolName: 'safe_server__*', + toolName: 'mcp_safe_server_*', + mcpName: 'safe_server', decision: PolicyDecision.ALLOW, }, ]; engine = new PolicyEngine({ rules }); // serverName matches, but tool name does not start with prefix - const invalidToolCall = { name: 'other_server__tool' }; + const invalidToolCall = { name: 'mcp_other_server_tool' }; expect( (await engine.check(invalidToolCall, 'safe_server')).decision, ).toBe(PolicyDecision.ASK_USER); @@ -647,13 +626,14 @@ describe('PolicyEngine', () => { it('should allow when both serverName and tool name prefix match', async () => { const rules: PolicyRule[] = [ { - toolName: 'safe_server__*', + toolName: 'mcp_safe_server_*', + mcpName: 'safe_server', decision: PolicyDecision.ALLOW, }, ]; engine = new PolicyEngine({ rules }); - const validToolCall = { name: 'safe_server__tool' }; + const validToolCall = { name: 'mcp_safe_server_tool' }; expect((await engine.check(validToolCall, 'safe_server')).decision).toBe( PolicyDecision.ALLOW, ); @@ -2007,11 +1987,16 @@ describe('PolicyEngine', () => { it('should support wildcard patterns for checkers', async () => { const rules: PolicyRule[] = [ - { toolName: 'server__tool', decision: PolicyDecision.ALLOW }, + { + toolName: 'mcp_server_tool', + mcpName: 'server', + decision: PolicyDecision.ALLOW, + }, ]; const wildcardChecker: SafetyCheckerRule = { checker: { type: 'external', name: 'wildcard' }, - toolName: 'server__*', + toolName: 'mcp_server_*', + mcpName: 'server', }; engine = new PolicyEngine( @@ -2023,7 +2008,7 @@ describe('PolicyEngine', () => { decision: SafetyCheckDecision.ALLOW, }); - await engine.check({ name: 'server__tool' }, 'server'); + await engine.check({ name: 'mcp_server_tool' }, 'server'); expect(mockCheckerRunner.runChecker).toHaveBeenCalledWith( expect.anything(), @@ -2137,6 +2122,8 @@ describe('PolicyEngine', () => { rules: PolicyRule[]; approvalMode?: ApprovalMode; nonInteractive?: boolean; + allToolNames?: string[]; + metadata?: Map>; expected: string[]; } @@ -2144,11 +2131,13 @@ describe('PolicyEngine', () => { { name: 'should return empty set when no rules provided', rules: [], + allToolNames: ['tool1'], expected: [], }, { name: 'should apply rules without explicit modes to all modes', rules: [{ toolName: 'tool1', decision: PolicyDecision.DENY }], + allToolNames: ['tool1', 'tool2'], expected: ['tool1'], }, { @@ -2168,6 +2157,7 @@ describe('PolicyEngine', () => { modes: [ApprovalMode.DEFAULT], }, ], + allToolNames: ['tool1'], expected: [], }, { @@ -2184,6 +2174,7 @@ describe('PolicyEngine', () => { modes: [ApprovalMode.DEFAULT], }, ], + allToolNames: ['tool1', 'tool2', 'tool3'], expected: ['tool1'], }, { @@ -2202,6 +2193,7 @@ describe('PolicyEngine', () => { modes: [ApprovalMode.DEFAULT], }, ], + allToolNames: ['tool1'], expected: ['tool1'], }, { @@ -2220,6 +2212,7 @@ describe('PolicyEngine', () => { modes: [ApprovalMode.DEFAULT], }, ], + allToolNames: ['tool1'], expected: [], }, { @@ -2232,7 +2225,8 @@ describe('PolicyEngine', () => { }, ], nonInteractive: true, - expected: [], + allToolNames: ['tool1'], + expected: ['tool1'], }, { name: 'should ignore rules with argsPattern', @@ -2244,6 +2238,7 @@ describe('PolicyEngine', () => { modes: [ApprovalMode.DEFAULT], }, ], + allToolNames: ['tool1'], expected: [], }, { @@ -2256,6 +2251,7 @@ describe('PolicyEngine', () => { }, ], approvalMode: ApprovalMode.PLAN, + allToolNames: ['tool1'], expected: ['tool1'], }, { @@ -2268,6 +2264,7 @@ describe('PolicyEngine', () => { }, ], approvalMode: ApprovalMode.DEFAULT, + allToolNames: ['tool1'], expected: [], }, { @@ -2286,36 +2283,55 @@ describe('PolicyEngine', () => { }, ], approvalMode: ApprovalMode.YOLO, + allToolNames: ['dangerous-tool', 'safe-tool'], expected: [], }, { name: 'should respect server wildcard DENY', rules: [ { - toolName: 'server__*', + toolName: 'mcp_server_*', + mcpName: 'server', decision: PolicyDecision.DENY, modes: [ApprovalMode.DEFAULT], }, ], - expected: ['server__*'], + allToolNames: [ + 'mcp_server_tool1', + 'mcp_server_tool2', + 'mcp_other_tool', + ], + metadata: new Map([ + ['mcp_server_tool1', { _serverName: 'server' }], + ['mcp_server_tool2', { _serverName: 'server' }], + ['mcp_other_tool', { _serverName: 'other' }], + ]), + expected: ['mcp_server_tool1', 'mcp_server_tool2'], }, { name: 'should expand server wildcard for specific tools if already processed', rules: [ { - toolName: 'server__*', + toolName: 'mcp_server_*', + mcpName: 'server', decision: PolicyDecision.DENY, priority: 100, modes: [ApprovalMode.DEFAULT], }, { - toolName: 'server__tool1', - decision: PolicyDecision.DENY, + toolName: 'mcp_server_tool1', + mcpName: 'server', + decision: PolicyDecision.DENY, // redundant but tests ordering priority: 10, modes: [ApprovalMode.DEFAULT], }, ], - expected: ['server__*', 'server__tool1'], + allToolNames: ['mcp_server_tool1', 'mcp_server_tool2'], + metadata: new Map([ + ['mcp_server_tool1', { _serverName: 'server' }], + ['mcp_server_tool2', { _serverName: 'server' }], + ]), + expected: ['mcp_server_tool1', 'mcp_server_tool2'], }, { name: 'should exclude run_shell_command but NOT write_file in simulated Plan Mode', @@ -2342,24 +2358,29 @@ describe('PolicyEngine', () => { priority: 10, }, ], - expected: ['run_shell_command'], + allToolNames: ['write_file', 'run_shell_command', 'read_file'], + expected: ['run_shell_command', 'read_file'], }, { name: 'should NOT exclude tool if covered by a higher priority wildcard ALLOW', rules: [ { - toolName: 'server__*', + toolName: 'mcp_server_*', + mcpName: 'server', decision: PolicyDecision.ALLOW, priority: 100, modes: [ApprovalMode.DEFAULT], }, { - toolName: 'server__tool1', + toolName: 'mcp_server_tool1', + mcpName: 'server', decision: PolicyDecision.DENY, priority: 10, modes: [ApprovalMode.DEFAULT], }, ], + allToolNames: ['mcp_server_tool1'], + metadata: new Map([['mcp_server_tool1', { _serverName: 'server' }]]), expected: [], }, { @@ -2371,41 +2392,63 @@ describe('PolicyEngine', () => { priority: 10, }, ], - expected: ['*'], + allToolNames: ['toolA', 'toolB', 'mcp_server_toolC'], + expected: ['toolA', 'toolB', 'mcp_server_toolC'], // all tools denied by * }, { name: 'should handle MCP category wildcard *__* in getExcludedTools', rules: [ { - toolName: '*__*', + toolName: 'mcp_*', decision: PolicyDecision.DENY, priority: 10, }, ], - expected: ['*__*'], + allToolNames: ['localTool', 'mcp_myserver_mytool'], + metadata: new Map([ + ['mcp_myserver_mytool', { _serverName: 'myserver' }], + ]), + expected: ['mcp_myserver_mytool'], }, { - name: 'should handle tool wildcard *__search in getExcludedTools', + name: 'should handle tool wildcard mcp_server_* in getExcludedTools', rules: [ { - toolName: '*__search', + toolName: 'mcp_server_*', decision: PolicyDecision.DENY, priority: 10, }, ], - expected: ['*__search'], + allToolNames: [ + 'localTool', + 'mcp_server_search', + 'mcp_otherserver_read', + ], + metadata: new Map([ + ['mcp_server_search', { _serverName: 'server' }], + ['mcp_otherserver_read', { _serverName: 'otherserver' }], + ]), + expected: ['mcp_server_search'], }, ]; it.each(testCases)( '$name', - ({ rules, approvalMode, nonInteractive, expected }) => { + ({ + rules, + approvalMode, + nonInteractive, + allToolNames, + metadata, + expected, + }) => { engine = new PolicyEngine({ rules, approvalMode: approvalMode ?? ApprovalMode.DEFAULT, nonInteractive: nonInteractive ?? false, }); - const excluded = engine.getExcludedTools(); + const toolsSet = allToolNames ? new Set(allToolNames) : undefined; + const excluded = engine.getExcludedTools(metadata, toolsSet); expect(Array.from(excluded).sort()).toEqual(expected.sort()); }, ); @@ -2420,7 +2463,10 @@ describe('PolicyEngine', () => { }, ], }); - const excluded = engine.getExcludedTools(); + const excluded = engine.getExcludedTools( + undefined, + new Set(['dangerous_tool']), + ); expect(Array.from(excluded)).toEqual([]); }); @@ -2438,7 +2484,10 @@ describe('PolicyEngine', () => { ['dangerous_tool', { destructiveHint: true }], ['safe_tool', { readOnlyHint: true }], ]); - const excluded = engine.getExcludedTools(metadata); + const excluded = engine.getExcludedTools( + metadata, + new Set(['dangerous_tool', 'safe_tool']), + ); expect(Array.from(excluded)).toEqual(['dangerous_tool']); }); @@ -2455,7 +2504,10 @@ describe('PolicyEngine', () => { const metadata = new Map>([ ['safe_tool', { readOnlyHint: true }], ]); - const excluded = engine.getExcludedTools(metadata); + const excluded = engine.getExcludedTools( + metadata, + new Set(['safe_tool']), + ); expect(Array.from(excluded)).toEqual([]); }); @@ -2463,7 +2515,8 @@ describe('PolicyEngine', () => { engine = new PolicyEngine({ rules: [ { - toolName: 'server__*', + toolName: 'mcp_server_*', + mcpName: 'server', toolAnnotations: { destructiveHint: true }, decision: PolicyDecision.DENY, priority: 10, @@ -2471,12 +2524,25 @@ describe('PolicyEngine', () => { ], }); const metadata = new Map>([ - ['server__dangerous_tool', { destructiveHint: true }], - ['other__dangerous_tool', { destructiveHint: true }], - ['server__safe_tool', { readOnlyHint: true }], + [ + 'mcp_server_dangerous_tool', + { destructiveHint: true, _serverName: 'server' }, + ], + [ + 'mcp_other_dangerous_tool', + { destructiveHint: true, _serverName: 'other' }, + ], + ['mcp_server_safe_tool', { readOnlyHint: true, _serverName: 'server' }], ]); - const excluded = engine.getExcludedTools(metadata); - expect(Array.from(excluded)).toEqual(['server__dangerous_tool']); + const excluded = engine.getExcludedTools( + metadata, + new Set([ + 'mcp_server_dangerous_tool', + 'mcp_other_dangerous_tool', + 'mcp_server_safe_tool', + ]), + ); + expect(Array.from(excluded)).toEqual(['mcp_server_dangerous_tool']); }); it('should exclude unprocessed tools from allToolNames when global DENY is active', () => { @@ -2493,8 +2559,8 @@ describe('PolicyEngine', () => { priority: 70, }, { - // Simulates plan.toml: mcpName="*" → toolName="*__*" - toolName: '*__*', + // Simulates plan.toml: mcpName="*" → toolName="mcp_*" + toolName: 'mcp_*', toolAnnotations: { readOnlyHint: true }, decision: PolicyDecision.ASK_USER, priority: 70, @@ -2505,36 +2571,42 @@ describe('PolicyEngine', () => { }, ], }); - // MCP tools are registered with unqualified names in ToolRegistry + // MCP tools are registered with qualified names in ToolRegistry const allToolNames = new Set([ 'glob', 'read_file', 'shell', 'web_fetch', - 'read_mcp_tool', - 'write_mcp_tool', + 'mcp_my-server_read_mcp_tool', + 'mcp_my-server_write_mcp_tool', ]); // buildToolMetadata() includes _serverName for MCP tools const toolMetadata = new Map>([ - ['read_mcp_tool', { readOnlyHint: true, _serverName: 'my-server' }], - ['write_mcp_tool', { readOnlyHint: false, _serverName: 'my-server' }], + [ + 'mcp_my-server_read_mcp_tool', + { readOnlyHint: true, _serverName: 'my-server' }, + ], + [ + 'mcp_my-server_write_mcp_tool', + { readOnlyHint: false, _serverName: 'my-server' }, + ], ]); const excluded = engine.getExcludedTools(toolMetadata, allToolNames); expect(excluded.has('shell')).toBe(true); expect(excluded.has('web_fetch')).toBe(true); // Non-read-only MCP tool excluded by catch-all DENY - expect(excluded.has('write_mcp_tool')).toBe(true); + expect(excluded.has('mcp_my-server_write_mcp_tool')).toBe(true); expect(excluded.has('glob')).toBe(false); expect(excluded.has('read_file')).toBe(false); // Read-only MCP tool allowed by annotation rule - expect(excluded.has('read_mcp_tool')).toBe(false); + expect(excluded.has('mcp_my-server_read_mcp_tool')).toBe(false); }); - it('should match already-qualified MCP tool names without _serverName', () => { + it('should match MCP wildcard rules when explicitly mapped with _serverName', () => { engine = new PolicyEngine({ rules: [ { - toolName: '*__*', + toolName: 'mcp_*', toolAnnotations: { readOnlyHint: true }, decision: PolicyDecision.ASK_USER, priority: 70, @@ -2547,17 +2619,23 @@ describe('PolicyEngine', () => { }); // Tool registered with qualified name (collision case) const allToolNames = new Set([ - 'myserver__read_tool', - 'myserver__write_tool', + 'mcp_myserver_read_tool', + 'mcp_myserver_write_tool', ]); const toolMetadata = new Map>([ - ['myserver__read_tool', { readOnlyHint: true }], - ['myserver__write_tool', { readOnlyHint: false }], + [ + 'mcp_myserver_read_tool', + { readOnlyHint: true, _serverName: 'myserver' }, + ], + [ + 'mcp_myserver_write_tool', + { readOnlyHint: false, _serverName: 'myserver' }, + ], ]); const excluded = engine.getExcludedTools(toolMetadata, allToolNames); - // Qualified name already contains __, matched directly without _serverName - expect(excluded.has('myserver__read_tool')).toBe(false); - expect(excluded.has('myserver__write_tool')).toBe(true); + // Qualified name matched using explicit _serverName + expect(excluded.has('mcp_myserver_read_tool')).toBe(false); + expect(excluded.has('mcp_myserver_write_tool')).toBe(true); }); it('should not exclude unprocessed tools when allToolNames is not provided (backward compat)', () => { @@ -2648,7 +2726,7 @@ describe('PolicyEngine', () => { modes: [ApprovalMode.PLAN], }, { - toolName: '*__*', + toolName: 'mcp_*', toolAnnotations: { readOnlyHint: true }, decision: PolicyDecision.ASK_USER, priority: 70, @@ -2679,13 +2757,19 @@ describe('PolicyEngine', () => { 'write_todos', 'memory', 'save_memory', - 'read_tool', - 'write_tool', + 'mcp_mcp-server_read_tool', + 'mcp_mcp-server_write_tool', ]); // buildToolMetadata() includes _serverName for MCP tools const toolMetadata = new Map>([ - ['read_tool', { readOnlyHint: true, _serverName: 'mcp-server' }], - ['write_tool', { readOnlyHint: false, _serverName: 'mcp-server' }], + [ + 'mcp_mcp-server_read_tool', + { readOnlyHint: true, _serverName: 'mcp-server' }, + ], + [ + 'mcp_mcp-server_write_tool', + { readOnlyHint: false, _serverName: 'mcp-server' }, + ], ]); const excluded = engine.getExcludedTools(toolMetadata, allToolNames); // These should be excluded (caught by catch-all DENY) @@ -2698,7 +2782,7 @@ describe('PolicyEngine', () => { expect(excluded.has('write_file')).toBe(true); expect(excluded.has('replace')).toBe(true); // Non-read-only MCP tool excluded by catch-all DENY - expect(excluded.has('write_tool')).toBe(true); + expect(excluded.has('mcp_mcp-server_write_tool')).toBe(true); // These should NOT be excluded (explicitly allowed) expect(excluded.has('glob')).toBe(false); expect(excluded.has('grep_search')).toBe(false); @@ -2710,7 +2794,7 @@ describe('PolicyEngine', () => { expect(excluded.has('exit_plan_mode')).toBe(false); expect(excluded.has('save_memory')).toBe(false); // Read-only MCP tool allowed by annotation rule (matched via _serverName) - expect(excluded.has('read_tool')).toBe(false); + expect(excluded.has('mcp_mcp-server_read_tool')).toBe(false); }); }); @@ -3058,13 +3142,13 @@ describe('PolicyEngine', () => { engine = new PolicyEngine({ rules: [ { - toolName: '*__*', + toolName: 'mcp_*', toolAnnotations: { experimental: true }, decision: PolicyDecision.DENY, priority: 20, }, { - toolName: '*__*', + toolName: 'mcp_*', decision: PolicyDecision.ALLOW, priority: 10, }, @@ -3073,14 +3157,14 @@ describe('PolicyEngine', () => { expect( ( - await engine.check({ name: 'mcp__test' }, 'mcp', { + await engine.check({ name: 'mcp_mcp_test' }, 'mcp', { experimental: true, }) ).decision, ).toBe(PolicyDecision.DENY); expect( ( - await engine.check({ name: 'mcp__stable' }, 'mcp', { + await engine.check({ name: 'mcp_mcp_stable' }, 'mcp', { experimental: false, }) ).decision, diff --git a/packages/core/src/policy/policy-engine.ts b/packages/core/src/policy/policy-engine.ts index 03087716ff..0d6a043da0 100644 --- a/packages/core/src/policy/policy-engine.ts +++ b/packages/core/src/policy/policy-engine.ts @@ -25,6 +25,11 @@ import { hasRedirection, } from '../utils/shell-utils.js'; import { getToolAliases } from '../tools/tool-names.js'; +import { + MCP_TOOL_PREFIX, + isMcpToolAnnotation, + parseMcpToolName, +} from '../tools/mcp-tool.js'; function isWildcardPattern(name: string): boolean { return name === '*' || name.includes('*'); @@ -32,7 +37,7 @@ function isWildcardPattern(name: string): boolean { /** * Checks if a tool call matches a wildcard pattern. - * Supports global (*) and composite (server__*, *__tool, *__*) patterns. + * Supports global (*) and the explicit MCP (*mcp_serverName_**) format. */ function matchesWildcard( pattern: string, @@ -43,59 +48,25 @@ function matchesWildcard( return true; } - if (pattern.includes('__')) { - return matchesCompositePattern(pattern, toolName, serverName); + if (pattern === `${MCP_TOOL_PREFIX}*`) { + return serverName !== undefined; } + if (pattern.startsWith(MCP_TOOL_PREFIX) && pattern.endsWith('_*')) { + const expectedServerName = pattern.slice(MCP_TOOL_PREFIX.length, -2); + // 1. Must be an MCP tool call (has serverName) + // 2. Server name must match + // 3. Tool name must be properly qualified by that server + if (serverName === undefined || serverName !== expectedServerName) { + return false; + } + return toolName.startsWith(`${MCP_TOOL_PREFIX}${expectedServerName}_`); + } + + // Not a recognized wildcard pattern, fallback to exact match just in case return toolName === pattern; } -/** - * Matches composite patterns like "server__*", "*__tool", or "*__*". - */ -function matchesCompositePattern( - pattern: string, - toolName: string, - serverName: string | undefined, -): boolean { - const parts = pattern.split('__'); - if (parts.length !== 2) return false; - const [patternServer, patternTool] = parts; - - // 1. Identify the tool's components - const { actualServer, actualTool } = getToolMetadata(toolName, serverName); - - // 2. Composite patterns require a server context - if (actualServer === undefined) { - return false; - } - - // 3. Robustness: if serverName is provided, toolName MUST be qualified by it. - // This prevents "malicious-server" from spoofing "trusted-server" by naming itself "trusted-server__malicious". - if (serverName !== undefined && !toolName.startsWith(serverName + '__')) { - return false; - } - - // 4. Match components - const serverMatch = patternServer === '*' || patternServer === actualServer; - const toolMatch = patternTool === '*' || patternTool === actualTool; - - return serverMatch && toolMatch; -} - -/** - * Extracts the server and unqualified tool name from a tool call context. - */ -function getToolMetadata(toolName: string, serverName: string | undefined) { - const sepIndex = toolName.indexOf('__'); - const isQualified = sepIndex !== -1; - return { - actualServer: - serverName ?? (isQualified ? toolName.substring(0, sepIndex) : undefined), - actualTool: isQualified ? toolName.substring(sepIndex + 2) : toolName, - }; -} - function ruleMatches( rule: PolicyRule | SafetyCheckerRule, toolCall: FunctionCall, @@ -111,9 +82,20 @@ function ruleMatches( } } + // Strictly enforce mcpName identity if the rule dictates it + if (rule.mcpName) { + if (rule.mcpName === '*') { + // Rule requires it to be ANY MCP tool + if (serverName === undefined) return false; + } else { + // Rule requires it to be a specific MCP server + if (serverName !== rule.mcpName) return false; + } + } + // Check tool name if specified if (rule.toolName) { - // Support wildcard patterns: "serverName__*" matches "serverName__anyTool" + // Support wildcard patterns: "mcp_serverName_*" matches "mcp_serverName_anyTool" if (rule.toolName === '*') { // Match all tools } else if (isWildcardPattern(rule.toolName)) { @@ -371,6 +353,22 @@ export class PolicyEngine { serverName: string | undefined, toolAnnotations?: Record, ): Promise { + // Case 1: Metadata injection is the primary and safest way to identify an MCP server. + // If we have explicit `_serverName` metadata (usually injected by tool-registry for active tools), use it. + if (!serverName && isMcpToolAnnotation(toolAnnotations)) { + serverName = toolAnnotations._serverName; + } + + // Case 2: Fallback for static FQN strings (e.g. from TOML policies or allowed/excluded settings strings). + // These strings don't have active metadata objects associated with them during policy generation, + // so we must extract the server name from the qualified `mcp_{server}_{tool}` format. + if (!serverName && toolCall.name) { + const parsed = parseMcpToolName(toolCall.name); + if (parsed.serverName) { + serverName = parsed.serverName; + } + } + let stringifiedArgs: string | undefined; // Compute stringified args once before the loop if ( @@ -404,20 +402,12 @@ export class PolicyEngine { let matchedRule: PolicyRule | undefined; let decision: PolicyDecision | undefined; - // For tools with a server name, we want to try matching both the - // original name and the fully qualified name (server__tool). // We also want to check legacy aliases for the tool name. const toolNamesToTry = toolCall.name ? getToolAliases(toolCall.name) : []; const toolCallsToTry: FunctionCall[] = []; for (const name of toolNamesToTry) { toolCallsToTry.push({ ...toolCall, name }); - if (serverName && !name.includes('__')) { - toolCallsToTry.push({ - ...toolCall, - name: `${serverName}__${name}`, - }); - } } for (const rule of this.rules) { @@ -654,145 +644,69 @@ export class PolicyEngine { allToolNames?: Set, ): Set { const excludedTools = new Set(); - const processedTools = new Set(); - let globalVerdict: PolicyDecision | undefined; - for (const rule of this.rules) { - if (rule.argsPattern) { - if (rule.toolName && rule.decision !== PolicyDecision.DENY) { - processedTools.add(rule.toolName); - } - continue; - } - - // Check if rule applies to current approval mode - if (rule.modes && rule.modes.length > 0) { - if (!rule.modes.includes(this.approvalMode)) { - continue; - } - } - - // Handle annotation-based rules - if (rule.toolAnnotations) { - if (!toolMetadata) { - // Without metadata, we can't evaluate annotation rules — skip (conservative fallback) - continue; - } - // Iterate over all known tools and check if their annotations match this rule - for (const [toolName, annotations] of toolMetadata) { - if (processedTools.has(toolName)) { - continue; - } - // Check if annotations match the rule's toolAnnotations (partial match) - let annotationsMatch = true; - for (const [key, value] of Object.entries(rule.toolAnnotations)) { - if (annotations[key] !== value) { - annotationsMatch = false; - break; - } - } - if (!annotationsMatch) { - continue; - } - // Check if the tool name matches the rule's toolName pattern (if any) - if (rule.toolName) { - if (isWildcardPattern(rule.toolName)) { - // For composite patterns (e.g. "*__*"), construct a qualified - // name from metadata so matchesWildcard can resolve it. - const rawServerName = annotations['_serverName']; - const serverName = - typeof rawServerName === 'string' ? rawServerName : undefined; - const qualifiedName = - serverName && !toolName.includes('__') - ? `${serverName}__${toolName}` - : toolName; - if (!matchesWildcard(rule.toolName, qualifiedName, undefined)) { - continue; - } - } else if (toolName !== rule.toolName) { - continue; - } - } - // Determine decision considering global verdict - let decision: PolicyDecision; - if (globalVerdict !== undefined) { - decision = globalVerdict; - } else { - decision = rule.decision; - } - if (decision === PolicyDecision.DENY) { - excludedTools.add(toolName); - } - processedTools.add(toolName); - } - continue; - } - - // Handle Global Rules - if (!rule.toolName) { - if (globalVerdict === undefined) { - globalVerdict = rule.decision; - if (globalVerdict !== PolicyDecision.DENY) { - // Global ALLOW/ASK found. - // Since rules are sorted by priority, this overrides any lower-priority rules. - // We can stop processing because nothing else will be excluded. - break; - } - // If Global DENY, we continue to find specific tools to add to excluded set - } - continue; - } - - const toolName = rule.toolName; - - // Check if already processed (exact match) - if (processedTools.has(toolName)) { - continue; - } - - // Check if covered by a processed wildcard - let coveredByWildcard = false; - for (const processed of processedTools) { - if ( - isWildcardPattern(processed) && - matchesWildcard(processed, toolName, undefined) - ) { - // It's covered by a higher-priority wildcard rule. - // If that wildcard rule resulted in exclusion, this tool should also be excluded. - if (excludedTools.has(processed)) { - excludedTools.add(toolName); - } - coveredByWildcard = true; - break; - } - } - if (coveredByWildcard) { - continue; - } - - processedTools.add(toolName); - - // Determine decision - let decision: PolicyDecision; - if (globalVerdict !== undefined) { - decision = globalVerdict; - } else { - decision = rule.decision; - } - - if (decision === PolicyDecision.DENY) { - excludedTools.add(toolName); - } + if (!allToolNames) { + return excludedTools; } - // If there's a global DENY and we know all tool names, exclude any tool - // that wasn't explicitly allowed by a higher-priority rule. - if (globalVerdict === PolicyDecision.DENY && allToolNames) { - for (const name of allToolNames) { - if (!processedTools.has(name)) { - excludedTools.add(name); + for (const toolName of allToolNames) { + const annotations = toolMetadata?.get(toolName); + const serverName = isMcpToolAnnotation(annotations) + ? annotations._serverName + : undefined; + + let staticallyExcluded = false; + let matchFound = false; + + // Evaluate rules in priority order (they are already sorted in constructor) + for (const rule of this.rules) { + // Create a copy of the rule without argsPattern to see if it targets the tool + // regardless of the runtime arguments it might receive. + const ruleWithoutArgs: PolicyRule = { ...rule, argsPattern: undefined }; + const toolCall: FunctionCall = { name: toolName, args: {} }; + + const appliesToTool = ruleMatches( + ruleWithoutArgs, + toolCall, + undefined, // stringifiedArgs + serverName, + this.approvalMode, + annotations, + ); + + if (appliesToTool) { + if (rule.argsPattern) { + // Exclusions only apply statically before arguments are known. + if (rule.decision !== PolicyDecision.DENY) { + // Conditionally allowed/asked based on args. Therefore NOT statically excluded. + staticallyExcluded = false; + matchFound = true; + break; + } + // If it's conditionally DENIED based on args, it means it's not unconditionally denied. + // We must keep evaluating lower priority rules to see the default/unconditional state. + continue; + } else { + // Unconditional rule for this tool + const decision = this.applyNonInteractiveMode(rule.decision); + staticallyExcluded = decision === PolicyDecision.DENY; + matchFound = true; + break; + } } } + + if (!matchFound) { + // Fallback to default decision if no rule matches + const defaultDec = this.applyNonInteractiveMode(this.defaultDecision); + if (defaultDec === PolicyDecision.DENY) { + staticallyExcluded = true; + } + } + + if (staticallyExcluded) { + excludedTools.add(toolName); + } } return excludedTools; diff --git a/packages/core/src/policy/toml-loader.test.ts b/packages/core/src/policy/toml-loader.test.ts index a65248cfea..45501879c4 100644 --- a/packages/core/src/policy/toml-loader.test.ts +++ b/packages/core/src/policy/toml-loader.test.ts @@ -129,7 +129,7 @@ priority = 10 `); expect(result.rules).toHaveLength(1); - expect(result.rules[0].toolName).toBe('*__*'); + expect(result.rules[0].toolName).toBe('mcp_*'); expect(result.rules[0].decision).toBe(PolicyDecision.ASK_USER); expect(result.errors).toHaveLength(0); }); @@ -144,7 +144,7 @@ priority = 10 `); expect(result.rules).toHaveLength(1); - expect(result.rules[0].toolName).toBe('*__search'); + expect(result.rules[0].toolName).toBe('mcp_*_search'); expect(result.errors).toHaveLength(0); }); @@ -215,8 +215,12 @@ priority = 100 `); expect(result.rules).toHaveLength(2); - expect(result.rules[0].toolName).toBe('google-workspace__calendar.list'); - expect(result.rules[1].toolName).toBe('google-workspace__calendar.get'); + expect(result.rules[0].toolName).toBe( + 'mcp_google-workspace_calendar.list', + ); + expect(result.rules[1].toolName).toBe( + 'mcp_google-workspace_calendar.get', + ); expect(result.errors).toHaveLength(0); }); @@ -678,12 +682,12 @@ priority = 100 it('should not warn for MCP format tool names', async () => { const result = await runLoadPoliciesFromToml(` [[rule]] -toolName = "my-server__my-tool" +toolName = "mcp_my-server_my-tool" decision = "allow" priority = 100 [[rule]] -toolName = "my-server__*" +toolName = "mcp_my-server_*" decision = "allow" priority = 100 `); @@ -822,7 +826,7 @@ priority = 100 annotationRule, 'Should have loaded a rule with toolAnnotations', ).toBeDefined(); - expect(annotationRule!.toolName).toBe('*__*'); + expect(annotationRule!.toolName).toBe('mcp_*'); expect(annotationRule!.toolAnnotations).toEqual({ readOnlyHint: true, }); @@ -863,7 +867,7 @@ priority = 100 // 4. MCP tool WITHOUT annotations should be DENIED const denyResult = await engine.check( - { name: 'github__create_issue' }, + { name: 'mcp_github_create_issue' }, 'github', undefined, ); @@ -874,7 +878,7 @@ priority = 100 // 5. MCP tool with readOnlyHint=false should also be DENIED const denyResult2 = await engine.check( - { name: 'github__delete_issue' }, + { name: 'mcp_github_delete_issue' }, 'github', { readOnlyHint: false }, ); @@ -883,9 +887,9 @@ priority = 100 'MCP tool with readOnlyHint=false should be DENIED in Plan Mode', ).toBe(PolicyDecision.DENY); - // 6. Test with qualified tool name format (server__tool) but no separate serverName + // 6. Test with qualified tool name format (mcp_server_tool) but no separate serverName const qualifiedResult = await engine.check( - { name: 'github__list_repos' }, + { name: 'mcp_github_list_repos' }, undefined, { readOnlyHint: true }, ); @@ -990,7 +994,8 @@ priority = 100 ['people.getMe', 'calendar.list', 'calendar.get'], [ { - toolName: 'google-workspace__people.getxMe', + toolName: 'mcp_google-workspace_people.getxMe', + mcpName: 'google-workspace', source: 'User: workspace.toml', }, ], @@ -1007,8 +1012,14 @@ priority = 100 'google-workspace', ['people.getMe', 'calendar.list'], [ - { toolName: 'google-workspace__people.getMe' }, - { toolName: 'google-workspace__calendar.list' }, + { + toolName: 'mcp_google-workspace_people.getMe', + mcpName: 'google-workspace', + }, + { + toolName: 'mcp_google-workspace_calendar.list', + mcpName: 'google-workspace', + }, ], ); @@ -1019,7 +1030,7 @@ priority = 100 const warnings = validateMcpPolicyToolNames( 'my-server', ['tool1', 'tool2'], - [{ toolName: 'my-server__*' }], + [{ toolName: 'mcp_my-server_*', mcpName: 'my-server' }], ); expect(warnings).toHaveLength(0); @@ -1029,7 +1040,7 @@ priority = 100 const warnings = validateMcpPolicyToolNames( 'server-a', ['tool1'], - [{ toolName: 'server-b__toolx' }], + [{ toolName: 'mcp_server-b_toolx', mcpName: 'server-b' }], ); expect(warnings).toHaveLength(0); @@ -1039,7 +1050,12 @@ priority = 100 const warnings = validateMcpPolicyToolNames( 'my-server', ['tool1', 'tool2'], - [{ toolName: 'my-server__completely_different_name' }], + [ + { + toolName: 'mcp_my-server_completely_different_name', + mcpName: 'my-server', + }, + ], ); expect(warnings).toHaveLength(0); @@ -1059,7 +1075,13 @@ priority = 100 const warnings = validateMcpPolicyToolNames( 'my-server', ['tool1'], - [{ toolName: 'my-server__tol1', source: 'User: custom.toml' }], + [ + { + toolName: 'mcp_my-server_tol1', + mcpName: 'my-server', + source: 'User: custom.toml', + }, + ], ); expect(warnings).toHaveLength(1); diff --git a/packages/core/src/policy/toml-loader.ts b/packages/core/src/policy/toml-loader.ts index d2a24aa100..c91930a21d 100644 --- a/packages/core/src/policy/toml-loader.ts +++ b/packages/core/src/policy/toml-loader.ts @@ -24,6 +24,7 @@ import path from 'node:path'; import toml from '@iarna/toml'; import { z, type ZodError } from 'zod'; import { isNodeError } from '../utils/errors.js'; +import { MCP_TOOL_PREFIX, formatMcpToolName } from '../tools/mcp-tool.js'; /** * Maximum Levenshtein distance to consider a name a likely typo of a built-in tool. @@ -262,11 +263,15 @@ function validateShellCommandSyntax( * tool name, or null if valid or not close to any built-in name. */ function validateToolName(name: string, ruleIndex: number): string | null { + if (name.includes('__')) { + return `Rule #${ruleIndex + 1}: The "__" syntax for MCP tools is strictly deprecated. Please use the 'mcpName = "..."' property or the 'mcp_server_tool' format instead.`; + } + // A name that looks like an MCP tool (e.g., "re__ad") could be a typo of a // built-in tool ("read_file"). We should let such names fall through to the // Levenshtein distance check below. Non-MCP-like names that are valid can // be safely skipped. - if (isValidToolName(name, { allowWildcards: true }) && !name.includes('__')) { + if (isValidToolName(name, { allowWildcards: true })) { return null; } @@ -402,8 +407,8 @@ export async function loadPoliciesFromToml( // Validate tool names in rules for (let i = 0; i < tomlRules.length; i++) { const rule = tomlRules[i]; - // Skip MCP-scoped rules — MCP tool names are server-defined and dynamic - if (rule.mcpName) continue; + // We no longer skip MCP-scoped rules because we need to specifically + // warn users if they use deprecated "__" syntax for MCP tool names const toolNames: string[] = rule.toolName ? Array.isArray(rule.toolName) @@ -447,18 +452,19 @@ export async function loadPoliciesFromToml( // Create a policy rule for each tool name return toolNames.map((toolName) => { - // Transform mcpName field to composite toolName format - let effectiveToolName: string | undefined; - if (rule.mcpName && toolName) { - effectiveToolName = `${rule.mcpName}__${toolName}`; - } else if (rule.mcpName) { - effectiveToolName = `${rule.mcpName}__*`; - } else { - effectiveToolName = toolName; + let effectiveToolName: string | undefined = toolName; + const mcpName = rule.mcpName; + + if (mcpName) { + effectiveToolName = formatMcpToolName( + mcpName, + effectiveToolName, + ); } const policyRule: PolicyRule = { toolName: effectiveToolName, + mcpName: rule.mcpName, decision: rule.decision, priority: transformPriority(rule.priority, tier), modes: rule.modes, @@ -563,15 +569,16 @@ export async function loadPoliciesFromToml( return toolNames.map((toolName) => { let effectiveToolName: string | undefined; if (checker.mcpName && toolName) { - effectiveToolName = `${checker.mcpName}__${toolName}`; + effectiveToolName = `${MCP_TOOL_PREFIX}${checker.mcpName}_${toolName}`; } else if (checker.mcpName) { - effectiveToolName = `${checker.mcpName}__*`; + effectiveToolName = `${MCP_TOOL_PREFIX}${checker.mcpName}_*`; } else { effectiveToolName = toolName; } const safetyCheckerRule: SafetyCheckerRule = { toolName: effectiveToolName, + mcpName: checker.mcpName, priority: transformPriority(checker.priority, tier), // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion checker: checker.checker as SafetyCheckerConfig, @@ -655,16 +662,28 @@ export async function loadPoliciesFromToml( export function validateMcpPolicyToolNames( serverName: string, discoveredToolNames: string[], - policyRules: ReadonlyArray<{ toolName?: string; source?: string }>, + policyRules: ReadonlyArray<{ + toolName?: string; + mcpName?: string; + source?: string; + }>, ): string[] { - const prefix = `${serverName}__`; + const prefix = `${MCP_TOOL_PREFIX}${serverName}_`; const warnings: string[] = []; for (const rule of policyRules) { if (!rule.toolName) continue; - if (!rule.toolName.startsWith(prefix)) continue; - const toolPart = rule.toolName.slice(prefix.length); + let toolPart: string | undefined; + + // The toolName is typically transformed into an FQN if mcpName was used. + if (rule.mcpName === serverName && rule.toolName.startsWith(prefix)) { + toolPart = rule.toolName.slice(prefix.length); + } else if (rule.toolName.startsWith(prefix)) { + toolPart = rule.toolName.slice(prefix.length); + } else { + continue; + } // Skip wildcards if (toolPart === '*') continue; diff --git a/packages/core/src/policy/types.ts b/packages/core/src/policy/types.ts index 18c621c176..f59821b093 100644 --- a/packages/core/src/policy/types.ts +++ b/packages/core/src/policy/types.ts @@ -110,6 +110,13 @@ export interface PolicyRule { */ toolName?: string; + /** + * Identifies the MCP server this rule applies to. + * Enables precise rule matching against `serverName` metadata instead + * of parsing composite string names. + */ + mcpName?: string; + /** * Pattern to match against tool arguments. * Can be used for more fine-grained control. @@ -166,6 +173,11 @@ export interface SafetyCheckerRule { */ toolName?: string; + /** + * Identifies the MCP server this rule applies to. + */ + mcpName?: string; + /** * Pattern to match against tool arguments. * Can be used for more fine-grained control. diff --git a/packages/core/src/prompts/promptProvider.test.ts b/packages/core/src/prompts/promptProvider.test.ts index b74f159e4f..e3905d1fd7 100644 --- a/packages/core/src/prompts/promptProvider.test.ts +++ b/packages/core/src/prompts/promptProvider.test.ts @@ -153,7 +153,7 @@ describe('PromptProvider', () => { const provider = new PromptProvider(); const prompt = provider.getCoreSystemPrompt(mockConfig); - expect(prompt).toContain('`mcp_read` (my-mcp-server)'); + expect(prompt).toContain('`mcp_my-mcp-server_mcp_read` (my-mcp-server)'); }); it('should include write constraint message in plan mode prompt', () => { diff --git a/packages/core/src/tools/mcp-tool.test.ts b/packages/core/src/tools/mcp-tool.test.ts index c72a0533e1..b8a97132d1 100644 --- a/packages/core/src/tools/mcp-tool.test.ts +++ b/packages/core/src/tools/mcp-tool.test.ts @@ -8,9 +8,12 @@ import type { Mocked } from 'vitest'; import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import { safeJsonStringify } from '../utils/safeJsonStringify.js'; -import { DiscoveredMCPTool, generateValidName } from './mcp-tool.js'; // Added getStringifiedResultForDisplay -import type { ToolResult } from './tools.js'; -import { ToolConfirmationOutcome } from './tools.js'; // Added ToolConfirmationOutcome +import { + DiscoveredMCPTool, + generateValidName, + formatMcpToolName, +} from './mcp-tool.js'; // Added getStringifiedResultForDisplay +import { ToolConfirmationOutcome, type ToolResult } from './tools.js'; import type { CallableTool, Part } from '@google/genai'; import { ToolErrorType } from './tool-error.js'; import { @@ -43,23 +46,23 @@ const createSdkResponse = ( describe('generateValidName', () => { it('should return a valid name for a simple function', () => { - expect(generateValidName('myFunction')).toBe('myFunction'); + expect(generateValidName('myFunction')).toBe('mcp_myFunction'); }); it('should replace invalid characters with underscores', () => { expect(generateValidName('invalid-name with spaces')).toBe( - 'invalid-name_with_spaces', + 'mcp_invalid-name_with_spaces', ); }); it('should truncate long names', () => { expect(generateValidName('x'.repeat(80))).toBe( - 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx...xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', + 'mcp_xxxxxxxxxxxxxxxxxxxxxxxxxx...xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', ); }); it('should handle names with only invalid characters', () => { - expect(generateValidName('!@#$%^&*()')).toBe('__________'); + expect(generateValidName('!@#$%^&*()')).toBe('mcp___________'); }); it.each([ @@ -74,6 +77,30 @@ describe('generateValidName', () => { ); }); +describe('formatMcpToolName', () => { + it('should format a fully qualified name', () => { + expect(formatMcpToolName('github', 'list_repos')).toBe( + 'mcp_github_list_repos', + ); + }); + + it('should handle global wildcards', () => { + expect(formatMcpToolName('*')).toBe('mcp_*'); + }); + + it('should handle tool-level wildcards', () => { + expect(formatMcpToolName('github', '*')).toBe('mcp_github_*'); + }); + + it('should handle undefined toolName as a tool-level wildcard', () => { + expect(formatMcpToolName('github')).toBe('mcp_github_*'); + }); + + it('should format explicitly global wildcard with specific tool', () => { + expect(formatMcpToolName('*', 'list_repos')).toBe('mcp_*_list_repos'); + }); +}); + describe('DiscoveredMCPTool', () => { const serverName = 'mock-mcp-server'; const serverToolName = 'actual-server-tool-name'; @@ -110,8 +137,10 @@ describe('DiscoveredMCPTool', () => { describe('constructor', () => { it('should set properties correctly', () => { - expect(tool.name).toBe(serverToolName); - expect(tool.schema.name).toBe(serverToolName); + expect(tool.name).toBe('mcp_mock-mcp-server_actual-server-tool-name'); + expect(tool.schema.name).toBe( + 'mcp_mock-mcp-server_actual-server-tool-name', + ); expect(tool.schema.description).toBe(baseDescription); expect(tool.schema.parameters).toBeUndefined(); expect(tool.schema.parametersJsonSchema).toEqual(inputSchema); @@ -937,31 +966,35 @@ describe('DiscoveredMCPTool', () => { describe('MCP Tool Naming Regression Fixes', () => { describe('generateValidName', () => { it('should replace spaces with underscores', () => { - expect(generateValidName('My Tool')).toBe('My_Tool'); + expect(generateValidName('My Tool')).toBe('mcp_My_Tool'); }); it('should allow colons', () => { - expect(generateValidName('namespace:tool')).toBe('namespace:tool'); + expect(generateValidName('namespace:tool')).toBe('mcp_namespace:tool'); }); it('should ensure name starts with a letter or underscore', () => { - expect(generateValidName('123-tool')).toBe('_123-tool'); - expect(generateValidName('-tool')).toBe('_-tool'); - expect(generateValidName('.tool')).toBe('_.tool'); + expect(generateValidName('valid_tool_name')).toBe('mcp_valid_tool_name'); + expect(generateValidName('alsoValid-123.name')).toBe( + 'mcp_alsoValid-123.name', + ); + expect(generateValidName('another:valid:name')).toBe( + 'mcp_another:valid:name', + ); }); it('should handle very long names by truncating in the middle', () => { const longName = 'a'.repeat(40) + '__' + 'b'.repeat(40); const result = generateValidName(longName); expect(result.length).toBeLessThanOrEqual(63); - expect(result).toMatch(/^a{30}\.\.\.b{30}$/); + expect(result).toMatch(/^mcp_a{26}\.\.\.b{30}$/); }); it('should handle very long names starting with a digit', () => { const longName = '1' + 'a'.repeat(80); const result = generateValidName(longName); expect(result.length).toBeLessThanOrEqual(63); - expect(result.startsWith('_1')).toBe(true); + expect(result.startsWith('mcp_1')).toBe(true); }); }); @@ -977,7 +1010,7 @@ describe('MCP Tool Naming Regression Fixes', () => { ); const qn = tool.getFullyQualifiedName(); - expect(qn).toBe('My_Server__my-tool'); + expect(qn).toBe('mcp_My_Server_my-tool'); }); it('should handle long server and tool names in qualified name', () => { @@ -1008,7 +1041,7 @@ describe('MCP Tool Naming Regression Fixes', () => { ); const qn = tool.getFullyQualifiedName(); - expect(qn).toBe('_123-server__tool'); + expect(qn).toBe('mcp_123-server_tool'); }); }); }); diff --git a/packages/core/src/tools/mcp-tool.ts b/packages/core/src/tools/mcp-tool.ts index 9d3f8d2e7c..c1fa4b5cb4 100644 --- a/packages/core/src/tools/mcp-tool.ts +++ b/packages/core/src/tools/mcp-tool.ts @@ -5,6 +5,7 @@ */ import { safeJsonStringify } from '../utils/safeJsonStringify.js'; +import { debugLogger } from '../utils/debugLogger.js'; import type { ToolCallConfirmationDetails, ToolInvocation, @@ -25,18 +26,92 @@ import type { McpContext } from './mcp-client.js'; /** * The separator used to qualify MCP tool names with their server prefix. - * e.g. "server_name__tool_name" + * e.g. "mcp_server_name_tool_name" */ -export const MCP_QUALIFIED_NAME_SEPARATOR = '__'; +export const MCP_QUALIFIED_NAME_SEPARATOR = '_'; /** - * Returns true if `name` matches the MCP qualified name format: "server__tool", - * i.e. exactly two non-empty parts separated by the MCP_QUALIFIED_NAME_SEPARATOR. + * The strict prefix that all MCP tools must start with. + */ +export const MCP_TOOL_PREFIX = 'mcp_'; + +/** + * Returns true if `name` matches the MCP qualified name format: "mcp_server_tool", + * i.e. starts with the "mcp_" prefix. */ export function isMcpToolName(name: string): boolean { - if (!name.includes(MCP_QUALIFIED_NAME_SEPARATOR)) return false; - const parts = name.split(MCP_QUALIFIED_NAME_SEPARATOR); - return parts.length === 2 && parts[0].length > 0 && parts[1].length > 0; + return name.startsWith(MCP_TOOL_PREFIX); +} + +/** + * Extracts the server name and tool name from a fully qualified MCP tool name. + * Expected format: `mcp_{server_name}_{tool_name}` + * @param name The fully qualified tool name. + * @returns An object containing the extracted `serverName` and `toolName`, or + * `undefined` properties if the name doesn't match the expected format. + */ +export function parseMcpToolName(name: string): { + serverName?: string; + toolName?: string; +} { + if (!isMcpToolName(name)) { + return {}; + } + // Remove the prefix + const withoutPrefix = name.slice(MCP_TOOL_PREFIX.length); + // The first segment is the server name, the rest is the tool name + const match = withoutPrefix.match(/^([^_]+)_(.+)$/); + if (match) { + return { + serverName: match[1], + toolName: match[2], + }; + } + return {}; +} + +/** + * Assembles a fully qualified MCP tool name (or wildcard pattern) from its server and tool components. + * + * @param serverName The backend MCP server name (can be '*' for global wildcards). + * @param toolName The name of the tool (can be undefined or '*' for tool-level wildcards). + * @returns The fully qualified name (e.g., `mcp_server_tool`, `mcp_*`, `mcp_server_*`). + */ +export function formatMcpToolName( + serverName: string, + toolName?: string, +): string { + if (serverName === '*' && !toolName) { + return `${MCP_TOOL_PREFIX}*`; + } else if (serverName === '*') { + return `${MCP_TOOL_PREFIX}*_${toolName}`; + } else if (!toolName) { + return `${MCP_TOOL_PREFIX}${serverName}_*`; + } else { + return `${MCP_TOOL_PREFIX}${serverName}_${toolName}`; + } +} + +/** + * Interface representing metadata annotations specific to an MCP tool. + * Ensures strongly-typed access to server-level properties. + */ +export interface McpToolAnnotation extends Record { + _serverName: string; +} + +/** + * Type guard to check if tool annotations implement McpToolAnnotation. + */ +export function isMcpToolAnnotation( + annotation: unknown, +): annotation is McpToolAnnotation { + return ( + typeof annotation === 'object' && + annotation !== null && + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + typeof (annotation as Record)['_serverName'] === 'string' + ); } type ToolParams = Record; @@ -276,7 +351,10 @@ export class DiscoveredMCPTool extends BaseDeclarativeTool< private readonly _toolAnnotations?: Record, ) { super( - generateValidName(nameOverride ?? serverToolName), + nameOverride ?? + generateValidName( + `${serverName}${MCP_QUALIFIED_NAME_SEPARATOR}${serverToolName}`, + ), `${serverToolName} (${serverName} MCP Server)`, description, Kind.Other, @@ -304,7 +382,9 @@ export class DiscoveredMCPTool extends BaseDeclarativeTool< } getFullyQualifiedPrefix(): string { - return `${this.serverName}${MCP_QUALIFIED_NAME_SEPARATOR}`; + return generateValidName( + `${this.serverName}${MCP_QUALIFIED_NAME_SEPARATOR}`, + ); } getFullyQualifiedName(): string { @@ -495,8 +575,12 @@ const MAX_FUNCTION_NAME_LENGTH = 64; /** Visible for testing */ export function generateValidName(name: string) { - // Replace invalid characters (based on 400 error message from Gemini API) with underscores - let validToolname = name.replace(/[^a-zA-Z0-9_.:-]/g, '_'); + // Enforce the mcp_ prefix for all generated MCP tool names + let validToolname = name.startsWith('mcp_') ? name : `mcp_${name}`; + + // Replace invalid characters with underscores to conform to Gemini API: + // ^[a-zA-Z_][a-zA-Z0-9_\-.:]{0,63}$ + validToolname = validToolname.replace(/[^a-zA-Z0-9_\-.:]/g, '_'); // Ensure it starts with a letter or underscore if (/^[^a-zA-Z_]/.test(validToolname)) { @@ -507,6 +591,9 @@ export function generateValidName(name: string) { // Note: We use 63 instead of 64 to be safe, as some environments have off-by-one behaviors. const safeLimit = MAX_FUNCTION_NAME_LENGTH - 1; if (validToolname.length > safeLimit) { + debugLogger.warn( + `Truncating MCP tool name "${validToolname}" to fit within the 64 character limit. This tool may require user approval.`, + ); validToolname = validToolname.slice(0, 30) + '...' + validToolname.slice(-30); } diff --git a/packages/core/src/tools/tool-registry.test.ts b/packages/core/src/tools/tool-registry.test.ts index d44c133705..d451d72208 100644 --- a/packages/core/src/tools/tool-registry.test.ts +++ b/packages/core/src/tools/tool-registry.test.ts @@ -13,9 +13,12 @@ import { ApprovalMode } from '../policy/types.js'; import { ToolRegistry, DiscoveredTool } from './tool-registry.js'; import { DISCOVERED_TOOL_PREFIX } from './tool-names.js'; -import { DiscoveredMCPTool, MCP_QUALIFIED_NAME_SEPARATOR } from './mcp-tool.js'; -import type { FunctionDeclaration, CallableTool } from '@google/genai'; -import { mcpToTool } from '@google/genai'; +import { DiscoveredMCPTool } from './mcp-tool.js'; +import { + mcpToTool, + type FunctionDeclaration, + type CallableTool, +} from '@google/genai'; import { spawn } from 'node:child_process'; import fs from 'node:fs'; @@ -307,7 +310,7 @@ describe('ToolRegistry', () => { { name: 'should match qualified MCP tool names when qualified or unqualified', tools: [mcpTool, mcpTool.asFullyQualifiedTool()], - excludedTools: [`${mcpTool.getFullyQualifiedPrefix()}${mcpTool.name}`], + excludedTools: [mcpTool.name], }, { name: 'should match class names', @@ -395,7 +398,7 @@ describe('ToolRegistry', () => { // Assert that the returned array contains all tool names, with MCP qualified expect(toolNames).toContain('c-tool'); expect(toolNames).toContain('a-tool'); - expect(toolNames).toContain('my-server__my-tool'); + expect(toolNames).toContain('mcp_my-server_my-tool'); expect(toolNames).toHaveLength(3); }); @@ -409,7 +412,7 @@ describe('ToolRegistry', () => { toolRegistry.registerTool(mcpTool.asFullyQualifiedTool()); const toolNames = toolRegistry.getAllToolNames(); - expect(toolNames).toEqual([`${serverName}__${toolName}`]); + expect(toolNames).toEqual([`mcp_${serverName}_${toolName}`]); }); }); @@ -439,7 +442,11 @@ describe('ToolRegistry', () => { // Assert that the array has the correct tools and is sorted by name expect(toolsFromServer1).toHaveLength(3); - expect(toolNames).toEqual(['apple-tool', 'banana-tool', 'zebra-tool']); + expect(toolNames).toEqual([ + 'mcp_mcp-server-uno_apple-tool', + 'mcp_mcp-server-uno_banana-tool', + 'mcp_mcp-server-uno_zebra-tool', + ]); // Assert that all returned tools are indeed from the correct server for (const tool of toolsFromServer1) { @@ -481,8 +488,8 @@ describe('ToolRegistry', () => { 'builtin-1', 'builtin-2', DISCOVERED_TOOL_PREFIX + 'discovered-1', - 'apple-server__mcp-apple', - 'zebra-server__mcp-zebra', + 'mcp_apple-server_mcp-apple', + 'mcp_zebra-server_mcp-zebra', ]); }); }); @@ -598,25 +605,20 @@ describe('ToolRegistry', () => { }); describe('getTool', () => { - it('should retrieve an MCP tool by its fully qualified name even if registered with simple name', () => { + it('should retrieve an MCP tool by its fully qualified name', () => { const serverName = 'my-server'; const toolName = 'my-tool'; const mcpTool = createMCPTool(serverName, toolName, 'description'); - // Register tool (will be registered as 'my-tool' since no conflict) + // Register tool toolRegistry.registerTool(mcpTool); - // Verify it is available as 'my-tool' - expect(toolRegistry.getTool('my-tool')).toBeDefined(); - expect(toolRegistry.getTool('my-tool')?.name).toBe('my-tool'); - - // Verify it is available as 'my-server__my-tool' - const fullyQualifiedName = `${serverName}__${toolName}`; + // Verify it is available as 'mcp_my-server_my-tool' + const fullyQualifiedName = `mcp_${serverName}_${toolName}`; const retrievedTool = toolRegistry.getTool(fullyQualifiedName); expect(retrievedTool).toBeDefined(); - // The returned tool object is the same, so its name property is still 'my-tool' - expect(retrievedTool?.name).toBe('my-tool'); + expect(retrievedTool?.name).toBe(fullyQualifiedName); }); it('should retrieve an MCP tool by its fully qualified name when tool name has special characters', () => { @@ -626,19 +628,15 @@ describe('ToolRegistry', () => { const validToolName = 'my_tool'; const mcpTool = createMCPTool(serverName, toolName, 'description'); - // Register tool (will be registered as sanitized name) + // Register tool toolRegistry.registerTool(mcpTool); - // Verify it is available as sanitized name - expect(toolRegistry.getTool(validToolName)).toBeDefined(); - expect(toolRegistry.getTool(validToolName)?.name).toBe(validToolName); - - // Verify it is available as 'my-server__my_tool' - const fullyQualifiedName = `${serverName}__${validToolName}`; + // Verify it is available as 'mcp_my-server_my_tool' + const fullyQualifiedName = `mcp_${serverName}_${validToolName}`; const retrievedTool = toolRegistry.getTool(fullyQualifiedName); expect(retrievedTool).toBeDefined(); - expect(retrievedTool?.name).toBe(validToolName); + expect(retrievedTool?.name).toBe(fullyQualifiedName); }); it('should resolve qualified names in getFunctionDeclarationsFiltered', () => { @@ -648,13 +646,13 @@ describe('ToolRegistry', () => { toolRegistry.registerTool(mcpTool); - const fullyQualifiedName = `${serverName}${MCP_QUALIFIED_NAME_SEPARATOR}${toolName}`; + const fullyQualifiedName = `mcp_${serverName}_${toolName}`; const declarations = toolRegistry.getFunctionDeclarationsFiltered([ fullyQualifiedName, ]); expect(declarations).toHaveLength(1); - expect(declarations[0].name).toBe(toolName); + expect(declarations[0].name).toBe(fullyQualifiedName); }); it('should retrieve a tool using its legacy alias', async () => { @@ -685,7 +683,7 @@ describe('ToolRegistry', () => { const declarations = toolRegistry.getFunctionDeclarations(); expect(declarations).toHaveLength(1); - expect(declarations[0].name).toBe(`${serverName}__${toolName}`); + expect(declarations[0].name).toBe(`mcp_${serverName}_${toolName}`); }); it('should deduplicate MCP tools in declarations', () => { @@ -699,7 +697,7 @@ describe('ToolRegistry', () => { const declarations = toolRegistry.getFunctionDeclarations(); expect(declarations).toHaveLength(1); - expect(declarations[0].name).toBe(`${serverName}__${toolName}`); + expect(declarations[0].name).toBe(`mcp_${serverName}_${toolName}`); }); }); @@ -752,7 +750,7 @@ describe('ToolRegistry', () => { const allTools = toolRegistry.getAllTools(); const toolNames = allTools.map((t) => t.name); - expect(toolNames).toContain('read-only-tool'); + expect(toolNames).toContain('mcp_test-server_read-only-tool'); }); it('should exclude non-read-only MCP tools when denied by policy in plan mode', () => { @@ -769,7 +767,7 @@ describe('ToolRegistry', () => { const allTools = toolRegistry.getAllTools(); const toolNames = allTools.map((t) => t.name); - expect(toolNames).not.toContain('write-mcp-tool'); + expect(toolNames).not.toContain('mcp_test-server_write-mcp-tool'); }); }); diff --git a/packages/core/src/tools/tool-registry.ts b/packages/core/src/tools/tool-registry.ts index e7fd7a6a66..9caafe4a1d 100644 --- a/packages/core/src/tools/tool-registry.ts +++ b/packages/core/src/tools/tool-registry.ts @@ -443,13 +443,15 @@ export class ToolRegistry { private buildToolMetadata(): Map> { const toolMetadata = new Map>(); for (const [name, tool] of this.allKnownTools) { - if (tool.toolAnnotations) { - const metadata: Record = { ...tool.toolAnnotations }; - // Include server name so the policy engine can resolve composite - // wildcard patterns (e.g. "*__*") against unqualified tool names. - if (tool instanceof DiscoveredMCPTool) { - metadata['_serverName'] = tool.serverName; - } + const metadata: Record = tool.toolAnnotations + ? { ...tool.toolAnnotations } + : {}; + // Include server name so the policy engine can resolve composite + // wildcard patterns (e.g. "*__*") against unqualified tool names. + if (tool instanceof DiscoveredMCPTool) { + metadata['_serverName'] = tool.serverName; + } + if (Object.keys(metadata).length > 0) { toolMetadata.set(name, metadata); } }