diff --git a/packages/core/src/policy/config.ts b/packages/core/src/policy/config.ts index f09db53b70..a1e337436e 100644 --- a/packages/core/src/policy/config.ts +++ b/packages/core/src/policy/config.ts @@ -144,7 +144,8 @@ export function getPolicyTier( */ export function formatPolicyError(error: PolicyFileError): string { const tierLabel = error.tier.toUpperCase(); - let message = `[${tierLabel}] Policy file error in ${error.fileName}:\n`; + const severityLabel = error.severity === 'warning' ? 'warning' : 'error'; + let message = `[${tierLabel}] Policy file ${severityLabel} in ${error.fileName}:\n`; message += ` ${error.message}`; if (error.details) { message += `\n${error.details}`; @@ -293,7 +294,10 @@ export async function createPolicyEngineConfig( // coreEvents has a buffer that will display these once the UI is ready if (errors.length > 0) { for (const error of errors) { - coreEvents.emitFeedback('error', formatPolicyError(error)); + coreEvents.emitFeedback( + error.severity ?? 'error', + formatPolicyError(error), + ); } } diff --git a/packages/core/src/policy/toml-loader.test.ts b/packages/core/src/policy/toml-loader.test.ts index 54a81771b8..30236d80c2 100644 --- a/packages/core/src/policy/toml-loader.test.ts +++ b/packages/core/src/policy/toml-loader.test.ts @@ -14,13 +14,26 @@ import * as fs from 'node:fs/promises'; import * as path from 'node:path'; import * as os from 'node:os'; import { fileURLToPath } from 'node:url'; -import { loadPoliciesFromToml } from './toml-loader.js'; +import { + loadPoliciesFromToml, + validateMcpPolicyToolNames, +} from './toml-loader.js'; import type { PolicyLoadResult } from './toml-loader.js'; import { PolicyEngine } from './policy-engine.js'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); +/** Returns only errors (severity !== 'warning') from a PolicyLoadResult. */ +function getErrors(result: PolicyLoadResult): PolicyLoadResult['errors'] { + return result.errors.filter((e) => e.severity !== 'warning'); +} + +/** Returns only warnings (severity === 'warning') from a PolicyLoadResult. */ +function getWarnings(result: PolicyLoadResult): PolicyLoadResult['errors'] { + return result.errors.filter((e) => e.severity === 'warning'); +} + describe('policy-toml-loader', () => { let tempDir: string; @@ -189,7 +202,7 @@ priority = 100 'grep', 'read', ]); - expect(result.errors).toHaveLength(0); + expect(getErrors(result)).toHaveLength(0); }); it('should transform mcpName to composite toolName', async () => { @@ -228,7 +241,7 @@ modes = ["yolo"] expect(result.rules[0].modes).toEqual(['default', 'yolo']); expect(result.rules[1].toolName).toBe('grep'); expect(result.rules[1].modes).toEqual(['yolo']); - expect(result.errors).toHaveLength(0); + expect(getErrors(result)).toHaveLength(0); }); it('should parse and transform allow_redirection property', async () => { @@ -259,7 +272,7 @@ deny_message = "Deletion is permanent" expect(result.rules[0].toolName).toBe('rm'); expect(result.rules[0].decision).toBe(PolicyDecision.DENY); expect(result.rules[0].denyMessage).toBe('Deletion is permanent'); - expect(result.errors).toHaveLength(0); + expect(getErrors(result)).toHaveLength(0); }); it('should support modes property for Tier 4 and Tier 5 policies', async () => { @@ -547,8 +560,8 @@ commandRegex = ".*" decision = "allow" priority = 100 `); - expect(result.errors).toHaveLength(1); - const error = result.errors[0]; + expect(getErrors(result)).toHaveLength(1); + const error = getErrors(result)[0]; expect(error.errorType).toBe('rule_validation'); expect(error.details).toContain('run_shell_command'); }); @@ -576,8 +589,8 @@ argsPattern = "([a-z)" decision = "allow" priority = 100 `); - expect(result.errors).toHaveLength(1); - const error = result.errors[0]; + expect(getErrors(result)).toHaveLength(1); + const error = getErrors(result)[0]; expect(error.errorType).toBe('regex_compilation'); expect(error.message).toBe('Invalid regex pattern'); }); @@ -592,7 +605,7 @@ priority = 100 const getPolicyTier = (_dir: string) => 1; const result = await loadPoliciesFromToml([filePath], getPolicyTier); - expect(result.errors).toHaveLength(0); + expect(getErrors(result)).toHaveLength(0); expect(result.rules).toHaveLength(1); expect(result.rules[0].toolName).toBe('test-tool'); expect(result.rules[0].decision).toBe(PolicyDecision.ALLOW); @@ -612,6 +625,177 @@ priority = 100 }); }); + describe('Tool name validation', () => { + it('should warn for unrecognized tool names with suggestions', async () => { + const result = await runLoadPoliciesFromToml(` +[[rule]] +toolName = "grob" +decision = "allow" +priority = 100 +`); + + const warnings = getWarnings(result); + expect(warnings).toHaveLength(1); + expect(warnings[0].errorType).toBe('tool_name_warning'); + expect(warnings[0].severity).toBe('warning'); + expect(warnings[0].details).toContain('Unrecognized tool name "grob"'); + expect(warnings[0].details).toContain('glob'); + // Rules should still load despite warnings + expect(result.rules).toHaveLength(1); + expect(result.rules[0].toolName).toBe('grob'); + }); + + it('should not warn for valid built-in tool names', async () => { + const result = await runLoadPoliciesFromToml(` +[[rule]] +toolName = "glob" +decision = "allow" +priority = 100 + +[[rule]] +toolName = "read_file" +decision = "allow" +priority = 100 +`); + + expect(getWarnings(result)).toHaveLength(0); + expect(getErrors(result)).toHaveLength(0); + expect(result.rules).toHaveLength(2); + }); + + it('should not warn for wildcard "*"', async () => { + const result = await runLoadPoliciesFromToml(` +[[rule]] +toolName = "*" +decision = "allow" +priority = 100 +`); + + expect(getWarnings(result)).toHaveLength(0); + expect(getErrors(result)).toHaveLength(0); + }); + + it('should not warn for MCP format tool names', async () => { + const result = await runLoadPoliciesFromToml(` +[[rule]] +toolName = "my-server__my-tool" +decision = "allow" +priority = 100 + +[[rule]] +toolName = "my-server__*" +decision = "allow" +priority = 100 +`); + + expect(getWarnings(result)).toHaveLength(0); + expect(getErrors(result)).toHaveLength(0); + }); + + it('should not warn when mcpName is present (skips validation)', async () => { + const result = await runLoadPoliciesFromToml(` +[[rule]] +mcpName = "my-server" +toolName = "nonexistent" +decision = "allow" +priority = 100 +`); + + expect(getWarnings(result)).toHaveLength(0); + expect(getErrors(result)).toHaveLength(0); + }); + + it('should not warn for legacy aliases', async () => { + const result = await runLoadPoliciesFromToml(` +[[rule]] +toolName = "search_file_content" +decision = "allow" +priority = 100 +`); + + expect(getWarnings(result)).toHaveLength(0); + expect(getErrors(result)).toHaveLength(0); + }); + + it('should not warn for discovered tool prefix', async () => { + const result = await runLoadPoliciesFromToml(` +[[rule]] +toolName = "discovered_tool_my_custom_tool" +decision = "allow" +priority = 100 +`); + + expect(getWarnings(result)).toHaveLength(0); + expect(getErrors(result)).toHaveLength(0); + }); + + it('should warn for each invalid name in a toolName array', async () => { + const result = await runLoadPoliciesFromToml(` +[[rule]] +toolName = ["grob", "glob", "replce"] +decision = "allow" +priority = 100 +`); + + const warnings = getWarnings(result); + expect(warnings).toHaveLength(2); + expect(warnings[0].details).toContain('"grob"'); + expect(warnings[1].details).toContain('"replce"'); + // All rules still load + expect(result.rules).toHaveLength(3); + }); + + it('should not warn for names far from any built-in (dynamic/agent tools)', async () => { + const result = await runLoadPoliciesFromToml(` +[[rule]] +toolName = "delegate_to_agent" +decision = "allow" +priority = 100 + +[[rule]] +toolName = "my_custom_tool" +decision = "allow" +priority = 100 +`); + + expect(getWarnings(result)).toHaveLength(0); + expect(getErrors(result)).toHaveLength(0); + expect(result.rules).toHaveLength(2); + }); + + it('should not warn for catch-all rules (no toolName)', async () => { + const result = await runLoadPoliciesFromToml(` +[[rule]] +decision = "deny" +priority = 100 +`); + + expect(getWarnings(result)).toHaveLength(0); + expect(getErrors(result)).toHaveLength(0); + expect(result.rules).toHaveLength(1); + }); + + it('should still load rules even with warnings', async () => { + const result = await runLoadPoliciesFromToml(` +[[rule]] +toolName = "wrte_file" +decision = "deny" +priority = 50 + +[[rule]] +toolName = "glob" +decision = "allow" +priority = 100 +`); + + expect(getWarnings(result)).toHaveLength(1); + expect(getErrors(result)).toHaveLength(0); + expect(result.rules).toHaveLength(2); + expect(result.rules[0].toolName).toBe('wrte_file'); + expect(result.rules[1].toolName).toBe('glob'); + }); + }); + describe('Built-in Plan Mode Policy', () => { it('should allow MCP tools with readOnlyHint annotation in Plan Mode (ASK_USER, not DENY)', async () => { const planTomlPath = path.resolve(__dirname, 'policies', 'plan.toml'); @@ -779,4 +963,88 @@ priority = 100 } }); }); + + describe('validateMcpPolicyToolNames', () => { + it('should warn for MCP tool names that are likely typos', () => { + const warnings = validateMcpPolicyToolNames( + 'google-workspace', + ['people.getMe', 'calendar.list', 'calendar.get'], + [ + { + toolName: 'google-workspace__people.getxMe', + source: 'User: workspace.toml', + }, + ], + ); + + expect(warnings).toHaveLength(1); + expect(warnings[0]).toContain('people.getxMe'); + expect(warnings[0]).toContain('google-workspace'); + expect(warnings[0]).toContain('people.getMe'); + }); + + it('should not warn for matching MCP tool names', () => { + const warnings = validateMcpPolicyToolNames( + 'google-workspace', + ['people.getMe', 'calendar.list'], + [ + { toolName: 'google-workspace__people.getMe' }, + { toolName: 'google-workspace__calendar.list' }, + ], + ); + + expect(warnings).toHaveLength(0); + }); + + it('should not warn for wildcard MCP rules', () => { + const warnings = validateMcpPolicyToolNames( + 'my-server', + ['tool1', 'tool2'], + [{ toolName: 'my-server__*' }], + ); + + expect(warnings).toHaveLength(0); + }); + + it('should not warn for rules targeting other servers', () => { + const warnings = validateMcpPolicyToolNames( + 'server-a', + ['tool1'], + [{ toolName: 'server-b__toolx' }], + ); + + expect(warnings).toHaveLength(0); + }); + + it('should not warn for tool names far from any discovered tool', () => { + const warnings = validateMcpPolicyToolNames( + 'my-server', + ['tool1', 'tool2'], + [{ toolName: 'my-server__completely_different_name' }], + ); + + expect(warnings).toHaveLength(0); + }); + + it('should skip rules without toolName', () => { + const warnings = validateMcpPolicyToolNames( + 'my-server', + ['tool1'], + [{ toolName: undefined }], + ); + + expect(warnings).toHaveLength(0); + }); + + it('should include source in warning when available', () => { + const warnings = validateMcpPolicyToolNames( + 'my-server', + ['tool1'], + [{ toolName: 'my-server__tol1', source: 'User: custom.toml' }], + ); + + expect(warnings).toHaveLength(1); + expect(warnings[0]).toContain('User: custom.toml'); + }); + }); }); diff --git a/packages/core/src/policy/toml-loader.ts b/packages/core/src/policy/toml-loader.ts index df4bd3ca9e..d2a24aa100 100644 --- a/packages/core/src/policy/toml-loader.ts +++ b/packages/core/src/policy/toml-loader.ts @@ -13,12 +13,25 @@ import { InProcessCheckerType, } from './types.js'; import { buildArgsPatterns, isSafeRegExp } from './utils.js'; +import { + isValidToolName, + ALL_BUILTIN_TOOL_NAMES, +} from '../tools/tool-names.js'; +import { getToolSuggestion } from '../utils/tool-utils.js'; +import levenshtein from 'fast-levenshtein'; import fs from 'node:fs/promises'; import path from 'node:path'; import toml from '@iarna/toml'; import { z, type ZodError } from 'zod'; import { isNodeError } from '../utils/errors.js'; +/** + * Maximum Levenshtein distance to consider a name a likely typo of a built-in tool. + * Names further from all built-in tools are assumed to be intentional + * (e.g., dynamically registered agent tools) and are not warned about. + */ +const MAX_TYPO_DISTANCE = 3; + /** * Schema for a single policy rule in the TOML file (before transformation). */ @@ -100,7 +113,8 @@ export type PolicyFileErrorType = | 'toml_parse' | 'schema_validation' | 'rule_validation' - | 'regex_compilation'; + | 'regex_compilation' + | 'tool_name_warning'; /** * Detailed error information for policy file loading failures. @@ -114,6 +128,7 @@ export interface PolicyFileError { message: string; details?: string; suggestion?: string; + severity?: 'error' | 'warning'; } /** @@ -241,6 +256,36 @@ function validateShellCommandSyntax( return null; } +/** + * Validates that a tool name is recognized. + * Returns a warning message if the tool name is a likely typo of a built-in + * tool name, or null if valid or not close to any built-in name. + */ +function validateToolName(name: string, ruleIndex: number): string | null { + // A name that looks like an MCP tool (e.g., "re__ad") could be a typo of a + // built-in tool ("read_file"). We should let such names fall through to the + // Levenshtein distance check below. Non-MCP-like names that are valid can + // be safely skipped. + if (isValidToolName(name, { allowWildcards: true }) && !name.includes('__')) { + return null; + } + + // Only warn if the name is close to a built-in name (likely typo). + // Names that are very different from all built-in names are likely + // intentional (dynamic tools, agent tools, etc.). + const allNames = [...ALL_BUILTIN_TOOL_NAMES]; + const minDistance = Math.min( + ...allNames.map((n) => levenshtein.get(name, n)), + ); + + if (minDistance > MAX_TYPO_DISTANCE) { + return null; + } + + const suggestion = getToolSuggestion(name, allNames); + return `Rule #${ruleIndex + 1}: Unrecognized tool name "${name}".${suggestion}`; +} + /** * Transforms a priority number based on the policy tier. * Formula: tier + priority/1000 @@ -354,6 +399,35 @@ export async function loadPoliciesFromToml( } } + // Validate tool names in rules + for (let i = 0; i < tomlRules.length; i++) { + const rule = tomlRules[i]; + // Skip MCP-scoped rules — MCP tool names are server-defined and dynamic + if (rule.mcpName) continue; + + const toolNames: string[] = rule.toolName + ? Array.isArray(rule.toolName) + ? rule.toolName + : [rule.toolName] + : []; + + for (const name of toolNames) { + const warning = validateToolName(name, i); + if (warning) { + errors.push({ + filePath, + fileName: file, + tier: tierName, + ruleIndex: i, + errorType: 'tool_name_warning', + message: 'Unrecognized tool name', + details: warning, + severity: 'warning', + }); + } + } + } + // Transform rules const parsedRules: PolicyRule[] = (validationResult.data.rule ?? []) .flatMap((rule) => { @@ -439,6 +513,35 @@ export async function loadPoliciesFromToml( rules.push(...parsedRules); + // Validate tool names in safety checker rules + const tomlCheckerRules = validationResult.data.safety_checker ?? []; + for (let i = 0; i < tomlCheckerRules.length; i++) { + const checker = tomlCheckerRules[i]; + if (checker.mcpName) continue; + + const checkerToolNames: string[] = checker.toolName + ? Array.isArray(checker.toolName) + ? checker.toolName + : [checker.toolName] + : []; + + for (const name of checkerToolNames) { + const warning = validateToolName(name, i); + if (warning) { + errors.push({ + filePath, + fileName: file, + tier: tierName, + ruleIndex: i, + errorType: 'tool_name_warning', + message: 'Unrecognized tool name in safety checker', + details: warning, + severity: 'warning', + }); + } + } + } + // Transform checkers const parsedCheckers: SafetyCheckerRule[] = ( validationResult.data.safety_checker ?? [] @@ -535,3 +638,55 @@ export async function loadPoliciesFromToml( return { rules, checkers, errors }; } + +/** + * Validates MCP tool names in policy rules against actually discovered MCP tools. + * Called after an MCP server connects and its tools are discovered. + * + * For each policy rule that references the given MCP server, checks if the + * tool name matches any discovered tool. Emits warnings for likely typos + * using Levenshtein distance. + * + * @param serverName The MCP server name (e.g., "google-workspace") + * @param discoveredToolNames The tool names discovered from this server (simple names, not fully qualified) + * @param policyRules The current set of policy rules to validate against + * @returns Array of warning messages for unrecognized MCP tool names + */ +export function validateMcpPolicyToolNames( + serverName: string, + discoveredToolNames: string[], + policyRules: ReadonlyArray<{ toolName?: string; source?: string }>, +): string[] { + const prefix = `${serverName}__`; + const warnings: string[] = []; + + for (const rule of policyRules) { + if (!rule.toolName) continue; + if (!rule.toolName.startsWith(prefix)) continue; + + const toolPart = rule.toolName.slice(prefix.length); + + // Skip wildcards + if (toolPart === '*') continue; + + // Check if the tool exists + if (discoveredToolNames.includes(toolPart)) continue; + + // Tool not found — check if it's a likely typo + if (discoveredToolNames.length === 0) continue; + + const minDistance = Math.min( + ...discoveredToolNames.map((n) => levenshtein.get(toolPart, n)), + ); + + if (minDistance > MAX_TYPO_DISTANCE) continue; + + const suggestion = getToolSuggestion(toolPart, discoveredToolNames); + const source = rule.source ? ` (from ${rule.source})` : ''; + warnings.push( + `Unrecognized MCP tool "${toolPart}" for server "${serverName}"${source}.${suggestion}`, + ); + } + + return warnings; +} diff --git a/packages/core/src/tools/mcp-client.ts b/packages/core/src/tools/mcp-client.ts index 18c2029d9e..24f93052bf 100644 --- a/packages/core/src/tools/mcp-client.ts +++ b/packages/core/src/tools/mcp-client.ts @@ -69,6 +69,7 @@ import { debugLogger } from '../utils/debugLogger.js'; import { type MessageBus } from '../confirmation-bus/message-bus.js'; import { coreEvents } from '../utils/events.js'; import type { ResourceRegistry } from '../resources/resource-registry.js'; +import { validateMcpPolicyToolNames } from '../policy/toml-loader.js'; import { sanitizeEnvironment, type EnvironmentSanitizationConfig, @@ -221,6 +222,23 @@ export class McpClient implements McpProgressReporter { this.toolRegistry.registerTool(tool); } this.toolRegistry.sortTools(); + + // Validate MCP tool names in policy rules against discovered tools + try { + const discoveredToolNames = tools.map((t) => t.serverToolName); + const policyRules = cliConfig.getPolicyEngine?.()?.getRules() ?? []; + const warnings = validateMcpPolicyToolNames( + this.serverName, + discoveredToolNames, + policyRules, + ); + for (const warning of warnings) { + coreEvents.emitFeedback('warning', warning); + } + } catch { + // Policy engine may not be available in all contexts (e.g. tests). + // Validation is best-effort; skip silently if unavailable. + } } /** @@ -1577,6 +1595,9 @@ export interface McpContext { ): void; setUserInteractedWithMcp?(): void; isTrustedFolder(): boolean; + getPolicyEngine?(): { + getRules(): ReadonlyArray<{ toolName?: string; source?: string }>; + }; } /**