feat(core): add tool name validation in TOML policy files (#19281)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2026-06-14 13:27:38 -07:00 · 2026-03-02 13:47:21 -08:00
parent dd9ccc9807
commit bb6d1a2775
4 changed files with 460 additions and 12 deletions
@@ -144,7 +144,8 @@ export function getPolicyTier(
 */
 export function formatPolicyError(error: PolicyFileError): string {
  const tierLabel = error.tier.toUpperCase();
-  let message = `[${tierLabel}] Policy file error in ${error.fileName}:\n`;
+  const severityLabel = error.severity === 'warning' ? 'warning' : 'error';
+  let message = `[${tierLabel}] Policy file ${severityLabel} in ${error.fileName}:\n`;
  message += `  ${error.message}`;
  if (error.details) {
    message += `\n${error.details}`;
@@ -293,7 +294,10 @@ export async function createPolicyEngineConfig(
  // coreEvents has a buffer that will display these once the UI is ready
  if (errors.length > 0) {
    for (const error of errors) {
-      coreEvents.emitFeedback('error', formatPolicyError(error));
+      coreEvents.emitFeedback(
+        error.severity ?? 'error',
+        formatPolicyError(error),
+      );
    }
  }

@@ -14,13 +14,26 @@ import * as fs from 'node:fs/promises';
 import * as path from 'node:path';
 import * as os from 'node:os';
 import { fileURLToPath } from 'node:url';
-import { loadPoliciesFromToml } from './toml-loader.js';
+import {
+  loadPoliciesFromToml,
+  validateMcpPolicyToolNames,
+} from './toml-loader.js';
 import type { PolicyLoadResult } from './toml-loader.js';
 import { PolicyEngine } from './policy-engine.js';

 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);

+/** Returns only errors (severity !== 'warning') from a PolicyLoadResult. */
+function getErrors(result: PolicyLoadResult): PolicyLoadResult['errors'] {
+  return result.errors.filter((e) => e.severity !== 'warning');
+}
+
+/** Returns only warnings (severity === 'warning') from a PolicyLoadResult. */
+function getWarnings(result: PolicyLoadResult): PolicyLoadResult['errors'] {
+  return result.errors.filter((e) => e.severity === 'warning');
+}
+
 describe('policy-toml-loader', () => {
  let tempDir: string;

@@ -189,7 +202,7 @@ priority = 100
        'grep',
        'read',
      ]);
-      expect(result.errors).toHaveLength(0);
+      expect(getErrors(result)).toHaveLength(0);
    });

    it('should transform mcpName to composite toolName', async () => {
@@ -228,7 +241,7 @@ modes = ["yolo"]
      expect(result.rules[0].modes).toEqual(['default', 'yolo']);
      expect(result.rules[1].toolName).toBe('grep');
      expect(result.rules[1].modes).toEqual(['yolo']);
-      expect(result.errors).toHaveLength(0);
+      expect(getErrors(result)).toHaveLength(0);
    });

    it('should parse and transform allow_redirection property', async () => {
@@ -259,7 +272,7 @@ deny_message = "Deletion is permanent"
      expect(result.rules[0].toolName).toBe('rm');
      expect(result.rules[0].decision).toBe(PolicyDecision.DENY);
      expect(result.rules[0].denyMessage).toBe('Deletion is permanent');
-      expect(result.errors).toHaveLength(0);
+      expect(getErrors(result)).toHaveLength(0);
    });

    it('should support modes property for Tier 4 and Tier 5 policies', async () => {
@@ -547,8 +560,8 @@ commandRegex = ".*"
 decision = "allow"
 priority = 100
 `);
-      expect(result.errors).toHaveLength(1);
-      const error = result.errors[0];
+      expect(getErrors(result)).toHaveLength(1);
+      const error = getErrors(result)[0];
      expect(error.errorType).toBe('rule_validation');
      expect(error.details).toContain('run_shell_command');
    });
@@ -576,8 +589,8 @@ argsPattern = "([a-z)"
 decision = "allow"
 priority = 100
 `);
-      expect(result.errors).toHaveLength(1);
-      const error = result.errors[0];
+      expect(getErrors(result)).toHaveLength(1);
+      const error = getErrors(result)[0];
      expect(error.errorType).toBe('regex_compilation');
      expect(error.message).toBe('Invalid regex pattern');
    });
@@ -592,7 +605,7 @@ priority = 100
      const getPolicyTier = (_dir: string) => 1;
      const result = await loadPoliciesFromToml([filePath], getPolicyTier);

-      expect(result.errors).toHaveLength(0);
+      expect(getErrors(result)).toHaveLength(0);
      expect(result.rules).toHaveLength(1);
      expect(result.rules[0].toolName).toBe('test-tool');
      expect(result.rules[0].decision).toBe(PolicyDecision.ALLOW);
@@ -612,6 +625,177 @@ priority = 100
    });
  });

+  describe('Tool name validation', () => {
+    it('should warn for unrecognized tool names with suggestions', async () => {
+      const result = await runLoadPoliciesFromToml(`
+[[rule]]
+toolName = "grob"
+decision = "allow"
+priority = 100
+`);
+
+      const warnings = getWarnings(result);
+      expect(warnings).toHaveLength(1);
+      expect(warnings[0].errorType).toBe('tool_name_warning');
+      expect(warnings[0].severity).toBe('warning');
+      expect(warnings[0].details).toContain('Unrecognized tool name "grob"');
+      expect(warnings[0].details).toContain('glob');
+      // Rules should still load despite warnings
+      expect(result.rules).toHaveLength(1);
+      expect(result.rules[0].toolName).toBe('grob');
+    });
+
+    it('should not warn for valid built-in tool names', async () => {
+      const result = await runLoadPoliciesFromToml(`
+[[rule]]
+toolName = "glob"
+decision = "allow"
+priority = 100
+
+[[rule]]
+toolName = "read_file"
+decision = "allow"
+priority = 100
+`);
+
+      expect(getWarnings(result)).toHaveLength(0);
+      expect(getErrors(result)).toHaveLength(0);
+      expect(result.rules).toHaveLength(2);
+    });
+
+    it('should not warn for wildcard "*"', async () => {
+      const result = await runLoadPoliciesFromToml(`
+[[rule]]
+toolName = "*"
+decision = "allow"
+priority = 100
+`);
+
+      expect(getWarnings(result)).toHaveLength(0);
+      expect(getErrors(result)).toHaveLength(0);
+    });
+
+    it('should not warn for MCP format tool names', async () => {
+      const result = await runLoadPoliciesFromToml(`
+[[rule]]
+toolName = "my-server__my-tool"
+decision = "allow"
+priority = 100
+
+[[rule]]
+toolName = "my-server__*"
+decision = "allow"
+priority = 100
+`);
+
+      expect(getWarnings(result)).toHaveLength(0);
+      expect(getErrors(result)).toHaveLength(0);
+    });
+
+    it('should not warn when mcpName is present (skips validation)', async () => {
+      const result = await runLoadPoliciesFromToml(`
+[[rule]]
+mcpName = "my-server"
+toolName = "nonexistent"
+decision = "allow"
+priority = 100
+`);
+
+      expect(getWarnings(result)).toHaveLength(0);
+      expect(getErrors(result)).toHaveLength(0);
+    });
+
+    it('should not warn for legacy aliases', async () => {
+      const result = await runLoadPoliciesFromToml(`
+[[rule]]
+toolName = "search_file_content"
+decision = "allow"
+priority = 100
+`);
+
+      expect(getWarnings(result)).toHaveLength(0);
+      expect(getErrors(result)).toHaveLength(0);
+    });
+
+    it('should not warn for discovered tool prefix', async () => {
+      const result = await runLoadPoliciesFromToml(`
+[[rule]]
+toolName = "discovered_tool_my_custom_tool"
+decision = "allow"
+priority = 100
+`);
+
+      expect(getWarnings(result)).toHaveLength(0);
+      expect(getErrors(result)).toHaveLength(0);
+    });
+
+    it('should warn for each invalid name in a toolName array', async () => {
+      const result = await runLoadPoliciesFromToml(`
+[[rule]]
+toolName = ["grob", "glob", "replce"]
+decision = "allow"
+priority = 100
+`);
+
+      const warnings = getWarnings(result);
+      expect(warnings).toHaveLength(2);
+      expect(warnings[0].details).toContain('"grob"');
+      expect(warnings[1].details).toContain('"replce"');
+      // All rules still load
+      expect(result.rules).toHaveLength(3);
+    });
+
+    it('should not warn for names far from any built-in (dynamic/agent tools)', async () => {
+      const result = await runLoadPoliciesFromToml(`
+[[rule]]
+toolName = "delegate_to_agent"
+decision = "allow"
+priority = 100
+
+[[rule]]
+toolName = "my_custom_tool"
+decision = "allow"
+priority = 100
+`);
+
+      expect(getWarnings(result)).toHaveLength(0);
+      expect(getErrors(result)).toHaveLength(0);
+      expect(result.rules).toHaveLength(2);
+    });
+
+    it('should not warn for catch-all rules (no toolName)', async () => {
+      const result = await runLoadPoliciesFromToml(`
+[[rule]]
+decision = "deny"
+priority = 100
+`);
+
+      expect(getWarnings(result)).toHaveLength(0);
+      expect(getErrors(result)).toHaveLength(0);
+      expect(result.rules).toHaveLength(1);
+    });
+
+    it('should still load rules even with warnings', async () => {
+      const result = await runLoadPoliciesFromToml(`
+[[rule]]
+toolName = "wrte_file"
+decision = "deny"
+priority = 50
+
+[[rule]]
+toolName = "glob"
+decision = "allow"
+priority = 100
+`);
+
+      expect(getWarnings(result)).toHaveLength(1);
+      expect(getErrors(result)).toHaveLength(0);
+      expect(result.rules).toHaveLength(2);
+      expect(result.rules[0].toolName).toBe('wrte_file');
+      expect(result.rules[1].toolName).toBe('glob');
+    });
+  });
+
  describe('Built-in Plan Mode Policy', () => {
    it('should allow MCP tools with readOnlyHint annotation in Plan Mode (ASK_USER, not DENY)', async () => {
      const planTomlPath = path.resolve(__dirname, 'policies', 'plan.toml');
@@ -779,4 +963,88 @@ priority = 100
      }
    });
  });
+
+  describe('validateMcpPolicyToolNames', () => {
+    it('should warn for MCP tool names that are likely typos', () => {
+      const warnings = validateMcpPolicyToolNames(
+        'google-workspace',
+        ['people.getMe', 'calendar.list', 'calendar.get'],
+        [
+          {
+            toolName: 'google-workspace__people.getxMe',
+            source: 'User: workspace.toml',
+          },
+        ],
+      );
+
+      expect(warnings).toHaveLength(1);
+      expect(warnings[0]).toContain('people.getxMe');
+      expect(warnings[0]).toContain('google-workspace');
+      expect(warnings[0]).toContain('people.getMe');
+    });
+
+    it('should not warn for matching MCP tool names', () => {
+      const warnings = validateMcpPolicyToolNames(
+        'google-workspace',
+        ['people.getMe', 'calendar.list'],
+        [
+          { toolName: 'google-workspace__people.getMe' },
+          { toolName: 'google-workspace__calendar.list' },
+        ],
+      );
+
+      expect(warnings).toHaveLength(0);
+    });
+
+    it('should not warn for wildcard MCP rules', () => {
+      const warnings = validateMcpPolicyToolNames(
+        'my-server',
+        ['tool1', 'tool2'],
+        [{ toolName: 'my-server__*' }],
+      );
+
+      expect(warnings).toHaveLength(0);
+    });
+
+    it('should not warn for rules targeting other servers', () => {
+      const warnings = validateMcpPolicyToolNames(
+        'server-a',
+        ['tool1'],
+        [{ toolName: 'server-b__toolx' }],
+      );
+
+      expect(warnings).toHaveLength(0);
+    });
+
+    it('should not warn for tool names far from any discovered tool', () => {
+      const warnings = validateMcpPolicyToolNames(
+        'my-server',
+        ['tool1', 'tool2'],
+        [{ toolName: 'my-server__completely_different_name' }],
+      );
+
+      expect(warnings).toHaveLength(0);
+    });
+
+    it('should skip rules without toolName', () => {
+      const warnings = validateMcpPolicyToolNames(
+        'my-server',
+        ['tool1'],
+        [{ toolName: undefined }],
+      );
+
+      expect(warnings).toHaveLength(0);
+    });
+
+    it('should include source in warning when available', () => {
+      const warnings = validateMcpPolicyToolNames(
+        'my-server',
+        ['tool1'],
+        [{ toolName: 'my-server__tol1', source: 'User: custom.toml' }],
+      );
+
+      expect(warnings).toHaveLength(1);
+      expect(warnings[0]).toContain('User: custom.toml');
+    });
+  });
 });
@@ -13,12 +13,25 @@ import {
  InProcessCheckerType,
 } from './types.js';
 import { buildArgsPatterns, isSafeRegExp } from './utils.js';
+import {
+  isValidToolName,
+  ALL_BUILTIN_TOOL_NAMES,
+} from '../tools/tool-names.js';
+import { getToolSuggestion } from '../utils/tool-utils.js';
+import levenshtein from 'fast-levenshtein';
 import fs from 'node:fs/promises';
 import path from 'node:path';
 import toml from '@iarna/toml';
 import { z, type ZodError } from 'zod';
 import { isNodeError } from '../utils/errors.js';

+/**
+ * Maximum Levenshtein distance to consider a name a likely typo of a built-in tool.
+ * Names further from all built-in tools are assumed to be intentional
+ * (e.g., dynamically registered agent tools) and are not warned about.
+ */
+const MAX_TYPO_DISTANCE = 3;
+
 /**
 * Schema for a single policy rule in the TOML file (before transformation).
 */
@@ -100,7 +113,8 @@ export type PolicyFileErrorType =
  | 'toml_parse'
  | 'schema_validation'
  | 'rule_validation'
-  | 'regex_compilation';
+  | 'regex_compilation'
+  | 'tool_name_warning';

 /**
 * Detailed error information for policy file loading failures.
@@ -114,6 +128,7 @@ export interface PolicyFileError {
  message: string;
  details?: string;
  suggestion?: string;
+  severity?: 'error' | 'warning';
 }

 /**
@@ -241,6 +256,36 @@ function validateShellCommandSyntax(
  return null;
 }

+/**
+ * Validates that a tool name is recognized.
+ * Returns a warning message if the tool name is a likely typo of a built-in
+ * tool name, or null if valid or not close to any built-in name.
+ */
+function validateToolName(name: string, ruleIndex: number): string | null {
+  // A name that looks like an MCP tool (e.g., "re__ad") could be a typo of a
+  // built-in tool ("read_file"). We should let such names fall through to the
+  // Levenshtein distance check below. Non-MCP-like names that are valid can
+  // be safely skipped.
+  if (isValidToolName(name, { allowWildcards: true }) && !name.includes('__')) {
+    return null;
+  }
+
+  // Only warn if the name is close to a built-in name (likely typo).
+  // Names that are very different from all built-in names are likely
+  // intentional (dynamic tools, agent tools, etc.).
+  const allNames = [...ALL_BUILTIN_TOOL_NAMES];
+  const minDistance = Math.min(
+    ...allNames.map((n) => levenshtein.get(name, n)),
+  );
+
+  if (minDistance > MAX_TYPO_DISTANCE) {
+    return null;
+  }
+
+  const suggestion = getToolSuggestion(name, allNames);
+  return `Rule #${ruleIndex + 1}: Unrecognized tool name "${name}".${suggestion}`;
+}
+
 /**
 * Transforms a priority number based on the policy tier.
 * Formula: tier + priority/1000
@@ -354,6 +399,35 @@ export async function loadPoliciesFromToml(
          }
        }

+        // Validate tool names in rules
+        for (let i = 0; i < tomlRules.length; i++) {
+          const rule = tomlRules[i];
+          // Skip MCP-scoped rules — MCP tool names are server-defined and dynamic
+          if (rule.mcpName) continue;
+
+          const toolNames: string[] = rule.toolName
+            ? Array.isArray(rule.toolName)
+              ? rule.toolName
+              : [rule.toolName]
+            : [];
+
+          for (const name of toolNames) {
+            const warning = validateToolName(name, i);
+            if (warning) {
+              errors.push({
+                filePath,
+                fileName: file,
+                tier: tierName,
+                ruleIndex: i,
+                errorType: 'tool_name_warning',
+                message: 'Unrecognized tool name',
+                details: warning,
+                severity: 'warning',
+              });
+            }
+          }
+        }
+
        // Transform rules
        const parsedRules: PolicyRule[] = (validationResult.data.rule ?? [])
          .flatMap((rule) => {
@@ -439,6 +513,35 @@ export async function loadPoliciesFromToml(

        rules.push(...parsedRules);

+        // Validate tool names in safety checker rules
+        const tomlCheckerRules = validationResult.data.safety_checker ?? [];
+        for (let i = 0; i < tomlCheckerRules.length; i++) {
+          const checker = tomlCheckerRules[i];
+          if (checker.mcpName) continue;
+
+          const checkerToolNames: string[] = checker.toolName
+            ? Array.isArray(checker.toolName)
+              ? checker.toolName
+              : [checker.toolName]
+            : [];
+
+          for (const name of checkerToolNames) {
+            const warning = validateToolName(name, i);
+            if (warning) {
+              errors.push({
+                filePath,
+                fileName: file,
+                tier: tierName,
+                ruleIndex: i,
+                errorType: 'tool_name_warning',
+                message: 'Unrecognized tool name in safety checker',
+                details: warning,
+                severity: 'warning',
+              });
+            }
+          }
+        }
+
        // Transform checkers
        const parsedCheckers: SafetyCheckerRule[] = (
          validationResult.data.safety_checker ?? []
@@ -535,3 +638,55 @@ export async function loadPoliciesFromToml(

  return { rules, checkers, errors };
 }
+
+/**
+ * Validates MCP tool names in policy rules against actually discovered MCP tools.
+ * Called after an MCP server connects and its tools are discovered.
+ *
+ * For each policy rule that references the given MCP server, checks if the
+ * tool name matches any discovered tool. Emits warnings for likely typos
+ * using Levenshtein distance.
+ *
+ * @param serverName The MCP server name (e.g., "google-workspace")
+ * @param discoveredToolNames The tool names discovered from this server (simple names, not fully qualified)
+ * @param policyRules The current set of policy rules to validate against
+ * @returns Array of warning messages for unrecognized MCP tool names
+ */
+export function validateMcpPolicyToolNames(
+  serverName: string,
+  discoveredToolNames: string[],
+  policyRules: ReadonlyArray<{ toolName?: string; source?: string }>,
+): string[] {
+  const prefix = `${serverName}__`;
+  const warnings: string[] = [];
+
+  for (const rule of policyRules) {
+    if (!rule.toolName) continue;
+    if (!rule.toolName.startsWith(prefix)) continue;
+
+    const toolPart = rule.toolName.slice(prefix.length);
+
+    // Skip wildcards
+    if (toolPart === '*') continue;
+
+    // Check if the tool exists
+    if (discoveredToolNames.includes(toolPart)) continue;
+
+    // Tool not found — check if it's a likely typo
+    if (discoveredToolNames.length === 0) continue;
+
+    const minDistance = Math.min(
+      ...discoveredToolNames.map((n) => levenshtein.get(toolPart, n)),
+    );
+
+    if (minDistance > MAX_TYPO_DISTANCE) continue;
+
+    const suggestion = getToolSuggestion(toolPart, discoveredToolNames);
+    const source = rule.source ? ` (from ${rule.source})` : '';
+    warnings.push(
+      `Unrecognized MCP tool "${toolPart}" for server "${serverName}"${source}.${suggestion}`,
+    );
+  }
+
+  return warnings;
+}
@@ -69,6 +69,7 @@ import { debugLogger } from '../utils/debugLogger.js';
 import { type MessageBus } from '../confirmation-bus/message-bus.js';
 import { coreEvents } from '../utils/events.js';
 import type { ResourceRegistry } from '../resources/resource-registry.js';
+import { validateMcpPolicyToolNames } from '../policy/toml-loader.js';
 import {
  sanitizeEnvironment,
  type EnvironmentSanitizationConfig,
@@ -221,6 +222,23 @@ export class McpClient implements McpProgressReporter {
      this.toolRegistry.registerTool(tool);
    }
    this.toolRegistry.sortTools();
+
+    // Validate MCP tool names in policy rules against discovered tools
+    try {
+      const discoveredToolNames = tools.map((t) => t.serverToolName);
+      const policyRules = cliConfig.getPolicyEngine?.()?.getRules() ?? [];
+      const warnings = validateMcpPolicyToolNames(
+        this.serverName,
+        discoveredToolNames,
+        policyRules,
+      );
+      for (const warning of warnings) {
+        coreEvents.emitFeedback('warning', warning);
+      }
+    } catch {
+      // Policy engine may not be available in all contexts (e.g. tests).
+      // Validation is best-effort; skip silently if unavailable.
+    }
  }

  /**
@@ -1577,6 +1595,9 @@ export interface McpContext {
  ): void;
  setUserInteractedWithMcp?(): void;
  isTrustedFolder(): boolean;
+  getPolicyEngine?(): {
+    getRules(): ReadonlyArray<{ toolName?: string; source?: string }>;
+  };
 }

 /**