feat(core): add tool name validation in TOML policy files (#19281)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Allen Hutchison
2026-03-02 13:47:21 -08:00
committed by GitHub
parent dd9ccc9807
commit bb6d1a2775
4 changed files with 460 additions and 12 deletions

View File

@@ -144,7 +144,8 @@ export function getPolicyTier(
*/
export function formatPolicyError(error: PolicyFileError): string {
const tierLabel = error.tier.toUpperCase();
let message = `[${tierLabel}] Policy file error in ${error.fileName}:\n`;
const severityLabel = error.severity === 'warning' ? 'warning' : 'error';
let message = `[${tierLabel}] Policy file ${severityLabel} in ${error.fileName}:\n`;
message += ` ${error.message}`;
if (error.details) {
message += `\n${error.details}`;
@@ -293,7 +294,10 @@ export async function createPolicyEngineConfig(
// coreEvents has a buffer that will display these once the UI is ready
if (errors.length > 0) {
for (const error of errors) {
coreEvents.emitFeedback('error', formatPolicyError(error));
coreEvents.emitFeedback(
error.severity ?? 'error',
formatPolicyError(error),
);
}
}

View File

@@ -14,13 +14,26 @@ import * as fs from 'node:fs/promises';
import * as path from 'node:path';
import * as os from 'node:os';
import { fileURLToPath } from 'node:url';
import { loadPoliciesFromToml } from './toml-loader.js';
import {
loadPoliciesFromToml,
validateMcpPolicyToolNames,
} from './toml-loader.js';
import type { PolicyLoadResult } from './toml-loader.js';
import { PolicyEngine } from './policy-engine.js';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
/** Returns only errors (severity !== 'warning') from a PolicyLoadResult. */
function getErrors(result: PolicyLoadResult): PolicyLoadResult['errors'] {
return result.errors.filter((e) => e.severity !== 'warning');
}
/** Returns only warnings (severity === 'warning') from a PolicyLoadResult. */
function getWarnings(result: PolicyLoadResult): PolicyLoadResult['errors'] {
return result.errors.filter((e) => e.severity === 'warning');
}
describe('policy-toml-loader', () => {
let tempDir: string;
@@ -189,7 +202,7 @@ priority = 100
'grep',
'read',
]);
expect(result.errors).toHaveLength(0);
expect(getErrors(result)).toHaveLength(0);
});
it('should transform mcpName to composite toolName', async () => {
@@ -228,7 +241,7 @@ modes = ["yolo"]
expect(result.rules[0].modes).toEqual(['default', 'yolo']);
expect(result.rules[1].toolName).toBe('grep');
expect(result.rules[1].modes).toEqual(['yolo']);
expect(result.errors).toHaveLength(0);
expect(getErrors(result)).toHaveLength(0);
});
it('should parse and transform allow_redirection property', async () => {
@@ -259,7 +272,7 @@ deny_message = "Deletion is permanent"
expect(result.rules[0].toolName).toBe('rm');
expect(result.rules[0].decision).toBe(PolicyDecision.DENY);
expect(result.rules[0].denyMessage).toBe('Deletion is permanent');
expect(result.errors).toHaveLength(0);
expect(getErrors(result)).toHaveLength(0);
});
it('should support modes property for Tier 4 and Tier 5 policies', async () => {
@@ -547,8 +560,8 @@ commandRegex = ".*"
decision = "allow"
priority = 100
`);
expect(result.errors).toHaveLength(1);
const error = result.errors[0];
expect(getErrors(result)).toHaveLength(1);
const error = getErrors(result)[0];
expect(error.errorType).toBe('rule_validation');
expect(error.details).toContain('run_shell_command');
});
@@ -576,8 +589,8 @@ argsPattern = "([a-z)"
decision = "allow"
priority = 100
`);
expect(result.errors).toHaveLength(1);
const error = result.errors[0];
expect(getErrors(result)).toHaveLength(1);
const error = getErrors(result)[0];
expect(error.errorType).toBe('regex_compilation');
expect(error.message).toBe('Invalid regex pattern');
});
@@ -592,7 +605,7 @@ priority = 100
const getPolicyTier = (_dir: string) => 1;
const result = await loadPoliciesFromToml([filePath], getPolicyTier);
expect(result.errors).toHaveLength(0);
expect(getErrors(result)).toHaveLength(0);
expect(result.rules).toHaveLength(1);
expect(result.rules[0].toolName).toBe('test-tool');
expect(result.rules[0].decision).toBe(PolicyDecision.ALLOW);
@@ -612,6 +625,177 @@ priority = 100
});
});
describe('Tool name validation', () => {
it('should warn for unrecognized tool names with suggestions', async () => {
const result = await runLoadPoliciesFromToml(`
[[rule]]
toolName = "grob"
decision = "allow"
priority = 100
`);
const warnings = getWarnings(result);
expect(warnings).toHaveLength(1);
expect(warnings[0].errorType).toBe('tool_name_warning');
expect(warnings[0].severity).toBe('warning');
expect(warnings[0].details).toContain('Unrecognized tool name "grob"');
expect(warnings[0].details).toContain('glob');
// Rules should still load despite warnings
expect(result.rules).toHaveLength(1);
expect(result.rules[0].toolName).toBe('grob');
});
it('should not warn for valid built-in tool names', async () => {
const result = await runLoadPoliciesFromToml(`
[[rule]]
toolName = "glob"
decision = "allow"
priority = 100
[[rule]]
toolName = "read_file"
decision = "allow"
priority = 100
`);
expect(getWarnings(result)).toHaveLength(0);
expect(getErrors(result)).toHaveLength(0);
expect(result.rules).toHaveLength(2);
});
it('should not warn for wildcard "*"', async () => {
const result = await runLoadPoliciesFromToml(`
[[rule]]
toolName = "*"
decision = "allow"
priority = 100
`);
expect(getWarnings(result)).toHaveLength(0);
expect(getErrors(result)).toHaveLength(0);
});
it('should not warn for MCP format tool names', async () => {
const result = await runLoadPoliciesFromToml(`
[[rule]]
toolName = "my-server__my-tool"
decision = "allow"
priority = 100
[[rule]]
toolName = "my-server__*"
decision = "allow"
priority = 100
`);
expect(getWarnings(result)).toHaveLength(0);
expect(getErrors(result)).toHaveLength(0);
});
it('should not warn when mcpName is present (skips validation)', async () => {
const result = await runLoadPoliciesFromToml(`
[[rule]]
mcpName = "my-server"
toolName = "nonexistent"
decision = "allow"
priority = 100
`);
expect(getWarnings(result)).toHaveLength(0);
expect(getErrors(result)).toHaveLength(0);
});
it('should not warn for legacy aliases', async () => {
const result = await runLoadPoliciesFromToml(`
[[rule]]
toolName = "search_file_content"
decision = "allow"
priority = 100
`);
expect(getWarnings(result)).toHaveLength(0);
expect(getErrors(result)).toHaveLength(0);
});
it('should not warn for discovered tool prefix', async () => {
const result = await runLoadPoliciesFromToml(`
[[rule]]
toolName = "discovered_tool_my_custom_tool"
decision = "allow"
priority = 100
`);
expect(getWarnings(result)).toHaveLength(0);
expect(getErrors(result)).toHaveLength(0);
});
it('should warn for each invalid name in a toolName array', async () => {
const result = await runLoadPoliciesFromToml(`
[[rule]]
toolName = ["grob", "glob", "replce"]
decision = "allow"
priority = 100
`);
const warnings = getWarnings(result);
expect(warnings).toHaveLength(2);
expect(warnings[0].details).toContain('"grob"');
expect(warnings[1].details).toContain('"replce"');
// All rules still load
expect(result.rules).toHaveLength(3);
});
it('should not warn for names far from any built-in (dynamic/agent tools)', async () => {
const result = await runLoadPoliciesFromToml(`
[[rule]]
toolName = "delegate_to_agent"
decision = "allow"
priority = 100
[[rule]]
toolName = "my_custom_tool"
decision = "allow"
priority = 100
`);
expect(getWarnings(result)).toHaveLength(0);
expect(getErrors(result)).toHaveLength(0);
expect(result.rules).toHaveLength(2);
});
it('should not warn for catch-all rules (no toolName)', async () => {
const result = await runLoadPoliciesFromToml(`
[[rule]]
decision = "deny"
priority = 100
`);
expect(getWarnings(result)).toHaveLength(0);
expect(getErrors(result)).toHaveLength(0);
expect(result.rules).toHaveLength(1);
});
it('should still load rules even with warnings', async () => {
const result = await runLoadPoliciesFromToml(`
[[rule]]
toolName = "wrte_file"
decision = "deny"
priority = 50
[[rule]]
toolName = "glob"
decision = "allow"
priority = 100
`);
expect(getWarnings(result)).toHaveLength(1);
expect(getErrors(result)).toHaveLength(0);
expect(result.rules).toHaveLength(2);
expect(result.rules[0].toolName).toBe('wrte_file');
expect(result.rules[1].toolName).toBe('glob');
});
});
describe('Built-in Plan Mode Policy', () => {
it('should allow MCP tools with readOnlyHint annotation in Plan Mode (ASK_USER, not DENY)', async () => {
const planTomlPath = path.resolve(__dirname, 'policies', 'plan.toml');
@@ -779,4 +963,88 @@ priority = 100
}
});
});
describe('validateMcpPolicyToolNames', () => {
it('should warn for MCP tool names that are likely typos', () => {
const warnings = validateMcpPolicyToolNames(
'google-workspace',
['people.getMe', 'calendar.list', 'calendar.get'],
[
{
toolName: 'google-workspace__people.getxMe',
source: 'User: workspace.toml',
},
],
);
expect(warnings).toHaveLength(1);
expect(warnings[0]).toContain('people.getxMe');
expect(warnings[0]).toContain('google-workspace');
expect(warnings[0]).toContain('people.getMe');
});
it('should not warn for matching MCP tool names', () => {
const warnings = validateMcpPolicyToolNames(
'google-workspace',
['people.getMe', 'calendar.list'],
[
{ toolName: 'google-workspace__people.getMe' },
{ toolName: 'google-workspace__calendar.list' },
],
);
expect(warnings).toHaveLength(0);
});
it('should not warn for wildcard MCP rules', () => {
const warnings = validateMcpPolicyToolNames(
'my-server',
['tool1', 'tool2'],
[{ toolName: 'my-server__*' }],
);
expect(warnings).toHaveLength(0);
});
it('should not warn for rules targeting other servers', () => {
const warnings = validateMcpPolicyToolNames(
'server-a',
['tool1'],
[{ toolName: 'server-b__toolx' }],
);
expect(warnings).toHaveLength(0);
});
it('should not warn for tool names far from any discovered tool', () => {
const warnings = validateMcpPolicyToolNames(
'my-server',
['tool1', 'tool2'],
[{ toolName: 'my-server__completely_different_name' }],
);
expect(warnings).toHaveLength(0);
});
it('should skip rules without toolName', () => {
const warnings = validateMcpPolicyToolNames(
'my-server',
['tool1'],
[{ toolName: undefined }],
);
expect(warnings).toHaveLength(0);
});
it('should include source in warning when available', () => {
const warnings = validateMcpPolicyToolNames(
'my-server',
['tool1'],
[{ toolName: 'my-server__tol1', source: 'User: custom.toml' }],
);
expect(warnings).toHaveLength(1);
expect(warnings[0]).toContain('User: custom.toml');
});
});
});

View File

@@ -13,12 +13,25 @@ import {
InProcessCheckerType,
} from './types.js';
import { buildArgsPatterns, isSafeRegExp } from './utils.js';
import {
isValidToolName,
ALL_BUILTIN_TOOL_NAMES,
} from '../tools/tool-names.js';
import { getToolSuggestion } from '../utils/tool-utils.js';
import levenshtein from 'fast-levenshtein';
import fs from 'node:fs/promises';
import path from 'node:path';
import toml from '@iarna/toml';
import { z, type ZodError } from 'zod';
import { isNodeError } from '../utils/errors.js';
/**
* Maximum Levenshtein distance to consider a name a likely typo of a built-in tool.
* Names further from all built-in tools are assumed to be intentional
* (e.g., dynamically registered agent tools) and are not warned about.
*/
const MAX_TYPO_DISTANCE = 3;
/**
* Schema for a single policy rule in the TOML file (before transformation).
*/
@@ -100,7 +113,8 @@ export type PolicyFileErrorType =
| 'toml_parse'
| 'schema_validation'
| 'rule_validation'
| 'regex_compilation';
| 'regex_compilation'
| 'tool_name_warning';
/**
* Detailed error information for policy file loading failures.
@@ -114,6 +128,7 @@ export interface PolicyFileError {
message: string;
details?: string;
suggestion?: string;
severity?: 'error' | 'warning';
}
/**
@@ -241,6 +256,36 @@ function validateShellCommandSyntax(
return null;
}
/**
* Validates that a tool name is recognized.
* Returns a warning message if the tool name is a likely typo of a built-in
* tool name, or null if valid or not close to any built-in name.
*/
function validateToolName(name: string, ruleIndex: number): string | null {
// A name that looks like an MCP tool (e.g., "re__ad") could be a typo of a
// built-in tool ("read_file"). We should let such names fall through to the
// Levenshtein distance check below. Non-MCP-like names that are valid can
// be safely skipped.
if (isValidToolName(name, { allowWildcards: true }) && !name.includes('__')) {
return null;
}
// Only warn if the name is close to a built-in name (likely typo).
// Names that are very different from all built-in names are likely
// intentional (dynamic tools, agent tools, etc.).
const allNames = [...ALL_BUILTIN_TOOL_NAMES];
const minDistance = Math.min(
...allNames.map((n) => levenshtein.get(name, n)),
);
if (minDistance > MAX_TYPO_DISTANCE) {
return null;
}
const suggestion = getToolSuggestion(name, allNames);
return `Rule #${ruleIndex + 1}: Unrecognized tool name "${name}".${suggestion}`;
}
/**
* Transforms a priority number based on the policy tier.
* Formula: tier + priority/1000
@@ -354,6 +399,35 @@ export async function loadPoliciesFromToml(
}
}
// Validate tool names in rules
for (let i = 0; i < tomlRules.length; i++) {
const rule = tomlRules[i];
// Skip MCP-scoped rules — MCP tool names are server-defined and dynamic
if (rule.mcpName) continue;
const toolNames: string[] = rule.toolName
? Array.isArray(rule.toolName)
? rule.toolName
: [rule.toolName]
: [];
for (const name of toolNames) {
const warning = validateToolName(name, i);
if (warning) {
errors.push({
filePath,
fileName: file,
tier: tierName,
ruleIndex: i,
errorType: 'tool_name_warning',
message: 'Unrecognized tool name',
details: warning,
severity: 'warning',
});
}
}
}
// Transform rules
const parsedRules: PolicyRule[] = (validationResult.data.rule ?? [])
.flatMap((rule) => {
@@ -439,6 +513,35 @@ export async function loadPoliciesFromToml(
rules.push(...parsedRules);
// Validate tool names in safety checker rules
const tomlCheckerRules = validationResult.data.safety_checker ?? [];
for (let i = 0; i < tomlCheckerRules.length; i++) {
const checker = tomlCheckerRules[i];
if (checker.mcpName) continue;
const checkerToolNames: string[] = checker.toolName
? Array.isArray(checker.toolName)
? checker.toolName
: [checker.toolName]
: [];
for (const name of checkerToolNames) {
const warning = validateToolName(name, i);
if (warning) {
errors.push({
filePath,
fileName: file,
tier: tierName,
ruleIndex: i,
errorType: 'tool_name_warning',
message: 'Unrecognized tool name in safety checker',
details: warning,
severity: 'warning',
});
}
}
}
// Transform checkers
const parsedCheckers: SafetyCheckerRule[] = (
validationResult.data.safety_checker ?? []
@@ -535,3 +638,55 @@ export async function loadPoliciesFromToml(
return { rules, checkers, errors };
}
/**
* Validates MCP tool names in policy rules against actually discovered MCP tools.
* Called after an MCP server connects and its tools are discovered.
*
* For each policy rule that references the given MCP server, checks if the
* tool name matches any discovered tool. Emits warnings for likely typos
* using Levenshtein distance.
*
* @param serverName The MCP server name (e.g., "google-workspace")
* @param discoveredToolNames The tool names discovered from this server (simple names, not fully qualified)
* @param policyRules The current set of policy rules to validate against
* @returns Array of warning messages for unrecognized MCP tool names
*/
export function validateMcpPolicyToolNames(
serverName: string,
discoveredToolNames: string[],
policyRules: ReadonlyArray<{ toolName?: string; source?: string }>,
): string[] {
const prefix = `${serverName}__`;
const warnings: string[] = [];
for (const rule of policyRules) {
if (!rule.toolName) continue;
if (!rule.toolName.startsWith(prefix)) continue;
const toolPart = rule.toolName.slice(prefix.length);
// Skip wildcards
if (toolPart === '*') continue;
// Check if the tool exists
if (discoveredToolNames.includes(toolPart)) continue;
// Tool not found — check if it's a likely typo
if (discoveredToolNames.length === 0) continue;
const minDistance = Math.min(
...discoveredToolNames.map((n) => levenshtein.get(toolPart, n)),
);
if (minDistance > MAX_TYPO_DISTANCE) continue;
const suggestion = getToolSuggestion(toolPart, discoveredToolNames);
const source = rule.source ? ` (from ${rule.source})` : '';
warnings.push(
`Unrecognized MCP tool "${toolPart}" for server "${serverName}"${source}.${suggestion}`,
);
}
return warnings;
}

View File

@@ -69,6 +69,7 @@ import { debugLogger } from '../utils/debugLogger.js';
import { type MessageBus } from '../confirmation-bus/message-bus.js';
import { coreEvents } from '../utils/events.js';
import type { ResourceRegistry } from '../resources/resource-registry.js';
import { validateMcpPolicyToolNames } from '../policy/toml-loader.js';
import {
sanitizeEnvironment,
type EnvironmentSanitizationConfig,
@@ -221,6 +222,23 @@ export class McpClient implements McpProgressReporter {
this.toolRegistry.registerTool(tool);
}
this.toolRegistry.sortTools();
// Validate MCP tool names in policy rules against discovered tools
try {
const discoveredToolNames = tools.map((t) => t.serverToolName);
const policyRules = cliConfig.getPolicyEngine?.()?.getRules() ?? [];
const warnings = validateMcpPolicyToolNames(
this.serverName,
discoveredToolNames,
policyRules,
);
for (const warning of warnings) {
coreEvents.emitFeedback('warning', warning);
}
} catch {
// Policy engine may not be available in all contexts (e.g. tests).
// Validation is best-effort; skip silently if unavailable.
}
}
/**
@@ -1577,6 +1595,9 @@ export interface McpContext {
): void;
setUserInteractedWithMcp?(): void;
isTrustedFolder(): boolean;
getPolicyEngine?(): {
getRules(): ReadonlyArray<{ toolName?: string; source?: string }>;
};
}
/**