feat(policy): Implement Tool Annotation Matching in Policy Engine (#20029)

2026-06-15 22:07:29 -07:00 · 2026-02-23 16:39:40 -05:00
parent 0bc2d3ab16
commit 3e5e608a22
6 changed files with 159 additions and 1 deletions
@@ -352,6 +352,38 @@ describe('Policy Engine Integration Tests', () => {
      ).toBe(PolicyDecision.DENY);
    });

+    it('should correctly match tool annotations', async () => {
+      const settings: Settings = {};
+
+      const config = await createPolicyEngineConfig(
+        settings,
+        ApprovalMode.DEFAULT,
+      );
+
+      // Add a manual rule with annotations to the config
+      config.rules = config.rules || [];
+      config.rules.push({
+        toolAnnotations: { readOnlyHint: true },
+        decision: PolicyDecision.ALLOW,
+        priority: 10,
+      });
+
+      const engine = new PolicyEngine(config);
+
+      // A tool with readOnlyHint=true should be ALLOWED
+      const roCall = { name: 'some_tool', args: {} };
+      const roMeta = { readOnlyHint: true };
+      expect((await engine.check(roCall, undefined, roMeta)).decision).toBe(
+        PolicyDecision.ALLOW,
+      );
+
+      // A tool without the hint (or with false) should follow default decision (ASK_USER)
+      const rwMeta = { readOnlyHint: false };
+      expect((await engine.check(roCall, undefined, rwMeta)).decision).toBe(
+        PolicyDecision.ASK_USER,
+      );
+    });
+
    describe.each(['write_file', 'replace'])(
      'Plan Mode policy for %s',
      (toolName) => {
@@ -2557,4 +2557,68 @@ describe('PolicyEngine', () => {
      expect(checkers[0].priority).toBe(2.5);
    });
  });
+
+  describe('Tool Annotations', () => {
+    it('should match tools by semantic annotations', async () => {
+      engine = new PolicyEngine({
+        rules: [
+          {
+            toolAnnotations: { readOnlyHint: true },
+            decision: PolicyDecision.ALLOW,
+            priority: 10,
+          },
+        ],
+        defaultDecision: PolicyDecision.DENY,
+      });
+
+      const readOnlyTool = { name: 'read', args: {} };
+      const readOnlyMeta = { readOnlyHint: true, extra: 'info' };
+
+      const writeTool = { name: 'write', args: {} };
+      const writeMeta = { readOnlyHint: false };
+
+      expect(
+        (await engine.check(readOnlyTool, undefined, readOnlyMeta)).decision,
+      ).toBe(PolicyDecision.ALLOW);
+      expect(
+        (await engine.check(writeTool, undefined, writeMeta)).decision,
+      ).toBe(PolicyDecision.DENY);
+      expect((await engine.check(writeTool, undefined, {})).decision).toBe(
+        PolicyDecision.DENY,
+      );
+    });
+
+    it('should support scoped annotation rules', async () => {
+      engine = new PolicyEngine({
+        rules: [
+          {
+            toolName: '*__*',
+            toolAnnotations: { experimental: true },
+            decision: PolicyDecision.DENY,
+            priority: 20,
+          },
+          {
+            toolName: '*__*',
+            decision: PolicyDecision.ALLOW,
+            priority: 10,
+          },
+        ],
+      });
+
+      expect(
+        (
+          await engine.check({ name: 'mcp__test' }, 'mcp', {
+            experimental: true,
+          })
+        ).decision,
+      ).toBe(PolicyDecision.DENY);
+      expect(
+        (
+          await engine.check({ name: 'mcp__stable' }, 'mcp', {
+            experimental: false,
+          })
+        ).decision,
+      ).toBe(PolicyDecision.ALLOW);
+    });
+  });
 });
@@ -102,6 +102,7 @@ function ruleMatches(
  stringifiedArgs: string | undefined,
  serverName: string | undefined,
  currentApprovalMode: ApprovalMode,
+  toolAnnotations?: Record<string, unknown>,
 ): boolean {
  // Check if rule applies to current approval mode
  if (rule.modes && rule.modes.length > 0) {
@@ -124,6 +125,18 @@ function ruleMatches(
    }
  }

+  // Check annotations if specified
+  if (rule.toolAnnotations) {
+    if (!toolAnnotations) {
+      return false;
+    }
+    for (const [key, value] of Object.entries(rule.toolAnnotations)) {
+      if (toolAnnotations[key] !== value) {
+        return false;
+      }
+    }
+  }
+
  // Check args pattern if specified
  if (rule.argsPattern) {
    // If rule has an args pattern but tool has no args, no match
@@ -204,6 +217,7 @@ export class PolicyEngine {
    dir_path: string | undefined,
    allowRedirection?: boolean,
    rule?: PolicyRule,
+    toolAnnotations?: Record<string, unknown>,
  ): Promise<CheckResult> {
    if (!command) {
      return {
@@ -294,6 +308,7 @@ export class PolicyEngine {
        const subResult = await this.check(
          { name: toolName, args: { command: subCmd, dir_path } },
          serverName,
+          toolAnnotations,
        );

        // subResult.decision is already filtered through applyNonInteractiveMode by this.check()
@@ -351,6 +366,7 @@ export class PolicyEngine {
  async check(
    toolCall: FunctionCall,
    serverName: string | undefined,
+    toolAnnotations?: Record<string, unknown>,
  ): Promise<CheckResult> {
    let stringifiedArgs: string | undefined;
    // Compute stringified args once before the loop
@@ -403,7 +419,14 @@ export class PolicyEngine {

    for (const rule of this.rules) {
      const match = toolCallsToTry.some((tc) =>
-        ruleMatches(rule, tc, stringifiedArgs, serverName, this.approvalMode),
+        ruleMatches(
+          rule,
+          tc,
+          stringifiedArgs,
+          serverName,
+          this.approvalMode,
+          toolAnnotations,
+        ),
      );

      if (match) {
@@ -420,6 +443,7 @@ export class PolicyEngine {
            shellDirPath,
            rule.allowRedirection,
            rule,
+            toolAnnotations,
          );
          decision = shellResult.decision;
          if (shellResult.rule) {
@@ -446,6 +470,9 @@ export class PolicyEngine {
          this.defaultDecision,
          serverName,
          shellDirPath,
+          undefined,
+          undefined,
+          toolAnnotations,
        );
        decision = shellResult.decision;
        matchedRule = shellResult.rule;
@@ -464,6 +491,7 @@ export class PolicyEngine {
            stringifiedArgs,
            serverName,
            this.approvalMode,
+            toolAnnotations,
          )
        ) {
          debugLogger.debug(
@@ -89,6 +89,24 @@ priority = 100
      expect(result.errors).toHaveLength(0);
    });

+    it('should parse toolAnnotations from TOML', async () => {
+      const result = await runLoadPoliciesFromToml(`
+[[rule]]
+toolName = "annotated-tool"
+toolAnnotations = { readOnlyHint = true, custom = "value" }
+decision = "allow"
+priority = 70
+`);
+
+      expect(result.rules).toHaveLength(1);
+      expect(result.rules[0].toolName).toBe('annotated-tool');
+      expect(result.rules[0].toolAnnotations).toEqual({
+        readOnlyHint: true,
+        custom: 'value',
+      });
+      expect(result.errors).toHaveLength(0);
+    });
+
    it('should transform mcpName = "*" to wildcard toolName', async () => {
      const result = await runLoadPoliciesFromToml(`
 [[rule]]
@@ -46,6 +46,7 @@ const PolicyRuleSchema = z.object({
        'priority must be <= 999 to prevent tier overflow. Priorities >= 1000 would jump to the next tier.',
    }),
  modes: z.array(z.nativeEnum(ApprovalMode)).optional(),
+  toolAnnotations: z.record(z.any()).optional(),
  allow_redirection: z.boolean().optional(),
  deny_message: z.string().optional(),
 });
@@ -61,6 +62,7 @@ const SafetyCheckerRuleSchema = z.object({
  commandRegex: z.string().optional(),
  priority: z.number().int().default(0),
  modes: z.array(z.nativeEnum(ApprovalMode)).optional(),
+  toolAnnotations: z.record(z.any()).optional(),
  checker: z.discriminatedUnion('type', [
    z.object({
      type: z.literal('in-process'),
@@ -383,6 +385,7 @@ export async function loadPoliciesFromToml(
                  decision: rule.decision,
                  priority: transformPriority(rule.priority, tier),
                  modes: rule.modes,
+                  toolAnnotations: rule.toolAnnotations,
                  allowRedirection: rule.allow_redirection,
                  source: `${tierName.charAt(0).toUpperCase() + tierName.slice(1)}: ${file}`,
                  denyMessage: rule.deny_message,
@@ -467,6 +470,7 @@ export async function loadPoliciesFromToml(
                  // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
                  checker: checker.checker as SafetyCheckerConfig,
                  modes: checker.modes,
+                  toolAnnotations: checker.toolAnnotations,
                  source: `${tierName.charAt(0).toUpperCase() + tierName.slice(1)}: ${file}`,
                };

@@ -115,6 +115,12 @@ export interface PolicyRule {
   */
  argsPattern?: RegExp;

+  /**
+   * Metadata annotations provided by the tool (e.g. readOnlyHint).
+   * All keys and values in this record must match the tool's annotations.
+   */
+  toolAnnotations?: Record<string, unknown>;
+
  /**
   * The decision to make when this rule matches.
   */
@@ -165,6 +171,12 @@ export interface SafetyCheckerRule {
   */
  argsPattern?: RegExp;

+  /**
+   * Metadata annotations provided by the tool (e.g. readOnlyHint).
+   * All keys and values in this record must match the tool's annotations.
+   */
+  toolAnnotations?: Record<string, unknown>;
+
  /**
   * Priority of this checker. Higher numbers run first.
   * Default is 0.