feat(safety): Introduce safety checker framework (#12504)

This commit is contained in:
Allen Hutchison
2025-11-12 13:18:34 -08:00
committed by GitHub
parent aa9922bc98
commit 1ed163a666
21 changed files with 2636 additions and 328 deletions
+89 -11
View File
@@ -9,12 +9,15 @@ import {
PolicyDecision,
type PolicyEngineConfig,
type PolicyRule,
type SafetyCheckerRule,
} from './types.js';
import { stableStringify } from './stable-stringify.js';
import { debugLogger } from '../utils/debugLogger.js';
import type { CheckerRunner } from '../safety/checker-runner.js';
import { SafetyCheckDecision } from '../safety/protocol.js';
function ruleMatches(
rule: PolicyRule,
rule: PolicyRule | SafetyCheckerRule,
toolCall: FunctionCall,
stringifiedArgs: string | undefined,
serverName: string | undefined,
@@ -60,27 +63,41 @@ function ruleMatches(
export class PolicyEngine {
private rules: PolicyRule[];
private checkers: SafetyCheckerRule[];
private readonly defaultDecision: PolicyDecision;
private readonly nonInteractive: boolean;
private readonly checkerRunner?: CheckerRunner;
constructor(config: PolicyEngineConfig = {}) {
constructor(config: PolicyEngineConfig = {}, checkerRunner?: CheckerRunner) {
this.rules = (config.rules ?? []).sort(
(a, b) => (b.priority ?? 0) - (a.priority ?? 0),
);
this.checkers = (config.checkers ?? []).sort(
(a, b) => (b.priority ?? 0) - (a.priority ?? 0),
);
this.defaultDecision = config.defaultDecision ?? PolicyDecision.ASK_USER;
this.nonInteractive = config.nonInteractive ?? false;
this.checkerRunner = checkerRunner;
}
/**
* Check if a tool call is allowed based on the configured policies.
* Returns the decision and the matching rule (if any).
*/
check(
async check(
toolCall: FunctionCall,
serverName: string | undefined,
): PolicyDecision {
): Promise<{
decision: PolicyDecision;
rule?: PolicyRule;
}> {
let stringifiedArgs: string | undefined;
// Compute stringified args once before the loop
if (toolCall.args && this.rules.some((rule) => rule.argsPattern)) {
if (
toolCall.args &&
(this.rules.some((rule) => rule.argsPattern) ||
this.checkers.some((checker) => checker.argsPattern))
) {
stringifiedArgs = stableStringify(toolCall.args);
}
@@ -89,20 +106,72 @@ export class PolicyEngine {
);
// Find the first matching rule (already sorted by priority)
let matchedRule: PolicyRule | undefined;
let decision: PolicyDecision | undefined;
for (const rule of this.rules) {
if (ruleMatches(rule, toolCall, stringifiedArgs, serverName)) {
debugLogger.debug(
`[PolicyEngine.check] MATCHED rule: toolName=${rule.toolName}, decision=${rule.decision}, priority=${rule.priority}, argsPattern=${rule.argsPattern?.source || 'none'}`,
);
return this.applyNonInteractiveMode(rule.decision);
matchedRule = rule;
decision = this.applyNonInteractiveMode(rule.decision);
break;
}
}
// No matching rule found, use default decision
debugLogger.debug(
`[PolicyEngine.check] NO MATCH - using default decision: ${this.defaultDecision}`,
);
return this.applyNonInteractiveMode(this.defaultDecision);
if (!decision) {
// No matching rule found, use default decision
debugLogger.debug(
`[PolicyEngine.check] NO MATCH - using default decision: ${this.defaultDecision}`,
);
decision = this.applyNonInteractiveMode(this.defaultDecision);
}
// If decision is not DENY, run safety checkers
if (decision !== PolicyDecision.DENY && this.checkerRunner) {
for (const checkerRule of this.checkers) {
if (ruleMatches(checkerRule, toolCall, stringifiedArgs, serverName)) {
debugLogger.debug(
`[PolicyEngine.check] Running safety checker: ${checkerRule.checker.name}`,
);
try {
const result = await this.checkerRunner.runChecker(
toolCall,
checkerRule.checker,
);
if (result.decision === SafetyCheckDecision.DENY) {
debugLogger.debug(
`[PolicyEngine.check] Safety checker denied: ${result.reason}`,
);
return {
decision: PolicyDecision.DENY,
rule: matchedRule,
};
} else if (result.decision === SafetyCheckDecision.ASK_USER) {
debugLogger.debug(
`[PolicyEngine.check] Safety checker requested ASK_USER: ${result.reason}`,
);
decision = PolicyDecision.ASK_USER;
}
} catch (error) {
debugLogger.debug(
`[PolicyEngine.check] Safety checker failed: ${error}`,
);
return {
decision: PolicyDecision.DENY,
rule: matchedRule,
};
}
}
}
}
return {
decision: this.applyNonInteractiveMode(decision),
rule: matchedRule,
};
}
/**
@@ -114,6 +183,11 @@ export class PolicyEngine {
this.rules.sort((a, b) => (b.priority ?? 0) - (a.priority ?? 0));
}
addChecker(checker: SafetyCheckerRule): void {
this.checkers.push(checker);
this.checkers.sort((a, b) => (b.priority ?? 0) - (a.priority ?? 0));
}
/**
* Remove rules for a specific tool.
*/
@@ -128,6 +202,10 @@ export class PolicyEngine {
return this.rules;
}
getCheckers(): readonly SafetyCheckerRule[] {
return this.checkers;
}
private applyNonInteractiveMode(decision: PolicyDecision): PolicyDecision {
// In non-interactive mode, ASK_USER becomes DENY
if (this.nonInteractive && decision === PolicyDecision.ASK_USER) {