mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-20 10:10:56 -07:00
feat(safety): Introduce safety checker framework (#12504)
This commit is contained in:
@@ -30,24 +30,24 @@ describe('Policy Engine Integration Tests', () => {
|
||||
const engine = new PolicyEngine(config);
|
||||
|
||||
// Allowed tool should be allowed
|
||||
expect(engine.check({ name: 'run_shell_command' }, undefined)).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'run_shell_command' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
|
||||
// Excluded tool should be denied
|
||||
expect(engine.check({ name: 'write_file' }, undefined)).toBe(
|
||||
PolicyDecision.DENY,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'write_file' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.DENY);
|
||||
|
||||
// Other write tools should ask user
|
||||
expect(engine.check({ name: 'replace' }, undefined)).toBe(
|
||||
PolicyDecision.ASK_USER,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'replace' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ASK_USER);
|
||||
|
||||
// Unknown tools should use default
|
||||
expect(engine.check({ name: 'unknown_tool' }, undefined)).toBe(
|
||||
PolicyDecision.ASK_USER,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'unknown_tool' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('should handle MCP server wildcard patterns correctly', async () => {
|
||||
@@ -72,33 +72,49 @@ describe('Policy Engine Integration Tests', () => {
|
||||
const engine = new PolicyEngine(config);
|
||||
|
||||
// Tools from allowed server should be allowed
|
||||
expect(engine.check({ name: 'allowed-server__tool1' }, undefined)).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
// Tools from allowed server should be allowed
|
||||
expect(
|
||||
engine.check({ name: 'allowed-server__another_tool' }, undefined),
|
||||
(await engine.check({ name: 'allowed-server__tool1' }, undefined))
|
||||
.decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
expect(
|
||||
(
|
||||
await engine.check(
|
||||
{ name: 'allowed-server__another_tool' },
|
||||
undefined,
|
||||
)
|
||||
).decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
|
||||
// Tools from trusted server should be allowed
|
||||
expect(engine.check({ name: 'trusted-server__tool1' }, undefined)).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
expect(
|
||||
engine.check({ name: 'trusted-server__special_tool' }, undefined),
|
||||
(await engine.check({ name: 'trusted-server__tool1' }, undefined))
|
||||
.decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
expect(
|
||||
(
|
||||
await engine.check(
|
||||
{ name: 'trusted-server__special_tool' },
|
||||
undefined,
|
||||
)
|
||||
).decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
|
||||
// Tools from blocked server should be denied
|
||||
expect(engine.check({ name: 'blocked-server__tool1' }, undefined)).toBe(
|
||||
PolicyDecision.DENY,
|
||||
);
|
||||
expect(
|
||||
engine.check({ name: 'blocked-server__any_tool' }, undefined),
|
||||
(await engine.check({ name: 'blocked-server__tool1' }, undefined))
|
||||
.decision,
|
||||
).toBe(PolicyDecision.DENY);
|
||||
expect(
|
||||
(await engine.check({ name: 'blocked-server__any_tool' }, undefined))
|
||||
.decision,
|
||||
).toBe(PolicyDecision.DENY);
|
||||
|
||||
// Tools from unknown servers should use default
|
||||
expect(engine.check({ name: 'unknown-server__tool' }, undefined)).toBe(
|
||||
PolicyDecision.ASK_USER,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'unknown-server__tool' }, undefined))
|
||||
.decision,
|
||||
).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('should correctly prioritize specific tool excludes over MCP server wildcards', async () => {
|
||||
@@ -118,12 +134,15 @@ describe('Policy Engine Integration Tests', () => {
|
||||
const engine = new PolicyEngine(config);
|
||||
|
||||
// MCP server allowed (priority 2.1) provides general allow for server
|
||||
expect(engine.check({ name: 'my-server__safe-tool' }, undefined)).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
// MCP server allowed (priority 2.1) provides general allow for server
|
||||
expect(
|
||||
(await engine.check({ name: 'my-server__safe-tool' }, undefined))
|
||||
.decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
// But specific tool exclude (priority 2.4) wins over server allow
|
||||
expect(
|
||||
engine.check({ name: 'my-server__dangerous-tool' }, undefined),
|
||||
(await engine.check({ name: 'my-server__dangerous-tool' }, undefined))
|
||||
.decision,
|
||||
).toBe(PolicyDecision.DENY);
|
||||
});
|
||||
|
||||
@@ -154,46 +173,50 @@ describe('Policy Engine Integration Tests', () => {
|
||||
const engine = new PolicyEngine(config);
|
||||
|
||||
// Read-only tools should be allowed (autoAccept)
|
||||
expect(engine.check({ name: 'read_file' }, undefined)).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
expect(engine.check({ name: 'list_directory' }, undefined)).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'read_file' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
expect(
|
||||
(await engine.check({ name: 'list_directory' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
|
||||
// But glob is explicitly excluded, so it should be denied
|
||||
expect(engine.check({ name: 'glob' }, undefined)).toBe(
|
||||
expect((await engine.check({ name: 'glob' }, undefined)).decision).toBe(
|
||||
PolicyDecision.DENY,
|
||||
);
|
||||
|
||||
// Replace should ask user (normal write tool behavior)
|
||||
expect(engine.check({ name: 'replace' }, undefined)).toBe(
|
||||
PolicyDecision.ASK_USER,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'replace' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ASK_USER);
|
||||
|
||||
// Explicitly allowed tools
|
||||
expect(engine.check({ name: 'custom-tool' }, undefined)).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
expect(engine.check({ name: 'my-server__special-tool' }, undefined)).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'custom-tool' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
expect(
|
||||
(await engine.check({ name: 'my-server__special-tool' }, undefined))
|
||||
.decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
|
||||
// MCP server tools
|
||||
expect(engine.check({ name: 'allowed-server__tool' }, undefined)).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
expect(engine.check({ name: 'trusted-server__tool' }, undefined)).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
expect(engine.check({ name: 'blocked-server__tool' }, undefined)).toBe(
|
||||
PolicyDecision.DENY,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'allowed-server__tool' }, undefined))
|
||||
.decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
expect(
|
||||
(await engine.check({ name: 'trusted-server__tool' }, undefined))
|
||||
.decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
expect(
|
||||
(await engine.check({ name: 'blocked-server__tool' }, undefined))
|
||||
.decision,
|
||||
).toBe(PolicyDecision.DENY);
|
||||
|
||||
// Write tools should ask by default
|
||||
expect(engine.check({ name: 'write_file' }, undefined)).toBe(
|
||||
PolicyDecision.ASK_USER,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'write_file' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('should handle YOLO mode correctly', async () => {
|
||||
@@ -210,20 +233,20 @@ describe('Policy Engine Integration Tests', () => {
|
||||
const engine = new PolicyEngine(config);
|
||||
|
||||
// Most tools should be allowed in YOLO mode
|
||||
expect(engine.check({ name: 'run_shell_command' }, undefined)).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
expect(engine.check({ name: 'write_file' }, undefined)).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
expect(engine.check({ name: 'unknown_tool' }, undefined)).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'run_shell_command' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
expect(
|
||||
(await engine.check({ name: 'write_file' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
expect(
|
||||
(await engine.check({ name: 'unknown_tool' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
|
||||
// But explicitly excluded tools should still be denied
|
||||
expect(engine.check({ name: 'dangerous-tool' }, undefined)).toBe(
|
||||
PolicyDecision.DENY,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'dangerous-tool' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.DENY);
|
||||
});
|
||||
|
||||
it('should handle AUTO_EDIT mode correctly', async () => {
|
||||
@@ -236,17 +259,17 @@ describe('Policy Engine Integration Tests', () => {
|
||||
const engine = new PolicyEngine(config);
|
||||
|
||||
// Edit tools should be allowed in AUTO_EDIT mode
|
||||
expect(engine.check({ name: 'replace' }, undefined)).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
expect(engine.check({ name: 'write_file' }, undefined)).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'replace' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
expect(
|
||||
(await engine.check({ name: 'write_file' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
|
||||
// Other tools should follow normal rules
|
||||
expect(engine.check({ name: 'run_shell_command' }, undefined)).toBe(
|
||||
PolicyDecision.ASK_USER,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'run_shell_command' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('should verify priority ordering works correctly in practice', async () => {
|
||||
@@ -305,22 +328,24 @@ describe('Policy Engine Integration Tests', () => {
|
||||
expect(readOnlyToolRule?.priority).toBeCloseTo(1.05, 5);
|
||||
|
||||
// Verify the engine applies these priorities correctly
|
||||
expect(engine.check({ name: 'blocked-tool' }, undefined)).toBe(
|
||||
PolicyDecision.DENY,
|
||||
);
|
||||
expect(engine.check({ name: 'blocked-server__any' }, undefined)).toBe(
|
||||
PolicyDecision.DENY,
|
||||
);
|
||||
expect(engine.check({ name: 'specific-tool' }, undefined)).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
expect(engine.check({ name: 'trusted-server__any' }, undefined)).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
expect(engine.check({ name: 'mcp-server__any' }, undefined)).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
expect(engine.check({ name: 'glob' }, undefined)).toBe(
|
||||
expect(
|
||||
(await engine.check({ name: 'blocked-tool' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.DENY);
|
||||
expect(
|
||||
(await engine.check({ name: 'blocked-server__any' }, undefined))
|
||||
.decision,
|
||||
).toBe(PolicyDecision.DENY);
|
||||
expect(
|
||||
(await engine.check({ name: 'specific-tool' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
expect(
|
||||
(await engine.check({ name: 'trusted-server__any' }, undefined))
|
||||
.decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
expect(
|
||||
(await engine.check({ name: 'mcp-server__any' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
expect((await engine.check({ name: 'glob' }, undefined)).decision).toBe(
|
||||
PolicyDecision.ALLOW,
|
||||
);
|
||||
});
|
||||
@@ -346,9 +371,10 @@ describe('Policy Engine Integration Tests', () => {
|
||||
const engine = new PolicyEngine(config);
|
||||
|
||||
// Exclusion (195) should win over trust (90)
|
||||
expect(engine.check({ name: 'conflicted-server__tool' }, undefined)).toBe(
|
||||
PolicyDecision.DENY,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'conflicted-server__tool' }, undefined))
|
||||
.decision,
|
||||
).toBe(PolicyDecision.DENY);
|
||||
});
|
||||
|
||||
it('should handle edge case: specific tool allowed but server excluded', async () => {
|
||||
@@ -369,12 +395,14 @@ describe('Policy Engine Integration Tests', () => {
|
||||
|
||||
// Server exclusion (195) wins over specific tool allow (100)
|
||||
// This might be counterintuitive but follows the priority system
|
||||
expect(engine.check({ name: 'my-server__special-tool' }, undefined)).toBe(
|
||||
PolicyDecision.DENY,
|
||||
);
|
||||
expect(engine.check({ name: 'my-server__other-tool' }, undefined)).toBe(
|
||||
PolicyDecision.DENY,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'my-server__special-tool' }, undefined))
|
||||
.decision,
|
||||
).toBe(PolicyDecision.DENY);
|
||||
expect(
|
||||
(await engine.check({ name: 'my-server__other-tool' }, undefined))
|
||||
.decision,
|
||||
).toBe(PolicyDecision.DENY);
|
||||
});
|
||||
|
||||
it('should verify non-interactive mode transformation', async () => {
|
||||
@@ -389,12 +417,12 @@ describe('Policy Engine Integration Tests', () => {
|
||||
const engine = new PolicyEngine(engineConfig);
|
||||
|
||||
// ASK_USER should become DENY in non-interactive mode
|
||||
expect(engine.check({ name: 'unknown_tool' }, undefined)).toBe(
|
||||
PolicyDecision.DENY,
|
||||
);
|
||||
expect(engine.check({ name: 'run_shell_command' }, undefined)).toBe(
|
||||
PolicyDecision.DENY,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'unknown_tool' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.DENY);
|
||||
expect(
|
||||
(await engine.check({ name: 'run_shell_command' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.DENY);
|
||||
});
|
||||
|
||||
it('should handle empty settings gracefully', async () => {
|
||||
@@ -407,17 +435,17 @@ describe('Policy Engine Integration Tests', () => {
|
||||
const engine = new PolicyEngine(config);
|
||||
|
||||
// Should have default rules for write tools
|
||||
expect(engine.check({ name: 'write_file' }, undefined)).toBe(
|
||||
PolicyDecision.ASK_USER,
|
||||
);
|
||||
expect(engine.check({ name: 'replace' }, undefined)).toBe(
|
||||
PolicyDecision.ASK_USER,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'write_file' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ASK_USER);
|
||||
expect(
|
||||
(await engine.check({ name: 'replace' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ASK_USER);
|
||||
|
||||
// Unknown tools should use default
|
||||
expect(engine.check({ name: 'unknown' }, undefined)).toBe(
|
||||
PolicyDecision.ASK_USER,
|
||||
);
|
||||
expect(
|
||||
(await engine.check({ name: 'unknown' }, undefined)).decision,
|
||||
).toBe(PolicyDecision.ASK_USER);
|
||||
});
|
||||
|
||||
it('should verify rules are created with correct priorities', async () => {
|
||||
|
||||
Reference in New Issue
Block a user