mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-21 10:34:35 -07:00
fix(core): prevent subagent bypass in plan mode (#18484)
This commit is contained in:
@@ -434,8 +434,8 @@ describe('Policy Engine Integration Tests', () => {
|
|||||||
expect(mcpServerRule?.priority).toBe(2.1); // MCP allowed server
|
expect(mcpServerRule?.priority).toBe(2.1); // MCP allowed server
|
||||||
|
|
||||||
const readOnlyToolRule = rules.find((r) => r.toolName === 'glob');
|
const readOnlyToolRule = rules.find((r) => r.toolName === 'glob');
|
||||||
// Priority 50 in default tier → 1.05
|
// Priority 70 in default tier → 1.07 (Overriding Plan Mode Deny)
|
||||||
expect(readOnlyToolRule?.priority).toBeCloseTo(1.05, 5);
|
expect(readOnlyToolRule?.priority).toBeCloseTo(1.07, 5);
|
||||||
|
|
||||||
// Verify the engine applies these priorities correctly
|
// Verify the engine applies these priorities correctly
|
||||||
expect(
|
expect(
|
||||||
@@ -590,8 +590,8 @@ describe('Policy Engine Integration Tests', () => {
|
|||||||
expect(server1Rule?.priority).toBe(2.1); // Allowed servers (user tier)
|
expect(server1Rule?.priority).toBe(2.1); // Allowed servers (user tier)
|
||||||
|
|
||||||
const globRule = rules.find((r) => r.toolName === 'glob');
|
const globRule = rules.find((r) => r.toolName === 'glob');
|
||||||
// Priority 50 in default tier → 1.05
|
// Priority 70 in default tier → 1.07
|
||||||
expect(globRule?.priority).toBeCloseTo(1.05, 5); // Auto-accept read-only
|
expect(globRule?.priority).toBeCloseTo(1.07, 5); // Auto-accept read-only
|
||||||
|
|
||||||
// The PolicyEngine will sort these by priority when it's created
|
// The PolicyEngine will sort these by priority when it's created
|
||||||
const engine = new PolicyEngine(config);
|
const engine = new PolicyEngine(config);
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ import {
|
|||||||
type ModelConfig,
|
type ModelConfig,
|
||||||
ModelConfigService,
|
ModelConfigService,
|
||||||
} from '../services/modelConfigService.js';
|
} from '../services/modelConfigService.js';
|
||||||
import { PolicyDecision } from '../policy/types.js';
|
import { PolicyDecision, PRIORITY_SUBAGENT_TOOL } from '../policy/types.js';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the model config alias for a given agent definition.
|
* Returns the model config alias for a given agent definition.
|
||||||
@@ -297,7 +297,7 @@ export class AgentRegistry {
|
|||||||
definition.kind === 'local'
|
definition.kind === 'local'
|
||||||
? PolicyDecision.ALLOW
|
? PolicyDecision.ALLOW
|
||||||
: PolicyDecision.ASK_USER,
|
: PolicyDecision.ASK_USER,
|
||||||
priority: 1.05,
|
priority: PRIORITY_SUBAGENT_TOOL,
|
||||||
source: 'AgentRegistry (Dynamic)',
|
source: 'AgentRegistry (Dynamic)',
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -194,6 +194,8 @@ export async function createPolicyEngineConfig(
|
|||||||
// 10: Write tools default to ASK_USER (becomes 1.010 in default tier)
|
// 10: Write tools default to ASK_USER (becomes 1.010 in default tier)
|
||||||
// 15: Auto-edit tool override (becomes 1.015 in default tier)
|
// 15: Auto-edit tool override (becomes 1.015 in default tier)
|
||||||
// 50: Read-only tools (becomes 1.050 in default tier)
|
// 50: Read-only tools (becomes 1.050 in default tier)
|
||||||
|
// 60: Plan mode catch-all DENY override (becomes 1.060 in default tier)
|
||||||
|
// 70: Plan mode explicit ALLOW override (becomes 1.070 in default tier)
|
||||||
// 999: YOLO mode allow-all (becomes 1.999 in default tier)
|
// 999: YOLO mode allow-all (becomes 1.999 in default tier)
|
||||||
|
|
||||||
// MCP servers that are explicitly excluded in settings.mcp.excluded
|
// MCP servers that are explicitly excluded in settings.mcp.excluded
|
||||||
|
|||||||
@@ -21,66 +21,36 @@
|
|||||||
#
|
#
|
||||||
# TOML policy priorities (before transformation):
|
# TOML policy priorities (before transformation):
|
||||||
# 10: Write tools default to ASK_USER (becomes 1.010 in default tier)
|
# 10: Write tools default to ASK_USER (becomes 1.010 in default tier)
|
||||||
# 20: Plan mode catch-all DENY override (becomes 1.020 in default tier)
|
# 60: Plan mode catch-all DENY override (becomes 1.060 in default tier)
|
||||||
# 50: Read-only tools (becomes 1.050 in default tier)
|
# 70: Plan mode explicit ALLOW override (becomes 1.070 in default tier)
|
||||||
# 999: YOLO mode allow-all (becomes 1.999 in default tier)
|
# 999: YOLO mode allow-all (becomes 1.999 in default tier)
|
||||||
|
|
||||||
# Catch-All: Deny everything by default in Plan mode.
|
# Catch-All: Deny everything by default in Plan mode.
|
||||||
|
|
||||||
[[rule]]
|
[[rule]]
|
||||||
decision = "deny"
|
decision = "deny"
|
||||||
priority = 20
|
priority = 60
|
||||||
modes = ["plan"]
|
modes = ["plan"]
|
||||||
deny_message = "You are in Plan Mode - adjust your prompt to only use read and search tools."
|
deny_message = "You are in Plan Mode - adjust your prompt to only use read and search tools."
|
||||||
|
|
||||||
# Explicitly Allow Read-Only Tools in Plan mode.
|
# Explicitly Allow Read-Only Tools in Plan mode.
|
||||||
|
|
||||||
[[rule]]
|
[[rule]]
|
||||||
toolName = "glob"
|
toolName = ["glob", "grep_search", "list_directory", "read_file", "google_web_search"]
|
||||||
decision = "allow"
|
decision = "allow"
|
||||||
priority = 50
|
priority = 70
|
||||||
modes = ["plan"]
|
modes = ["plan"]
|
||||||
|
|
||||||
[[rule]]
|
[[rule]]
|
||||||
toolName = "grep_search"
|
toolName = ["ask_user", "exit_plan_mode"]
|
||||||
decision = "allow"
|
|
||||||
priority = 50
|
|
||||||
modes = ["plan"]
|
|
||||||
|
|
||||||
[[rule]]
|
|
||||||
toolName = "list_directory"
|
|
||||||
decision = "allow"
|
|
||||||
priority = 50
|
|
||||||
modes = ["plan"]
|
|
||||||
|
|
||||||
[[rule]]
|
|
||||||
toolName = "read_file"
|
|
||||||
decision = "allow"
|
|
||||||
priority = 50
|
|
||||||
modes = ["plan"]
|
|
||||||
|
|
||||||
[[rule]]
|
|
||||||
toolName = "google_web_search"
|
|
||||||
decision = "allow"
|
|
||||||
priority = 50
|
|
||||||
modes = ["plan"]
|
|
||||||
|
|
||||||
[[rule]]
|
|
||||||
toolName = "ask_user"
|
|
||||||
decision = "ask_user"
|
decision = "ask_user"
|
||||||
priority = 50
|
priority = 70
|
||||||
modes = ["plan"]
|
|
||||||
|
|
||||||
[[rule]]
|
|
||||||
toolName = "exit_plan_mode"
|
|
||||||
decision = "ask_user"
|
|
||||||
priority = 50
|
|
||||||
modes = ["plan"]
|
modes = ["plan"]
|
||||||
|
|
||||||
# Allow write_file and replace for .md files in plans directory
|
# Allow write_file and replace for .md files in plans directory
|
||||||
[[rule]]
|
[[rule]]
|
||||||
toolName = ["write_file", "replace"]
|
toolName = ["write_file", "replace"]
|
||||||
decision = "allow"
|
decision = "allow"
|
||||||
priority = 50
|
priority = 70
|
||||||
modes = ["plan"]
|
modes = ["plan"]
|
||||||
argsPattern = "\"file_path\":\"[^\"]+/\\.gemini/tmp/[a-zA-Z0-9_-]+/plans/[a-zA-Z0-9_-]+\\.md\""
|
argsPattern = "\"file_path\":\"[^\"]+/\\.gemini/tmp/[a-zA-Z0-9_-]+/plans/[a-zA-Z0-9_-]+\\.md\""
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import {
|
|||||||
type SafetyCheckerRule,
|
type SafetyCheckerRule,
|
||||||
InProcessCheckerType,
|
InProcessCheckerType,
|
||||||
ApprovalMode,
|
ApprovalMode,
|
||||||
|
PRIORITY_SUBAGENT_TOOL,
|
||||||
} from './types.js';
|
} from './types.js';
|
||||||
import type { FunctionCall } from '@google/genai';
|
import type { FunctionCall } from '@google/genai';
|
||||||
import { SafetyCheckDecision } from '../safety/protocol.js';
|
import { SafetyCheckDecision } from '../safety/protocol.js';
|
||||||
@@ -1481,6 +1482,37 @@ describe('PolicyEngine', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('Plan Mode vs Subagent Priority (Regression)', () => {
|
||||||
|
it('should DENY subagents in Plan Mode despite dynamic allow rules', async () => {
|
||||||
|
// Plan Mode Deny (1.06) > Subagent Allow (1.05)
|
||||||
|
|
||||||
|
const fixedRules: PolicyRule[] = [
|
||||||
|
{
|
||||||
|
decision: PolicyDecision.DENY,
|
||||||
|
priority: 1.06,
|
||||||
|
modes: [ApprovalMode.PLAN],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
toolName: 'codebase_investigator',
|
||||||
|
decision: PolicyDecision.ALLOW,
|
||||||
|
priority: PRIORITY_SUBAGENT_TOOL,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
const fixedEngine = new PolicyEngine({
|
||||||
|
rules: fixedRules,
|
||||||
|
approvalMode: ApprovalMode.PLAN,
|
||||||
|
});
|
||||||
|
|
||||||
|
const fixedResult = await fixedEngine.check(
|
||||||
|
{ name: 'codebase_investigator' },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(fixedResult.decision).toBe(PolicyDecision.DENY);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('shell command parsing failure', () => {
|
describe('shell command parsing failure', () => {
|
||||||
it('should return ALLOW in YOLO mode even if shell command parsing fails', async () => {
|
it('should return ALLOW in YOLO mode even if shell command parsing fails', async () => {
|
||||||
const { splitCommands } = await import('../utils/shell-utils.js');
|
const { splitCommands } = await import('../utils/shell-utils.js');
|
||||||
|
|||||||
@@ -5,12 +5,21 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||||
import { PolicyDecision } from './types.js';
|
import {
|
||||||
|
PolicyDecision,
|
||||||
|
ApprovalMode,
|
||||||
|
PRIORITY_SUBAGENT_TOOL,
|
||||||
|
} from './types.js';
|
||||||
import * as fs from 'node:fs/promises';
|
import * as fs from 'node:fs/promises';
|
||||||
import * as path from 'node:path';
|
import * as path from 'node:path';
|
||||||
import * as os from 'node:os';
|
import * as os from 'node:os';
|
||||||
|
import { fileURLToPath } from 'node:url';
|
||||||
import { loadPoliciesFromToml } from './toml-loader.js';
|
import { loadPoliciesFromToml } from './toml-loader.js';
|
||||||
import type { PolicyLoadResult } from './toml-loader.js';
|
import type { PolicyLoadResult } from './toml-loader.js';
|
||||||
|
import { PolicyEngine } from './policy-engine.js';
|
||||||
|
|
||||||
|
const __filename = fileURLToPath(import.meta.url);
|
||||||
|
const __dirname = path.dirname(__filename);
|
||||||
|
|
||||||
describe('policy-toml-loader', () => {
|
describe('policy-toml-loader', () => {
|
||||||
let tempDir: string;
|
let tempDir: string;
|
||||||
@@ -500,4 +509,60 @@ priority = 100
|
|||||||
expect(error.message).toContain('Failed to read policy directory');
|
expect(error.message).toContain('Failed to read policy directory');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('Built-in Plan Mode Policy', () => {
|
||||||
|
it('should override default subagent rules when in Plan Mode', async () => {
|
||||||
|
const planTomlPath = path.resolve(__dirname, 'policies', 'plan.toml');
|
||||||
|
const fileContent = await fs.readFile(planTomlPath, 'utf-8');
|
||||||
|
const tempPolicyDir = await fs.mkdtemp(
|
||||||
|
path.join(os.tmpdir(), 'plan-policy-test-'),
|
||||||
|
);
|
||||||
|
try {
|
||||||
|
await fs.writeFile(path.join(tempPolicyDir, 'plan.toml'), fileContent);
|
||||||
|
const getPolicyTier = () => 1; // Default tier
|
||||||
|
|
||||||
|
// 1. Load the actual Plan Mode policies
|
||||||
|
const result = await loadPoliciesFromToml(
|
||||||
|
[tempPolicyDir],
|
||||||
|
getPolicyTier,
|
||||||
|
);
|
||||||
|
|
||||||
|
// 2. Initialize Policy Engine with these rules
|
||||||
|
const engine = new PolicyEngine({
|
||||||
|
rules: result.rules,
|
||||||
|
approvalMode: ApprovalMode.PLAN,
|
||||||
|
});
|
||||||
|
|
||||||
|
// 3. Simulate a Subagent being registered (Dynamic Rule)
|
||||||
|
engine.addRule({
|
||||||
|
toolName: 'codebase_investigator',
|
||||||
|
decision: PolicyDecision.ALLOW,
|
||||||
|
priority: PRIORITY_SUBAGENT_TOOL,
|
||||||
|
source: 'AgentRegistry (Dynamic)',
|
||||||
|
});
|
||||||
|
|
||||||
|
// 4. Verify Behavior:
|
||||||
|
// The Plan Mode "Catch-All Deny" (from plan.toml) should override the Subagent Allow
|
||||||
|
const checkResult = await engine.check(
|
||||||
|
{ name: 'codebase_investigator' },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(
|
||||||
|
checkResult.decision,
|
||||||
|
'Subagent should be DENIED in Plan Mode',
|
||||||
|
).toBe(PolicyDecision.DENY);
|
||||||
|
|
||||||
|
// 5. Verify Explicit Allows still work
|
||||||
|
// e.g. 'read_file' should be allowed because its priority in plan.toml (70) is higher than the deny (60)
|
||||||
|
const readResult = await engine.check({ name: 'read_file' }, undefined);
|
||||||
|
expect(
|
||||||
|
readResult.decision,
|
||||||
|
'Explicitly allowed tools (read_file) should be ALLOWED in Plan Mode',
|
||||||
|
).toBe(PolicyDecision.ALLOW);
|
||||||
|
} finally {
|
||||||
|
await fs.rm(tempPolicyDir, { recursive: true, force: true });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -276,3 +276,9 @@ export interface CheckResult {
|
|||||||
decision: PolicyDecision;
|
decision: PolicyDecision;
|
||||||
rule?: PolicyRule;
|
rule?: PolicyRule;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Priority for subagent tools (registered dynamically).
|
||||||
|
* Effective priority matching Tier 1 (Default) read-only tools.
|
||||||
|
*/
|
||||||
|
export const PRIORITY_SUBAGENT_TOOL = 1.05;
|
||||||
|
|||||||
Reference in New Issue
Block a user