diff --git a/docs/cli/plan-mode.md b/docs/cli/plan-mode.md index 379eb71030..9550e2a918 100644 --- a/docs/cli/plan-mode.md +++ b/docs/cli/plan-mode.md @@ -460,6 +460,26 @@ Manual deletion also removes all associated artifacts: If you use a [custom plans directory](#custom-plan-directory-and-policies), those files are not automatically deleted and must be managed manually. +## Non-interactive execution + +When running Gemini CLI in non-interactive environments (such as headless +scripts or CI/CD pipelines), Plan Mode optimizes for automated workflows: + +- **Automatic transitions:** The policy engine automatically approves the + `enter_plan_mode` and `exit_plan_mode` tools without prompting for user + confirmation. +- **Automated implementation:** When exiting Plan Mode to execute the plan, + Gemini CLI automatically switches to + [YOLO mode](../reference/policy-engine.md#approval-modes) instead of the + standard Default mode. This allows the CLI to execute the implementation steps + automatically without hanging on interactive tool approvals. + +**Example:** + +```bash +gemini --approval-mode plan -p "Analyze telemetry and suggest improvements" +``` + [`plan.toml`]: https://github.com/google-gemini/gemini-cli/blob/main/packages/core/src/policy/policies/plan.toml [Conductor]: https://github.com/gemini-cli-extensions/conductor diff --git a/docs/reference/policy-engine.md b/docs/reference/policy-engine.md index 495a4584e1..e26c080a50 100644 --- a/docs/reference/policy-engine.md +++ b/docs/reference/policy-engine.md @@ -90,6 +90,17 @@ If `argsPattern` is specified, the tool's arguments are converted to a stable JSON string, which is then tested against the provided regular expression. If the arguments don't match the pattern, the rule does not apply. +#### Execution environment + +If `interactive` is specified, the rule will only apply if the CLI's execution +environment matches the specified boolean value: + +- `true`: The rule applies only in interactive mode. 
+- `false`: The rule applies only in non-interactive (headless) mode. + +If omitted, the rule applies to both interactive and non-interactive +environments. + ### Decisions There are three possible decisions a rule can enforce: @@ -286,6 +297,10 @@ deny_message = "Deletion is permanent" # (Optional) An array of approval modes where this rule is active. modes = ["autoEdit"] + +# (Optional) A boolean to restrict the rule to interactive (true) or non-interactive (false) environments. +# If omitted, the rule applies to both. +interactive = true ``` ### Using arrays (lists) diff --git a/evals/plan_mode.eval.ts b/evals/plan_mode.eval.ts index 29566eab86..a37e5f91b4 100644 --- a/evals/plan_mode.eval.ts +++ b/evals/plan_mode.eval.ts @@ -18,6 +18,18 @@ describe('plan_mode', () => { experimental: { plan: true }, }; + const getWriteTargets = (logs: any[]) => + logs + .filter((log) => ['write_file', 'replace'].includes(log.toolRequest.name)) + .map((log) => { + try { + return JSON.parse(log.toolRequest.args).file_path as string; + } catch { + return ''; + } + }) + .filter(Boolean); + evalTest('ALWAYS_PASSES', { name: 'should refuse file modification when in plan mode', approvalMode: ApprovalMode.PLAN, @@ -32,27 +44,23 @@ describe('plan_mode', () => { await rig.waitForTelemetryReady(); const toolLogs = rig.readToolLogs(); - const writeTargets = toolLogs - .filter((log) => - ['write_file', 'replace'].includes(log.toolRequest.name), - ) - .map((log) => { - try { - return JSON.parse(log.toolRequest.args).file_path; - } catch { - return null; - } - }); + const exitPlanIndex = toolLogs.findIndex( + (log) => log.toolRequest.name === 'exit_plan_mode', + ); + + const writeTargetsBeforeExitPlan = getWriteTargets( + toolLogs.slice(0, exitPlanIndex !== -1 ? 
exitPlanIndex : undefined), + ); expect( - writeTargets, + writeTargetsBeforeExitPlan, 'Should not attempt to modify README.md in plan mode', ).not.toContain('README.md'); assertModelHasOutput(result); checkModelOutputContent(result, { expectedContent: [/plan mode|read-only|cannot modify|refuse|exiting/i], - testName: `${TEST_PREFIX}should refuse file modification`, + testName: `${TEST_PREFIX}should refuse file modification in plan mode`, }); }, }); @@ -69,24 +77,20 @@ describe('plan_mode', () => { await rig.waitForTelemetryReady(); const toolLogs = rig.readToolLogs(); - const writeTargets = toolLogs - .filter((log) => - ['write_file', 'replace'].includes(log.toolRequest.name), - ) - .map((log) => { - try { - return JSON.parse(log.toolRequest.args).file_path; - } catch { - return null; - } - }); + const exitPlanIndex = toolLogs.findIndex( + (log) => log.toolRequest.name === 'exit_plan_mode', + ); + + const writeTargetsBeforeExit = getWriteTargets( + toolLogs.slice(0, exitPlanIndex !== -1 ? exitPlanIndex : undefined), + ); // It should NOT write to the docs folder or any other repo path - const hasRepoWrite = writeTargets.some( + const hasRepoWriteBeforeExit = writeTargetsBeforeExit.some( (path) => path && !path.includes('/plans/'), ); expect( - hasRepoWrite, + hasRepoWriteBeforeExit, 'Should not attempt to create files in the repository while in plan mode', ).toBe(false); @@ -166,4 +170,65 @@ describe('plan_mode', () => { assertModelHasOutput(result); }, }); + + evalTest('USUALLY_PASSES', { + name: 'should create a plan in plan mode and implement it for a refactoring task', + params: { + settings, + }, + files: { + 'src/mathUtils.ts': + 'export const sum = (a: number, b: number) => a + b;\nexport const multiply = (a: number, b: number) => a * b;', + 'src/main.ts': + 'import { sum } from "./mathUtils";\nconsole.log(sum(1, 2));', + }, + prompt: + 'I want to refactor our math utilities. 
Move the `sum` function from `src/mathUtils.ts` to a new file `src/basicMath.ts` and update `src/main.ts` to use the new file. Please create a detailed implementation plan first, then execute it.', + assert: async (rig, result) => { + const enterPlanCalled = await rig.waitForToolCall('enter_plan_mode'); + expect( + enterPlanCalled, + 'Expected enter_plan_mode tool to be called', + ).toBe(true); + + const exitPlanCalled = await rig.waitForToolCall('exit_plan_mode'); + expect(exitPlanCalled, 'Expected exit_plan_mode tool to be called').toBe( + true, + ); + + await rig.waitForTelemetryReady(); + const toolLogs = rig.readToolLogs(); + + // Check if plan was written + const planWrite = toolLogs.find( + (log) => + log.toolRequest.name === 'write_file' && + log.toolRequest.args.includes('/plans/'), + ); + expect( + planWrite, + 'Expected a plan file to be written in the plans directory', + ).toBeDefined(); + + // Check for implementation files + const newFileWrite = toolLogs.find( + (log) => + log.toolRequest.name === 'write_file' && + log.toolRequest.args.includes('src/basicMath.ts'), + ); + expect( + newFileWrite, + 'Expected src/basicMath.ts to be created', + ).toBeDefined(); + + const mainUpdate = toolLogs.find( + (log) => + ['write_file', 'replace'].includes(log.toolRequest.name) && + log.toolRequest.args.includes('src/main.ts'), + ); + expect(mainUpdate, 'Expected src/main.ts to be updated').toBeDefined(); + + assertModelHasOutput(result); + }, + }); }); diff --git a/packages/core/src/policy/policies/plan.toml b/packages/core/src/policy/policies/plan.toml index e0c70dc219..5a7ee6e59f 100644 --- a/packages/core/src/policy/policies/plan.toml +++ b/packages/core/src/policy/policies/plan.toml @@ -33,6 +33,13 @@ toolName = "enter_plan_mode" decision = "ask_user" priority = 50 +interactive = true + +[[rule]] +toolName = "enter_plan_mode" +decision = "allow" +priority = 50 +interactive = false [[rule]] toolName = "enter_plan_mode" @@ -46,6 +53,13 @@ toolName = 
"exit_plan_mode" decision = "ask_user" priority = 70 modes = ["plan"] +interactive = true + +[[rule]] +toolName = "exit_plan_mode" +decision = "allow" +priority = 70 +interactive = false [[rule]] toolName = "exit_plan_mode" diff --git a/packages/core/src/policy/policies/yolo.toml b/packages/core/src/policy/policies/yolo.toml index d326e163f5..230b4c2670 100644 --- a/packages/core/src/policy/policies/yolo.toml +++ b/packages/core/src/policy/policies/yolo.toml @@ -45,6 +45,7 @@ toolName = ["enter_plan_mode", "exit_plan_mode"] decision = "deny" priority = 999 modes = ["yolo"] +interactive = true # Allow everything else in YOLO mode [[rule]] diff --git a/packages/core/src/policy/policy-engine.test.ts b/packages/core/src/policy/policy-engine.test.ts index b8865ba587..5e03443722 100644 --- a/packages/core/src/policy/policy-engine.test.ts +++ b/packages/core/src/policy/policy-engine.test.ts @@ -3343,4 +3343,121 @@ describe('PolicyEngine', () => { expect(excluded.has('test-tool')).toBe(false); }); }); + + describe('interactive matching', () => { + it('should ignore interactive rules in non-interactive mode', async () => { + const engine = new PolicyEngine({ + rules: [ + { + toolName: 'my_tool', + decision: PolicyDecision.ALLOW, + interactive: true, + }, + ], + nonInteractive: true, + defaultDecision: PolicyDecision.DENY, + }); + + const result = await engine.check( + { name: 'my_tool', args: {} }, + undefined, + ); + expect(result.decision).toBe(PolicyDecision.DENY); + }); + + it('should allow interactive rules in interactive mode', async () => { + const engine = new PolicyEngine({ + rules: [ + { + toolName: 'my_tool', + decision: PolicyDecision.ALLOW, + interactive: true, + }, + ], + nonInteractive: false, + defaultDecision: PolicyDecision.DENY, + }); + + const result = await engine.check( + { name: 'my_tool', args: {} }, + undefined, + ); + expect(result.decision).toBe(PolicyDecision.ALLOW); + }); + + it('should ignore non-interactive rules in interactive mode', async () 
=> { + const engine = new PolicyEngine({ + rules: [ + { + toolName: 'my_tool', + decision: PolicyDecision.ALLOW, + interactive: false, + }, + ], + nonInteractive: false, + defaultDecision: PolicyDecision.DENY, + }); + + const result = await engine.check( + { name: 'my_tool', args: {} }, + undefined, + ); + expect(result.decision).toBe(PolicyDecision.DENY); + }); + + it('should allow non-interactive rules in non-interactive mode', async () => { + const engine = new PolicyEngine({ + rules: [ + { + toolName: 'my_tool', + decision: PolicyDecision.ALLOW, + interactive: false, + }, + ], + nonInteractive: true, + defaultDecision: PolicyDecision.DENY, + }); + + const result = await engine.check( + { name: 'my_tool', args: {} }, + undefined, + ); + expect(result.decision).toBe(PolicyDecision.ALLOW); + }); + + it('should apply rules without interactive flag to both', async () => { + const rule: PolicyRule = { + toolName: 'my_tool', + decision: PolicyDecision.ALLOW, + }; + + const engineInteractive = new PolicyEngine({ + rules: [rule], + nonInteractive: false, + defaultDecision: PolicyDecision.DENY, + }); + const engineNonInteractive = new PolicyEngine({ + rules: [rule], + nonInteractive: true, + defaultDecision: PolicyDecision.DENY, + }); + + expect( + ( + await engineInteractive.check( + { name: 'my_tool', args: {} }, + undefined, + ) + ).decision, + ).toBe(PolicyDecision.ALLOW); + expect( + ( + await engineNonInteractive.check( + { name: 'my_tool', args: {} }, + undefined, + ) + ).decision, + ).toBe(PolicyDecision.ALLOW); + }); + }); }); diff --git a/packages/core/src/policy/policy-engine.ts b/packages/core/src/policy/policy-engine.ts index ec84eb23aa..53bca3f531 100644 --- a/packages/core/src/policy/policy-engine.ts +++ b/packages/core/src/policy/policy-engine.ts @@ -74,6 +74,7 @@ function ruleMatches( stringifiedArgs: string | undefined, serverName: string | undefined, currentApprovalMode: ApprovalMode, + nonInteractive: boolean, toolAnnotations?: Record, subagent?: 
string, ): boolean { @@ -146,6 +147,16 @@ function ruleMatches( } } + // Check interactive if specified + if ('interactive' in rule && rule.interactive !== undefined) { + if (rule.interactive && nonInteractive) { + return false; + } + if (!rule.interactive && !nonInteractive) { + return false; + } + } + return true; } @@ -443,6 +454,7 @@ export class PolicyEngine { stringifiedArgs, serverName, this.approvalMode, + this.nonInteractive, toolAnnotations, subagent, ), @@ -521,6 +533,7 @@ export class PolicyEngine { stringifiedArgs, serverName, this.approvalMode, + this.nonInteractive, toolAnnotations, subagent, ) @@ -713,6 +726,7 @@ export class PolicyEngine { undefined, // stringifiedArgs serverName, this.approvalMode, + this.nonInteractive, annotations, ); diff --git a/packages/core/src/policy/toml-loader.ts b/packages/core/src/policy/toml-loader.ts index f5c176dc25..f5210954f7 100644 --- a/packages/core/src/policy/toml-loader.ts +++ b/packages/core/src/policy/toml-loader.ts @@ -61,6 +61,7 @@ const PolicyRuleSchema = z.object({ 'priority must be <= 999 to prevent tier overflow. 
Priorities >= 1000 would jump to the next tier.', }), modes: z.array(z.nativeEnum(ApprovalMode)).optional(), + interactive: z.boolean().optional(), toolAnnotations: z.record(z.any()).optional(), allow_redirection: z.boolean().optional(), deny_message: z.string().optional(), @@ -475,6 +476,7 @@ export async function loadPoliciesFromToml( decision: rule.decision, priority: transformPriority(rule.priority, tier), modes: rule.modes, + interactive: rule.interactive, toolAnnotations: rule.toolAnnotations, allowRedirection: rule.allow_redirection, source: `${tierName.charAt(0).toUpperCase() + tierName.slice(1)}: ${file}`, diff --git a/packages/core/src/policy/types.ts b/packages/core/src/policy/types.ts index a3a919e1cd..5cd668ef4e 100644 --- a/packages/core/src/policy/types.ts +++ b/packages/core/src/policy/types.ts @@ -152,6 +152,13 @@ export interface PolicyRule { */ modes?: ApprovalMode[]; + /** + * If true, this rule only applies to interactive environments. + * If false, this rule only applies to non-interactive environments. + * If undefined, it applies to both interactive and non-interactive environments. + */ + interactive?: boolean; + /** * If true, allows command redirection even if the policy engine would normally * downgrade ALLOW to ASK_USER for redirected commands. 
diff --git a/packages/core/src/prompts/promptProvider.ts b/packages/core/src/prompts/promptProvider.ts index d9e671a94b..a2e1333895 100644 --- a/packages/core/src/prompts/promptProvider.ts +++ b/packages/core/src/prompts/promptProvider.ts @@ -175,6 +175,7 @@ export class PromptProvider { planningWorkflow: this.withSection( 'planningWorkflow', () => ({ + interactive: interactiveMode, planModeToolsList, plansDir: context.config.storage.getPlansDir(), approvedPlanPath: context.config.getApprovedPlanPath(), diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 11b559d116..225fa21c4a 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -88,6 +88,7 @@ export interface GitRepoOptions { } export interface PlanningWorkflowOptions { + interactive: boolean; planModeToolsList: string; plansDir: string; approvedPlanPath?: string; @@ -513,7 +514,7 @@ export function renderPlanningWorkflow( return ` # Active Approval Mode: Plan -You are operating in **Plan Mode**. Your goal is to produce an implementation plan in \`${options.plansDir}/\` and get user approval before editing source code. +You are operating in **Plan Mode**. Your goal is to produce an implementation plan in \`${options.plansDir}/\` and ${options.interactive ? 'get user approval before editing source code.' : 'create a design document before proceeding autonomously.'} ## Available Tools The following tools are available in Plan Mode: @@ -550,7 +551,7 @@ Write the implementation plan to \`${options.plansDir}/\`. The plan's structure - **Complex Tasks:** Include **Background & Motivation**, **Scope & Impact**, **Proposed Solution**, **Alternatives Considered**, a phased **Implementation Plan**, **Verification**, and **Migration & Rollback** strategies. ### 4. Review & Approval -Use the ${formatToolName(EXIT_PLAN_MODE_TOOL_NAME)} tool to present the plan and formally request approval. 
+Use the ${formatToolName(EXIT_PLAN_MODE_TOOL_NAME)} tool to present the plan and ${options.interactive ? 'formally request approval.' : 'begin implementation.'} ${renderApprovedPlanSection(options.approvedPlanPath)}`.trim(); } @@ -711,7 +712,7 @@ function newApplicationSteps(options: PrimaryWorkflowsOptions): string { // standard 'Execution' loop handle implementation once the plan is approved. if (options.enableEnterPlanModeTool) { return ` -1. **Mandatory Planning:** You MUST use the ${formatToolName(ENTER_PLAN_MODE_TOOL_NAME)} tool to draft a comprehensive design document and obtain user approval before writing any code. +1. **Mandatory Planning:** You MUST use the ${formatToolName(ENTER_PLAN_MODE_TOOL_NAME)} tool to draft a comprehensive design document${options.interactive ? ' and obtain user approval' : ''} before writing any code. 2. **Design Constraints:** When drafting your plan, adhere to these defaults unless explicitly overridden by the user: - **Goal:** Autonomously design a visually appealing, substantially complete, and functional prototype with rich aesthetics. Users judge applications by their visual impact; ensure they feel modern, "alive," and polished through consistent spacing, typography, and interactive feedback. - **Visuals:** Describe your strategy for sourcing or generating placeholders (e.g., stylized CSS shapes, gradients, procedurally generated patterns) to ensure a visually complete prototype. Never plan for assets that cannot be locally generated. 
diff --git a/packages/core/src/tools/exit-plan-mode.test.ts b/packages/core/src/tools/exit-plan-mode.test.ts index 4b6b537d00..88e327ab34 100644 --- a/packages/core/src/tools/exit-plan-mode.test.ts +++ b/packages/core/src/tools/exit-plan-mode.test.ts @@ -47,6 +47,7 @@ describe('ExitPlanModeTool', () => { storage: { getPlansDir: vi.fn().mockReturnValue(mockPlansDir), } as unknown as Config['storage'], + isInteractive: vi.fn().mockReturnValue(true), }; tool = new ExitPlanModeTool( mockConfig as Config, @@ -359,6 +360,36 @@ Ask the user for specific feedback on how to improve the plan.`, }); }); + describe('getAllowApprovalMode (internal)', () => { + it('should return YOLO when config.isInteractive() is false', async () => { + mockConfig.isInteractive = vi.fn().mockReturnValue(false); + const planRelativePath = createPlanFile('test.md', '# Content'); + const invocation = tool.build({ plan_path: planRelativePath }); + + // Directly call execute to trigger the internal getAllowApprovalMode + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toContain('YOLO mode'); + expect(mockConfig.setApprovalMode).toHaveBeenCalledWith( + ApprovalMode.YOLO, + ); + }); + + it('should return DEFAULT when config.isInteractive() is true', async () => { + mockConfig.isInteractive = vi.fn().mockReturnValue(true); + const planRelativePath = createPlanFile('test.md', '# Content'); + const invocation = tool.build({ plan_path: planRelativePath }); + + // Directly call execute to trigger the internal getAllowApprovalMode + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toContain('Default mode'); + expect(mockConfig.setApprovalMode).toHaveBeenCalledWith( + ApprovalMode.DEFAULT, + ); + }); + }); + describe('getApprovalModeDescription (internal)', () => { it('should handle all valid approval modes', async () => { const planRelativePath = createPlanFile('test.md', '# Content'); @@ -387,6 +418,10 
@@ Ask the user for specific feedback on how to improve the plan.`, ApprovalMode.DEFAULT, 'Default mode (edits will require confirmation)', ); + await testMode( + ApprovalMode.YOLO, + 'YOLO mode (all tool calls auto-approved)', + ); }); it('should throw for invalid post-planning modes', async () => { @@ -409,7 +444,6 @@ Ask the user for specific feedback on how to improve the plan.`, ).rejects.toThrow(/Unexpected approval mode/); }; - await testInvalidMode(ApprovalMode.YOLO); await testInvalidMode(ApprovalMode.PLAN); }); }); diff --git a/packages/core/src/tools/exit-plan-mode.ts b/packages/core/src/tools/exit-plan-mode.ts index b1615b18b4..aad95492c2 100644 --- a/packages/core/src/tools/exit-plan-mode.ts +++ b/packages/core/src/tools/exit-plan-mode.ts @@ -7,12 +7,12 @@ import { BaseDeclarativeTool, BaseToolInvocation, - type ToolResult, Kind, - type ToolExitPlanModeConfirmationDetails, - type ToolConfirmationPayload, - type ToolExitPlanModeConfirmationPayload, ToolConfirmationOutcome, + type ToolConfirmationPayload, + type ToolExitPlanModeConfirmationDetails, + type ToolExitPlanModeConfirmationPayload, + type ToolResult, } from './tools.js'; import type { MessageBus } from '../confirmation-bus/message-bus.js'; import path from 'node:path'; @@ -151,7 +151,7 @@ export class ExitPlanModeInvocation extends BaseToolInvocation< this.confirmationOutcome = ToolConfirmationOutcome.ProceedOnce; this.approvalPayload = { approved: true, - approvalMode: ApprovalMode.DEFAULT, + approvalMode: this.getAllowApprovalMode(), }; return false; } @@ -205,17 +205,15 @@ export class ExitPlanModeInvocation extends BaseToolInvocation< // When a user policy grants `allow` for exit_plan_mode, the scheduler // skips the confirmation phase entirely and shouldConfirmExecute is never - // called, leaving approvalPayload null. Treat that as an approval with - // the default mode — consistent with the ALLOW branch inside - // shouldConfirmExecute. + // called, leaving approvalPayload null. 
const payload = this.approvalPayload ?? { approved: true, - approvalMode: ApprovalMode.DEFAULT, + approvalMode: this.getAllowApprovalMode(), }; if (payload.approved) { const newMode = payload.approvalMode ?? ApprovalMode.DEFAULT; - if (newMode === ApprovalMode.PLAN || newMode === ApprovalMode.YOLO) { + if (newMode === ApprovalMode.PLAN) { throw new Error(`Unexpected approval mode: ${newMode}`); } @@ -254,4 +252,18 @@ Ask the user for specific feedback on how to improve the plan.`, } } } + + /** + * Determines the approval mode to switch to when plan mode is exited via a policy ALLOW. + * In non-interactive environments, this defaults to YOLO to allow automated execution. + */ + private getAllowApprovalMode(): ApprovalMode { + if (!this.config.isInteractive()) { + // Non-interactive environments should require minimal user action, so exit into YOLO mode to implement the plan. + return ApprovalMode.YOLO; + } + // By default, YOLO mode in an interactive environment cannot enter/exit plan mode. + // Always exit plan mode and move to the default approval mode if the exit_plan_mode tool is configured with an allow decision. + return ApprovalMode.DEFAULT; + } }