diff --git a/docs/cli/plan-mode.md b/docs/cli/plan-mode.md index ad87bc591b..56895e42b6 100644 --- a/docs/cli/plan-mode.md +++ b/docs/cli/plan-mode.md @@ -56,19 +56,21 @@ Gemini CLI takes action. 1. **Provide a goal:** Start by describing what you want to achieve. Gemini CLI will then enter Plan Mode (if it's not already) to research the task. -2. **Review research and provide input:** As Gemini CLI analyzes your codebase, - it may ask you questions or present different implementation options using - [`ask_user`](../tools/ask-user.md). Provide your preferences to help guide - the design. -3. **Review the plan:** Once Gemini CLI has a proposed strategy, it creates a - detailed implementation plan as a Markdown file in your plans directory. +2. **Discuss and agree on strategy:** As Gemini CLI analyzes your codebase, it + will discuss its findings and proposed strategy with you to ensure + alignment. It may ask you questions or present different implementation + options using [`ask_user`](../tools/ask-user.md). **Gemini CLI will stop and + wait for your confirmation** before drafting the formal plan. You should + reach an informal agreement on the approach before proceeding. +3. **Review the plan:** Once you've agreed on the strategy, Gemini CLI creates + a detailed implementation plan as a Markdown file in your plans directory. - **View:** You can open and read this file to understand the proposed changes. - **Edit:** Press `Ctrl+X` to open the plan directly in your configured external editor. 4. **Approve or iterate:** Gemini CLI will present the finalized plan for your - approval. + formal approval. - **Approve:** If you're satisfied with the plan, approve it to start the implementation immediately: **Yes, automatically accept edits** or **Yes, manually accept edits**. diff --git a/docs/tools/planning.md b/docs/tools/planning.md index e554e47a34..13e9cd4fd8 100644 --- a/docs/tools/planning.md +++ b/docs/tools/planning.md @@ -32,7 +32,9 @@ and planning. ## 2. 
`exit_plan_mode` (ExitPlanMode) `exit_plan_mode` signals that the planning phase is complete. It presents the -finalized plan to the user and requests approval to start the implementation. +finalized plan to the user and requests formal approval to start the +implementation. The agent MUST reach an informal agreement with the user in the +chat regarding the proposed strategy BEFORE calling this tool. - **Tool name:** `exit_plan_mode` - **Display name:** Exit Plan Mode @@ -44,7 +46,7 @@ finalized plan to the user and requests approval to start the implementation. - **Behavior:** - Validates that the `plan_path` is within the allowed directory and that the file exists and has content. - - Presents the plan to the user for review. + - Presents the plan to the user for formal review. - If the user approves the plan: - Switches the CLI's approval mode to the user's chosen approval mode ( `DEFAULT` or `AUTO_EDIT`). @@ -56,5 +58,5 @@ finalized plan to the user and requests approval to start the implementation. - On approval: A message indicating the plan was approved and the new approval mode. - On rejection: A message containing the user's feedback. -- **Confirmation:** Yes. Shows the finalized plan and asks for user approval to - proceed with implementation. +- **Confirmation:** Yes. Shows the finalized plan and asks for the user's formal + approval to proceed with implementation. diff --git a/evals/plan_mode.eval.ts b/evals/plan_mode.eval.ts index 05bce0e6a5..6eea0c62ba 100644 --- a/evals/plan_mode.eval.ts +++ b/evals/plan_mode.eval.ts @@ -174,7 +174,8 @@ describe('plan_mode', () => { params: { settings, }, - prompt: 'Create a plan for a new login feature.', + prompt: + 'I agree with the strategy to use a JWT-based login. 
Create a plan for a new login feature.', assert: async (rig, result) => { await rig.waitForTelemetryReady(); const toolLogs = rig.readToolLogs(); @@ -211,7 +212,7 @@ describe('plan_mode', () => { 'import { sum } from "./mathUtils";\nconsole.log(sum(1, 2));', }, prompt: - 'I want to refactor our math utilities. Move the `sum` function from `src/mathUtils.ts` to a new file `src/basicMath.ts` and update `src/main.ts` to use the new file. Please create a detailed implementation plan first, then execute it.', + 'I want to refactor our math utilities. I agree with the strategy to move the `sum` function from `src/mathUtils.ts` to a new file `src/basicMath.ts` and update `src/main.ts`. Please create a detailed implementation plan first, then execute it.', assert: async (rig, result) => { const enterPlanCalled = await rig.waitForToolCall('enter_plan_mode'); expect( @@ -326,4 +327,37 @@ describe('plan_mode', () => { assertModelHasOutput(result); }, }); + + evalTest('USUALLY_PASSES', { + name: 'should not exit plan mode or draft before informal agreement', + approvalMode: ApprovalMode.PLAN, + params: { + settings, + }, + prompt: 'I need to build a new login feature. 
Please plan it.', + assert: async (rig, result) => { + await rig.waitForTelemetryReady(); + const toolLogs = rig.readToolLogs(); + + const exitPlanCall = toolLogs.find( + (log) => log.toolRequest.name === 'exit_plan_mode', + ); + expect( + exitPlanCall, + 'Should NOT call exit_plan_mode before informal agreement', + ).toBeUndefined(); + + const planWrite = toolLogs.find( + (log) => + log.toolRequest.name === 'write_file' && + log.toolRequest.args.includes('/plans/'), + ); + expect( + planWrite, + 'Should NOT draft the plan file before informal agreement', + ).toBeUndefined(); + + assertModelHasOutput(result); + }, + }); }); diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index b4e8dd4e7e..40a3ee6a52 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -126,11 +126,13 @@ Plan Mode uses an adaptive planning workflow where the research depth, plan stru Analyze requirements and use search/read tools to explore the codebase. Systematically map affected modules, trace data flow, and identify dependencies. ### 2. Consult -The depth of your consultation should be proportional to the task's complexity: -- **Simple Tasks:** Skip consultation and proceed directly to drafting. +The depth of your consultation should be proportional to the task's complexity. Before proceeding to Step 3 (Draft), you MUST discuss your findings and proposed strategy with the user to reach an informal agreement. +- **Simple Tasks:** Briefly describe your proposed strategy in the chat to ensure alignment, then **STOP and wait** for the user to confirm agreement before drafting the plan. - **Standard Tasks:** If multiple viable approaches exist, present a concise summary (including pros/cons and your recommendation) via \`ask_user\` and wait for a decision. 
- **Complex Tasks:** You MUST present at least two viable approaches with detailed trade-offs via \`ask_user\` and obtain approval before drafting the plan. +**CRITICAL:** You MUST NOT proceed to Step 3 (Draft) or Step 4 (Review & Approval) in the same turn as your initial strategy proposal. You MUST wait for user feedback and reach a clear agreement before drafting or submitting the plan. + ### 3. Draft Write the implementation plan to \`/tmp/plans/\`. The plan's structure adapts to the task: - **Simple Tasks:** Include a bulleted list of specific **Changes** and **Verification** steps. @@ -138,7 +140,7 @@ Write the implementation plan to \`/tmp/plans/\`. The plan's structure adapts to - **Complex Tasks:** Include **Background & Motivation**, **Scope & Impact**, **Proposed Solution**, **Alternatives Considered**, a phased **Implementation Plan**, **Verification**, and **Migration & Rollback** strategies. ### 4. Review & Approval -Use the \`exit_plan_mode\` tool to present the plan and formally request approval. +ONLY use the \`exit_plan_mode\` tool to present the plan for formal approval AFTER you have reached an informal agreement with the user in the chat regarding the proposed strategy. When called, this tool will present the plan and formally request approval. # Operational Guidelines @@ -301,11 +303,13 @@ Plan Mode uses an adaptive planning workflow where the research depth, plan stru Analyze requirements and use search/read tools to explore the codebase. Systematically map affected modules, trace data flow, and identify dependencies. ### 2. Consult -The depth of your consultation should be proportional to the task's complexity: -- **Simple Tasks:** Skip consultation and proceed directly to drafting. +The depth of your consultation should be proportional to the task's complexity. Before proceeding to Step 3 (Draft), you MUST discuss your findings and proposed strategy with the user to reach an informal agreement. 
+- **Simple Tasks:** Briefly describe your proposed strategy in the chat to ensure alignment, then **STOP and wait** for the user to confirm agreement before drafting the plan. - **Standard Tasks:** If multiple viable approaches exist, present a concise summary (including pros/cons and your recommendation) via \`ask_user\` and wait for a decision. - **Complex Tasks:** You MUST present at least two viable approaches with detailed trade-offs via \`ask_user\` and obtain approval before drafting the plan. +**CRITICAL:** You MUST NOT proceed to Step 3 (Draft) or Step 4 (Review & Approval) in the same turn as your initial strategy proposal. You MUST wait for user feedback and reach a clear agreement before drafting or submitting the plan. + ### 3. Draft Write the implementation plan to \`/tmp/plans/\`. The plan's structure adapts to the task: - **Simple Tasks:** Include a bulleted list of specific **Changes** and **Verification** steps. @@ -313,7 +317,7 @@ Write the implementation plan to \`/tmp/plans/\`. The plan's structure adapts to - **Complex Tasks:** Include **Background & Motivation**, **Scope & Impact**, **Proposed Solution**, **Alternatives Considered**, a phased **Implementation Plan**, **Verification**, and **Migration & Rollback** strategies. ### 4. Review & Approval -Use the \`exit_plan_mode\` tool to present the plan and formally request approval. +ONLY use the \`exit_plan_mode\` tool to present the plan for formal approval AFTER you have reached an informal agreement with the user in the chat regarding the proposed strategy. When called, this tool will present the plan and formally request approval. ## Approved Plan An approved plan is available for this task at \`/tmp/plans/feature-x.md\`. @@ -595,11 +599,13 @@ Plan Mode uses an adaptive planning workflow where the research depth, plan stru Analyze requirements and use search/read tools to explore the codebase. Systematically map affected modules, trace data flow, and identify dependencies. ### 2. 
Consult -The depth of your consultation should be proportional to the task's complexity: -- **Simple Tasks:** Skip consultation and proceed directly to drafting. +The depth of your consultation should be proportional to the task's complexity. Before proceeding to Step 3 (Draft), you MUST discuss your findings and proposed strategy with the user to reach an informal agreement. +- **Simple Tasks:** Briefly describe your proposed strategy in the chat to ensure alignment, then **STOP and wait** for the user to confirm agreement before drafting the plan. - **Standard Tasks:** If multiple viable approaches exist, present a concise summary (including pros/cons and your recommendation) via \`ask_user\` and wait for a decision. - **Complex Tasks:** You MUST present at least two viable approaches with detailed trade-offs via \`ask_user\` and obtain approval before drafting the plan. +**CRITICAL:** You MUST NOT proceed to Step 3 (Draft) or Step 4 (Review & Approval) in the same turn as your initial strategy proposal. You MUST wait for user feedback and reach a clear agreement before drafting or submitting the plan. + ### 3. Draft Write the implementation plan to \`/tmp/project-temp/plans/\`. The plan's structure adapts to the task: - **Simple Tasks:** Include a bulleted list of specific **Changes** and **Verification** steps. @@ -607,7 +613,7 @@ Write the implementation plan to \`/tmp/project-temp/plans/\`. The plan's struct - **Complex Tasks:** Include **Background & Motivation**, **Scope & Impact**, **Proposed Solution**, **Alternatives Considered**, a phased **Implementation Plan**, **Verification**, and **Migration & Rollback** strategies. ### 4. Review & Approval -Use the \`exit_plan_mode\` tool to present the plan and formally request approval. +ONLY use the \`exit_plan_mode\` tool to present the plan for formal approval AFTER you have reached an informal agreement with the user in the chat regarding the proposed strategy. 
When called, this tool will present the plan and formally request approval. # Operational Guidelines diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index d7e95a1f4e..b71fca746d 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -587,11 +587,13 @@ Plan Mode uses an adaptive planning workflow where the research depth, plan stru Analyze requirements and use search/read tools to explore the codebase. Systematically map affected modules, trace data flow, and identify dependencies. ### 2. Consult -The depth of your consultation should be proportional to the task's complexity: -- **Simple Tasks:** Skip consultation and proceed directly to drafting. +The depth of your consultation should be proportional to the task's complexity. Before proceeding to Step 3 (Draft), you MUST discuss your findings and proposed strategy with the user to reach an informal agreement. +- **Simple Tasks:** Briefly describe your proposed strategy in the chat to ensure alignment, then **STOP and wait** for the user to confirm agreement before drafting the plan. - **Standard Tasks:** If multiple viable approaches exist, present a concise summary (including pros/cons and your recommendation) via ${formatToolName(ASK_USER_TOOL_NAME)} and wait for a decision. - **Complex Tasks:** You MUST present at least two viable approaches with detailed trade-offs via ${formatToolName(ASK_USER_TOOL_NAME)} and obtain approval before drafting the plan. +**CRITICAL:** You MUST NOT proceed to Step 3 (Draft) or Step 4 (Review & Approval) in the same turn as your initial strategy proposal. You MUST wait for user feedback and reach a clear agreement before drafting or submitting the plan. + ### 3. Draft Write the implementation plan to \`${options.plansDir}/\`. The plan's structure adapts to the task: - **Simple Tasks:** Include a bulleted list of specific **Changes** and **Verification** steps. 
@@ -599,7 +601,7 @@ Write the implementation plan to \`${options.plansDir}/\`. The plan's structure - **Complex Tasks:** Include **Background & Motivation**, **Scope & Impact**, **Proposed Solution**, **Alternatives Considered**, a phased **Implementation Plan**, **Verification**, and **Migration & Rollback** strategies. ### 4. Review & Approval -Use the ${formatToolName(EXIT_PLAN_MODE_TOOL_NAME)} tool to present the plan and ${options.interactive ? 'formally request approval.' : 'begin implementation.'} +ONLY use the ${formatToolName(EXIT_PLAN_MODE_TOOL_NAME)} tool to present the plan for formal approval AFTER you have reached an informal agreement with the user in the chat regarding the proposed strategy. When called, this tool will present the plan and ${options.interactive ? 'formally request approval.' : 'begin implementation.'} ${renderApprovedPlanSection(options.approvedPlanPath)}`.trim(); } diff --git a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap index dbaad2d1f8..ba93e42e62 100644 --- a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap +++ b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap @@ -165,7 +165,7 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: exit_plan_mode 1`] = ` { - "description": "Finalizes the planning phase and transitions to implementation by presenting the plan for user approval. This tool MUST be used to exit Plan Mode before any source code edits can be performed. Call this whenever a plan is ready or the user requests implementation.", + "description": "Finalizes the planning phase and transitions to implementation by presenting the plan for formal user approval. 
You MUST reach an informal agreement with the user in the chat regarding the proposed strategy BEFORE calling this tool. This tool MUST be used to exit Plan Mode before any source code edits can be performed.", "name": "exit_plan_mode", "parametersJsonSchema": { "properties": { @@ -991,7 +991,7 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: exit_plan_mode 1`] = ` { - "description": "Finalizes the planning phase and transitions to implementation by presenting the plan for user approval. This tool MUST be used to exit Plan Mode before any source code edits can be performed. Call this whenever a plan is ready or the user requests implementation.", + "description": "Finalizes the planning phase and transitions to implementation by presenting the plan for formal user approval. You MUST reach an informal agreement with the user in the chat regarding the proposed strategy BEFORE calling this tool. This tool MUST be used to exit Plan Mode before any source code edits can be performed.", "name": "exit_plan_mode", "parametersJsonSchema": { "properties": { diff --git a/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts b/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts index 59b1bf7479..1e7a36e639 100644 --- a/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts +++ b/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts @@ -161,7 +161,7 @@ export function getExitPlanModeDeclaration(): FunctionDeclaration { return { name: EXIT_PLAN_MODE_TOOL_NAME, description: - 'Finalizes the planning phase and transitions to implementation by presenting the plan for user approval. This tool MUST be used to exit Plan Mode before any source code edits can be performed. 
Call this whenever a plan is ready or the user requests implementation.', + 'Finalizes the planning phase and transitions to implementation by presenting the plan for formal user approval. You MUST reach an informal agreement with the user in the chat regarding the proposed strategy BEFORE calling this tool. This tool MUST be used to exit Plan Mode before any source code edits can be performed.', parametersJsonSchema: { type: 'object', required: [EXIT_PLAN_PARAM_PLAN_FILENAME],