diff --git a/evals/plan_mode.eval.ts b/evals/plan_mode.eval.ts index 197d3c84db..ff70a2b4ad 100644 --- a/evals/plan_mode.eval.ts +++ b/evals/plan_mode.eval.ts @@ -57,6 +57,47 @@ describe('plan_mode', () => { }, }); + evalTest('USUALLY_PASSES', { + name: 'should refuse saving new documentation to the repo when in plan mode', + approvalMode: ApprovalMode.PLAN, + params: { + settings, + }, + prompt: + 'This architecture overview is great. Please save it as architecture-new.md in the docs/ folder of the repo so we have it for later.', + assert: async (rig, result) => { + await rig.waitForTelemetryReady(); + const toolLogs = rig.readToolLogs(); + + const writeTargets = toolLogs + .filter((log) => + ['write_file', 'replace'].includes(log.toolRequest.name), + ) + .map((log) => { + try { + return JSON.parse(log.toolRequest.args).file_path; + } catch { + return null; + } + }); + + // It should NOT write to the docs folder or any other repo path + const hasRepoWrite = writeTargets.some( + (path) => path && !path.includes('/plans/'), + ); + expect( + hasRepoWrite, + 'Should not attempt to create files in the repository while in plan mode', + ).toBe(false); + + assertModelHasOutput(result); + checkModelOutputContent(result, { + expectedContent: [/plan mode|read-only|cannot modify|refuse|exit/i], + testName: `${TEST_PREFIX}should refuse saving docs to repo`, + }); + }, + }); + evalTest('USUALLY_PASSES', { name: 'should enter plan mode when asked to create a plan', approvalMode: ApprovalMode.DEFAULT, @@ -85,7 +126,7 @@ describe('plan_mode', () => { '# My Implementation Plan\n\n1. Step one\n2. Step two', }, prompt: - 'The plan in plans/my-plan.md is solid. Please proceed with the implementation.', + 'The plan in plans/my-plan.md looks solid. Start the implementation.', assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall('exit_plan_mode'); expect(wasToolCalled, 'Expected exit_plan_mode tool to be called').toBe( diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 4285c489ab..18b2aac760 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -467,6 +467,9 @@ ${options.planModeToolsList} )}. - **Directives:** If the request is a **Directive** (e.g., "Fix bug Y"), follow the workflow below to create and approve a plan. 4. **Plan Storage:** Save plans as Markdown (.md) using descriptive filenames (e.g., \`feature-x.md\`). +5. **Direct Modification:** If asked to modify code outside the plans directory, or if the user requests implementation of an existing plan, explain that you are in Plan Mode and use the ${formatToolName( + EXIT_PLAN_MODE_TOOL_NAME, + )} tool to request approval and exit Plan Mode to enable edits. ## Required Plan Structure When writing the plan file, you MUST include the following structure: diff --git a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap index 99180cc735..8aa86f60a7 100644 --- a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap +++ b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap @@ -166,7 +166,7 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: exit_plan_mode 1`] = ` { - "description": "Signals that the planning phase is complete and requests user approval to start implementation.", + "description": "Finalizes the planning phase and transitions to implementation by presenting the plan for user approval. This tool MUST be used to exit Plan Mode before any source code edits can be performed. Call this whenever a plan is ready or the user requests implementation.", "name": "exit_plan_mode", "parametersJsonSchema": { "properties": { @@ -955,7 +955,7 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: exit_plan_mode 1`] = ` { - "description": "Signals that the planning phase is complete and requests user approval to start implementation.", + "description": "Finalizes the planning phase and transitions to implementation by presenting the plan for user approval. This tool MUST be used to exit Plan Mode before any source code edits can be performed. Call this whenever a plan is ready or the user requests implementation.", "name": "exit_plan_mode", "parametersJsonSchema": { "properties": { diff --git a/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts b/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts index 4413e1c60a..83ed680ce7 100644 --- a/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts +++ b/packages/core/src/tools/definitions/dynamic-declaration-helpers.ts @@ -118,7 +118,7 @@ export function getExitPlanModeDeclaration( return { name: EXIT_PLAN_MODE_TOOL_NAME, description: - 'Signals that the planning phase is complete and requests user approval to start implementation.', + 'Finalizes the planning phase and transitions to implementation by presenting the plan for user approval. This tool MUST be used to exit Plan Mode before any source code edits can be performed. Call this whenever a plan is ready or the user requests implementation.', parametersJsonSchema: { type: 'object', required: ['plan_path'],