From 74ce3eef0c1690ee4505853858dcf6e18150fde4 Mon Sep 17 00:00:00 2001 From: Mahima Shanware Date: Thu, 9 Apr 2026 18:13:35 +0000 Subject: [PATCH] fix(evals): refine plan mode eval prompt to ensure toolchain completion Align the 'foo' test prompt with existing project conventions while ensuring the model has the 'informal agreement' signal required to proceed to formal approval and implementation. Also change the eval policy for this test from ALWAYS_PASSES to USUALLY_PASSES. --- evals/plan_mode.eval.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evals/plan_mode.eval.ts b/evals/plan_mode.eval.ts index 6eea0c62ba..481ec92ba7 100644 --- a/evals/plan_mode.eval.ts +++ b/evals/plan_mode.eval.ts @@ -285,13 +285,13 @@ describe('plan_mode', () => { }, }); - evalTest('ALWAYS_PASSES', { + evalTest('USUALLY_PASSES', { name: 'should transition from plan mode to normal execution and create a plan file from scratch', params: { settings, }, prompt: - 'Enter plan mode and plan to create a new module called foo. The plan should be saved as foo-plan.md. Then, exit plan mode.', + 'Enter plan mode and plan to create a new module called foo. The plan should be saved as foo-plan.md. I agree with the strategy, so please create a detailed implementation plan and then execute it.', assert: async (rig, result) => { const enterPlanCalled = await rig.waitForToolCall('enter_plan_mode'); expect(