diff --git a/evals/plan_mode.eval.ts b/evals/plan_mode.eval.ts index a37e5f91b4..9794dd2b6b 100644 --- a/evals/plan_mode.eval.ts +++ b/evals/plan_mode.eval.ts @@ -136,6 +136,30 @@ describe('plan_mode', () => { expect(wasToolCalled, 'Expected exit_plan_mode tool to be called').toBe( true, ); + + const toolLogs = rig.readToolLogs(); + const exitPlanCall = toolLogs.find( + (log) => log.toolRequest.name === 'exit_plan_mode', + ); + if (exitPlanCall) { + const args = JSON.parse(exitPlanCall.toolRequest.args); + expect( + args.plan_filename, + 'plan_filename should be a string', + ).toBeTypeOf('string'); + expect(args.plan_filename, 'plan_filename should end with .md').toMatch( + /\.md$/, + ); + expect( + args.plan_filename, + 'plan_filename should not be a path', + ).not.toContain('/'); + expect( + args.plan_filename, + 'plan_filename should not be a path', + ).not.toContain('\\'); + } + assertModelHasOutput(result); }, }); @@ -199,6 +223,28 @@ describe('plan_mode', () => { await rig.waitForTelemetryReady(); const toolLogs = rig.readToolLogs(); + const exitPlanCall = toolLogs.find( + (log) => log.toolRequest.name === 'exit_plan_mode', + ); + if (exitPlanCall) { + const args = JSON.parse(exitPlanCall.toolRequest.args); + expect( + args.plan_filename, + 'plan_filename should be a string', + ).toBeTypeOf('string'); + expect(args.plan_filename, 'plan_filename should end with .md').toMatch( + /\.md$/, + ); + expect( + args.plan_filename, + 'plan_filename should not be a path', + ).not.toContain('/'); + expect( + args.plan_filename, + 'plan_filename should not be a path', + ).not.toContain('\\'); + } + // Check if plan was written const planWrite = toolLogs.find( (log) =>