From 96752a13f60b396c4c1c5816f6ff612315aa845c Mon Sep 17 00:00:00 2001
From: "A.K.M. Adib"
Date: Mon, 23 Mar 2026 14:14:51 -0400
Subject: [PATCH] update evals for changes

---
 evals/plan_mode.eval.ts | 46 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/evals/plan_mode.eval.ts b/evals/plan_mode.eval.ts
index a37e5f91b4..9794dd2b6b 100644
--- a/evals/plan_mode.eval.ts
+++ b/evals/plan_mode.eval.ts
@@ -136,6 +136,30 @@ describe('plan_mode', () => {
       expect(wasToolCalled, 'Expected exit_plan_mode tool to be called').toBe(
         true,
       );
+
+      const toolLogs = rig.readToolLogs();
+      const exitPlanCall = toolLogs.find(
+        (log) => log.toolRequest.name === 'exit_plan_mode',
+      );
+      if (exitPlanCall) {
+        const args = JSON.parse(exitPlanCall.toolRequest.args);
+        expect(
+          args.plan_filename,
+          'plan_filename should be a string',
+        ).toBeTypeOf('string');
+        expect(args.plan_filename, 'plan_filename should end with .md').toMatch(
+          /\.md$/,
+        );
+        expect(
+          args.plan_filename,
+          'plan_filename should not be a path',
+        ).not.toContain('/');
+        expect(
+          args.plan_filename,
+          'plan_filename should not be a path',
+        ).not.toContain('\\');
+      }
+
       assertModelHasOutput(result);
     },
   });
@@ -199,6 +223,28 @@ describe('plan_mode', () => {
       await rig.waitForTelemetryReady();
       const toolLogs = rig.readToolLogs();
 
+      const exitPlanCall = toolLogs.find(
+        (log) => log.toolRequest.name === 'exit_plan_mode',
+      );
+      if (exitPlanCall) {
+        const args = JSON.parse(exitPlanCall.toolRequest.args);
+        expect(
+          args.plan_filename,
+          'plan_filename should be a string',
+        ).toBeTypeOf('string');
+        expect(args.plan_filename, 'plan_filename should end with .md').toMatch(
+          /\.md$/,
+        );
+        expect(
+          args.plan_filename,
+          'plan_filename should not be a path',
+        ).not.toContain('/');
+        expect(
+          args.plan_filename,
+          'plan_filename should not be a path',
+        ).not.toContain('\\');
+      }
+
       // Check if plan was written
       const planWrite = toolLogs.find(
         (log) =>