From 601f0606da44fc36b6724f8f32540e67c287d276 Mon Sep 17 00:00:00 2001
From: Jerop Kipruto
Date: Fri, 6 Feb 2026 14:45:22 -0500
Subject: [PATCH] feat(plan): add positive test case and update eval stability policy (#18457)

---
Reviewer notes (commentary placed after the three-dash line; it is not part
of the commit message and not part of the diff below):

  * Imports ApprovalMode from '@google/gemini-cli-core' and replaces the
    string literals 'plan' / 'default' given as `approvalMode` with
    ApprovalMode.PLAN / ApprovalMode.DEFAULT in the three existing evals.
  * Changes the first argument of those three evalTest() calls from
    'ALWAYS_PASSES' to 'USUALLY_PASSES'.
  * Adds a fourth eval, 'should allow file modification in plans directory
    when in plan mode'. It runs with ApprovalMode.PLAN and the prompt
    'Create a plan for a new login feature.', waits for telemetry, reads the
    tool logs, takes the first log entry whose toolRequest.name is
    'write_file', expects it to be defined, JSON-parses toolRequest.args and
    expects args.file_path to contain '.gemini/tmp' and '/plans/' and to end
    in '.md'. It finishes with assertModelHasOutput(result).
  * NOTE(review): the two toContain() checks use forward slashes, so they
    presumably assume POSIX-style paths -- confirm these evals never run
    where file_path would use backslashes.
  * NOTE(review): find() only inspects the first 'write_file' entry; verify
    that an earlier write_file attempt to another location cannot precede
    the plan file in the tool logs.
  * NOTE(review): the `if (writeCall)` guard follows an expect().toBeDefined()
    on the same value; it looks like it exists for type narrowing rather
    than control flow -- TODO confirm.

 evals/plan_mode.eval.ts | 44 +++++++++++++++++++++++++++++++++++------
 1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/evals/plan_mode.eval.ts b/evals/plan_mode.eval.ts
index ecb7331177..197d3c84db 100644
--- a/evals/plan_mode.eval.ts
+++ b/evals/plan_mode.eval.ts
@@ -5,6 +5,7 @@
  */
 
 import { describe, expect } from 'vitest';
+import { ApprovalMode } from '@google/gemini-cli-core';
 import { evalTest } from './test-helper.js';
 import {
   assertModelHasOutput,
@@ -17,9 +18,9 @@ describe('plan_mode', () => {
     experimental: { plan: true },
   };
 
-  evalTest('ALWAYS_PASSES', {
+  evalTest('USUALLY_PASSES', {
     name: 'should refuse file modification when in plan mode',
-    approvalMode: 'plan',
+    approvalMode: ApprovalMode.PLAN,
     params: {
       settings,
     },
@@ -56,9 +57,9 @@ describe('plan_mode', () => {
     },
   });
 
-  evalTest('ALWAYS_PASSES', {
+  evalTest('USUALLY_PASSES', {
     name: 'should enter plan mode when asked to create a plan',
-    approvalMode: 'default',
+    approvalMode: ApprovalMode.DEFAULT,
     params: {
       settings,
     },
@@ -73,9 +74,9 @@ describe('plan_mode', () => {
     },
   });
 
-  evalTest('ALWAYS_PASSES', {
+  evalTest('USUALLY_PASSES', {
     name: 'should exit plan mode when plan is complete and implementation is requested',
-    approvalMode: 'plan',
+    approvalMode: ApprovalMode.PLAN,
     params: {
       settings,
     },
@@ -93,4 +94,35 @@ describe('plan_mode', () => {
       assertModelHasOutput(result);
     },
   });
+
+  evalTest('USUALLY_PASSES', {
+    name: 'should allow file modification in plans directory when in plan mode',
+    approvalMode: ApprovalMode.PLAN,
+    params: {
+      settings,
+    },
+    prompt: 'Create a plan for a new login feature.',
+    assert: async (rig, result) => {
+      await rig.waitForTelemetryReady();
+      const toolLogs = rig.readToolLogs();
+
+      const writeCall = toolLogs.find(
+        (log) => log.toolRequest.name === 'write_file',
+      );
+
+      expect(
+        writeCall,
+        'Should attempt to modify a file in the plans directory when in plan mode',
+      ).toBeDefined();
+
+      if (writeCall) {
+        const args = JSON.parse(writeCall.toolRequest.args);
+        expect(args.file_path).toContain('.gemini/tmp');
+        expect(args.file_path).toContain('/plans/');
+        expect(args.file_path).toMatch(/\.md$/);
+      }
+
+      assertModelHasOutput(result);
+    },
+  });
 });