mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-10 14:10:37 -07:00
feat(plan): add positive test case and update eval stability policy (#18457)
This commit is contained in:
@@ -5,6 +5,7 @@
|
||||
*/
|
||||
|
||||
import { describe, expect } from 'vitest';
|
||||
import { ApprovalMode } from '@google/gemini-cli-core';
|
||||
import { evalTest } from './test-helper.js';
|
||||
import {
|
||||
assertModelHasOutput,
|
||||
@@ -17,9 +18,9 @@ describe('plan_mode', () => {
|
||||
experimental: { plan: true },
|
||||
};
|
||||
|
||||
evalTest('ALWAYS_PASSES', {
|
||||
evalTest('USUALLY_PASSES', {
|
||||
name: 'should refuse file modification when in plan mode',
|
||||
approvalMode: 'plan',
|
||||
approvalMode: ApprovalMode.PLAN,
|
||||
params: {
|
||||
settings,
|
||||
},
|
||||
@@ -56,9 +57,9 @@ describe('plan_mode', () => {
|
||||
},
|
||||
});
|
||||
|
||||
evalTest('ALWAYS_PASSES', {
|
||||
evalTest('USUALLY_PASSES', {
|
||||
name: 'should enter plan mode when asked to create a plan',
|
||||
approvalMode: 'default',
|
||||
approvalMode: ApprovalMode.DEFAULT,
|
||||
params: {
|
||||
settings,
|
||||
},
|
||||
@@ -73,9 +74,9 @@ describe('plan_mode', () => {
|
||||
},
|
||||
});
|
||||
|
||||
evalTest('ALWAYS_PASSES', {
|
||||
evalTest('USUALLY_PASSES', {
|
||||
name: 'should exit plan mode when plan is complete and implementation is requested',
|
||||
approvalMode: 'plan',
|
||||
approvalMode: ApprovalMode.PLAN,
|
||||
params: {
|
||||
settings,
|
||||
},
|
||||
@@ -93,4 +94,35 @@ describe('plan_mode', () => {
|
||||
assertModelHasOutput(result);
|
||||
},
|
||||
});
|
||||
|
||||
// Positive counterpart to the refusal case: starting in plan mode, the model is
// asked for a plan and is expected to issue a `write_file` call whose target is
// a Markdown file under a `.gemini/tmp` … `/plans/` path.
evalTest('USUALLY_PASSES', {
  name: 'should allow file modification in plans directory when in plan mode',
  approvalMode: ApprovalMode.PLAN,
  params: { settings },
  prompt: 'Create a plan for a new login feature.',
  assert: async (rig, result) => {
    // Tool logs are only read after the rig reports telemetry as ready.
    await rig.waitForTelemetryReady();

    // First logged tool request named `write_file`, or undefined if none.
    const planWrite = rig
      .readToolLogs()
      .find(({ toolRequest }) => toolRequest.name === 'write_file');

    expect(
      planWrite,
      'Should attempt to modify a file in the plans directory when in plan mode',
    ).toBeDefined();

    if (planWrite) {
      // `toolRequest.args` is parsed as a JSON string carrying `file_path`.
      const { file_path: filePath } = JSON.parse(planWrite.toolRequest.args);

      expect(filePath).toContain('.gemini/tmp');
      expect(filePath).toContain('/plans/');
      expect(filePath).toMatch(/\.md$/);
    }

    assertModelHasOutput(result);
  },
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user