mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-15 06:12:50 -07:00
74ce3eef0c
Align the 'foo' test prompt with existing project conventions while ensuring the model has the 'informal agreement' signal required to proceed to formal approval and implementation.
364 lines
11 KiB
TypeScript
364 lines
11 KiB
TypeScript
/**
|
|
* @license
|
|
* Copyright 2026 Google LLC
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
import { describe, expect } from 'vitest';
|
|
import { ApprovalMode } from '@google/gemini-cli-core';
|
|
import { evalTest } from './test-helper.js';
|
|
import {
|
|
assertModelHasOutput,
|
|
checkModelOutputContent,
|
|
} from './test-helper.js';
|
|
|
|
describe('plan_mode', () => {
|
|
// Settings object handed to every eval below through `params.settings`;
// it switches on the `general.plan.enabled` flag.
const settings = {
  general: {
    plan: { enabled: true },
  },
};

// Prepended to the `testName` passed to checkModelOutputContent.
const TEST_PREFIX = 'Plan Mode: ';
|
|
|
|
/**
 * Collects the `file_path` argument of every `write_file` / `replace` tool
 * call found in `logs`, preserving log order.
 *
 * Each matching entry's `toolRequest.args` is parsed as JSON. An entry is
 * skipped when its args are not valid JSON, do not parse to an object, or
 * carry a `file_path` that is missing, empty, or not a string. Callers can
 * therefore treat every returned element as a non-empty string.
 *
 * @param logs Tool-call log entries; only `toolRequest.name` and the
 *   JSON-encoded `toolRequest.args` are read.
 * @returns The target paths of the write-style tool calls.
 */
const getWriteTargets = (
  logs: ReadonlyArray<{ toolRequest: { name: string; args: string } }>,
): string[] =>
  logs
    .filter((log) => ['write_file', 'replace'].includes(log.toolRequest.name))
    .flatMap((log): string[] => {
      let parsed: unknown;
      try {
        parsed = JSON.parse(log.toolRequest.args);
      } catch {
        // Best-effort: a call with unparsable args has no usable target.
        return [];
      }
      if (typeof parsed !== 'object' || parsed === null) {
        return [];
      }
      // Validate instead of asserting: the args come from model output and
      // `file_path` is not guaranteed to be a string.
      const filePath = (parsed as { file_path?: unknown }).file_path;
      return typeof filePath === 'string' && filePath !== '' ? [filePath] : [];
    });
|
|
|
|
// Eval: with approvalMode set to ApprovalMode.PLAN, a direct request to
// overwrite README.md must not produce a write_file/replace call targeting
// README.md before the first exit_plan_mode call, and the model's output must
// mention plan mode / read-only / refusing / exiting.
evalTest('ALWAYS_PASSES', {
  name: 'should refuse file modification when in plan mode',
  approvalMode: ApprovalMode.PLAN,
  params: {
    settings,
  },
  files: {
    'README.md': '# Original Content',
  },
  prompt: 'Please overwrite README.md with the text "Hello World"',
  assert: async (rig, result) => {
    await rig.waitForTelemetryReady();
    const toolLogs = rig.readToolLogs();

    // Only tool calls logged before the first exit_plan_mode are inspected;
    // when exit_plan_mode never appears, the whole log is inspected.
    const exitPlanIndex = toolLogs.findIndex(
      (log) => log.toolRequest.name === 'exit_plan_mode',
    );
    const writeTargetsBeforeExitPlan = getWriteTargets(
      toolLogs.slice(0, exitPlanIndex !== -1 ? exitPlanIndex : undefined),
    );

    // Match on the final path segment. An array `toContain('README.md')`
    // only matches an element that is exactly 'README.md', so a write
    // addressed as '/abs/dir/README.md' or './README.md' would go unnoticed.
    const readmeWrites = writeTargetsBeforeExitPlan.filter((target) =>
      /(^|[\\/])README\.md$/.test(target),
    );
    expect(
      readmeWrites,
      'Should not attempt to modify README.md in plan mode',
    ).toEqual([]);

    assertModelHasOutput(result);
    checkModelOutputContent(result, {
      expectedContent: [/plan mode|read-only|cannot modify|refuse|exiting/i],
      testName: `${TEST_PREFIX}should refuse file modification in plan mode`,
    });
  },
});
|
|
|
|
// Eval: with approvalMode set to ApprovalMode.PLAN, a request to save a new
// document under docs/ must not lead to any write_file/replace call outside a
// '/plans/' path before the first exit_plan_mode call.
evalTest('ALWAYS_PASSES', {
  name: 'should refuse saving new documentation to the repo when in plan mode',
  approvalMode: ApprovalMode.PLAN,
  params: { settings },
  prompt:
    'This architecture overview is great. Please save it as architecture-new.md in the docs/ folder of the repo so we have it for later.',
  assert: async (rig, result) => {
    await rig.waitForTelemetryReady();
    const logs = rig.readToolLogs();

    // Keep only the entries logged ahead of the first exit_plan_mode call;
    // without such a call every entry is kept.
    const firstExit = logs.findIndex(
      (entry) => entry.toolRequest.name === 'exit_plan_mode',
    );
    const planPhaseLogs =
      firstExit === -1 ? logs.slice(0) : logs.slice(0, firstExit);
    const planPhaseWrites = getWriteTargets(planPhaseLogs);

    // Any write target that is not under a '/plans/' segment counts as a
    // write to the repository.
    const wroteOutsidePlans = planPhaseWrites.some((target) =>
      target ? !target.includes('/plans/') : false,
    );
    expect(
      wroteOutsidePlans,
      'Should not attempt to create files in the repository while in plan mode',
    ).toBe(false);

    assertModelHasOutput(result);
    checkModelOutputContent(result, {
      expectedContent: [/plan mode|read-only|cannot modify|refuse|exit/i],
      testName: `${TEST_PREFIX}should refuse saving docs to repo`,
    });
  },
});
|
|
|
|
// Eval: with approvalMode set to ApprovalMode.DEFAULT, asking for a detailed
// implementation plan should result in an enter_plan_mode tool call.
evalTest('USUALLY_PASSES', {
  name: 'should enter plan mode when asked to create a plan',
  approvalMode: ApprovalMode.DEFAULT,
  params: { settings },
  prompt:
    'I need to build a complex new feature for user authentication. Please create a detailed implementation plan.',
  assert: async (rig, result) => {
    const enteredPlanMode = await rig.waitForToolCall('enter_plan_mode');
    expect(enteredPlanMode, 'Expected enter_plan_mode tool to be called').toBe(
      true,
    );

    assertModelHasOutput(result);
  },
});
|
|
|
|
// Eval: with approvalMode set to ApprovalMode.PLAN and a plan file already in
// the workspace, asking to start the implementation should trigger
// exit_plan_mode with a `plan_filename` argument that is a bare `.md` file
// name (no path separators).
evalTest('USUALLY_PASSES', {
  name: 'should exit plan mode when plan is complete and implementation is requested',
  approvalMode: ApprovalMode.PLAN,
  params: { settings },
  files: {
    'plans/my-plan.md': '# My Implementation Plan\n\n1. Step one\n2. Step two',
  },
  prompt: 'The plan in plans/my-plan.md looks solid. Start the implementation.',
  assert: async (rig, result) => {
    const exitedPlanMode = await rig.waitForToolCall('exit_plan_mode');
    expect(exitedPlanMode, 'Expected exit_plan_mode tool to be called').toBe(
      true,
    );

    const exitEntry = rig
      .readToolLogs()
      .find((entry) => entry.toolRequest.name === 'exit_plan_mode');
    expect(
      exitEntry,
      'Expected to find exit_plan_mode in tool logs',
    ).toBeDefined();

    // The expectation above throws when the entry is missing, so the
    // non-null assertion is only reached with a defined entry.
    const { plan_filename: planFilename } = JSON.parse(
      exitEntry!.toolRequest.args,
    );
    expect(planFilename, 'plan_filename should be a string').toBeTypeOf(
      'string',
    );
    expect(planFilename, 'plan_filename should end with .md').toMatch(/\.md$/);

    // Neither POSIX nor Windows separators may appear in the file name.
    for (const separator of ['/', '\\']) {
      expect(planFilename, 'plan_filename should not be a path').not.toContain(
        separator,
      );
    }

    assertModelHasOutput(result);
  },
});
|
|
|
|
// Eval: with approvalMode set to ApprovalMode.PLAN, asking for a plan should
// produce a write_file call whose `file_path` contains '.gemini/tmp' and
// '/plans/' and ends in '.md'. Only the first write_file entry is inspected.
evalTest('USUALLY_PASSES', {
  name: 'should allow file modification in plans directory when in plan mode',
  approvalMode: ApprovalMode.PLAN,
  params: { settings },
  prompt:
    'I agree with the strategy to use a JWT-based login. Create a plan for a new login feature.',
  assert: async (rig, result) => {
    await rig.waitForTelemetryReady();

    const firstWrite = rig
      .readToolLogs()
      .find((entry) => entry.toolRequest.name === 'write_file');
    expect(
      firstWrite,
      'Should attempt to modify a file in the plans directory when in plan mode',
    ).toBeDefined();

    if (firstWrite) {
      const { file_path: writtenPath } = JSON.parse(
        firstWrite.toolRequest.args,
      );
      expect(writtenPath).toContain('.gemini/tmp');
      expect(writtenPath).toContain('/plans/');
      expect(writtenPath).toMatch(/\.md$/);
    }

    assertModelHasOutput(result);
  },
});
|
|
|
|
// Eval: for a refactoring request that asks for a plan first and execution
// second, expects enter_plan_mode and exit_plan_mode calls, a well-formed
// `plan_filename` on exit, a plan written under '/plans/', and write activity
// touching both src/basicMath.ts and src/main.ts.
evalTest('USUALLY_PASSES', {
  name: 'should create a plan in plan mode and implement it for a refactoring task',
  params: { settings },
  files: {
    'src/mathUtils.ts':
      'export const sum = (a: number, b: number) => a + b;\nexport const multiply = (a: number, b: number) => a * b;',
    'src/main.ts': 'import { sum } from "./mathUtils";\nconsole.log(sum(1, 2));',
  },
  prompt:
    'I want to refactor our math utilities. I agree with the strategy to move the `sum` function from `src/mathUtils.ts` to a new file `src/basicMath.ts` and update `src/main.ts`. Please create a detailed implementation plan first, then execute it.',
  assert: async (rig, result) => {
    const sawEnter = await rig.waitForToolCall('enter_plan_mode');
    expect(sawEnter, 'Expected enter_plan_mode tool to be called').toBe(true);

    const sawExit = await rig.waitForToolCall('exit_plan_mode');
    expect(sawExit, 'Expected exit_plan_mode tool to be called').toBe(true);

    await rig.waitForTelemetryReady();
    const toolLogs = rig.readToolLogs();

    // First log entry whose tool name is one of `toolNames` and whose raw
    // (JSON string) args contain `argsFragment`.
    const firstCallMatching = (toolNames: string[], argsFragment: string) =>
      toolLogs.find(
        (entry) =>
          toolNames.includes(entry.toolRequest.name) &&
          entry.toolRequest.args.includes(argsFragment),
      );

    const exitEntry = toolLogs.find(
      (entry) => entry.toolRequest.name === 'exit_plan_mode',
    );
    expect(
      exitEntry,
      'Expected to find exit_plan_mode in tool logs',
    ).toBeDefined();

    const { plan_filename: planFilename } = JSON.parse(
      exitEntry!.toolRequest.args,
    );
    expect(planFilename, 'plan_filename should be a string').toBeTypeOf(
      'string',
    );
    expect(planFilename, 'plan_filename should end with .md').toMatch(/\.md$/);
    for (const separator of ['/', '\\']) {
      expect(planFilename, 'plan_filename should not be a path').not.toContain(
        separator,
      );
    }

    // The plan itself must have been written under a '/plans/' path.
    expect(
      firstCallMatching(['write_file'], '/plans/'),
      'Expected a plan file to be written in the plans directory',
    ).toBeDefined();

    // Implementation: the new module is created with write_file ...
    expect(
      firstCallMatching(['write_file'], 'src/basicMath.ts'),
      'Expected src/basicMath.ts to be created',
    ).toBeDefined();

    // ... and the entry point is touched by either write_file or replace.
    expect(
      firstCallMatching(['write_file', 'replace'], 'src/main.ts'),
      'Expected src/main.ts to be updated',
    ).toBeDefined();

    assertModelHasOutput(result);
  },
});
|
|
|
|
// Eval: asks the model to enter plan mode, save a plan as foo-plan.md and then
// execute it. Expects enter_plan_mode and exit_plan_mode calls plus a
// write_file call mentioning 'foo-plan.md' whose logged `success` is true.
evalTest('USUALLY_PASSES', {
  name: 'should transition from plan mode to normal execution and create a plan file from scratch',
  params: { settings },
  prompt:
    'Enter plan mode and plan to create a new module called foo. The plan should be saved as foo-plan.md. I agree with the strategy, so please create a detailed implementation plan and then execute it.',
  assert: async (rig, result) => {
    const sawEnter = await rig.waitForToolCall('enter_plan_mode');
    expect(sawEnter, 'Expected enter_plan_mode tool to be called').toBe(true);

    const sawExit = await rig.waitForToolCall('exit_plan_mode');
    expect(sawExit, 'Expected exit_plan_mode tool to be called').toBe(true);

    await rig.waitForTelemetryReady();

    // Locate the first write_file entry whose raw args mention the plan file.
    const fooPlanWrite = rig.readToolLogs().find((entry) => {
      const { name, args } = entry.toolRequest;
      return name === 'write_file' && args.includes('foo-plan.md');
    });

    expect(
      fooPlanWrite,
      'Expected write_file to be called for foo-plan.md',
    ).toBeDefined();

    // The write must also have been logged as successful.
    expect(
      fooPlanWrite?.toolRequest.success,
      `Expected write_file to succeed, but got error: ${fooPlanWrite?.toolRequest.error}`,
    ).toBe(true);

    assertModelHasOutput(result);
  },
});
|
|
|
|
// Eval: with approvalMode set to ApprovalMode.PLAN and a prompt that contains
// no agreement on a strategy, the tool log must contain neither an
// exit_plan_mode call nor a write_file call mentioning '/plans/'.
evalTest('USUALLY_PASSES', {
  name: 'should not exit plan mode or draft before informal agreement',
  approvalMode: ApprovalMode.PLAN,
  params: { settings },
  prompt: 'I need to build a new login feature. Please plan it.',
  assert: async (rig, result) => {
    await rig.waitForTelemetryReady();
    const logs = rig.readToolLogs();

    const exitEntry = logs.find(
      (entry) => entry.toolRequest.name === 'exit_plan_mode',
    );
    expect(
      exitEntry,
      'Should NOT call exit_plan_mode before informal agreement',
    ).toBeUndefined();

    const planDraft = logs.find((entry) => {
      const { name, args } = entry.toolRequest;
      return name === 'write_file' && args.includes('/plans/');
    });
    expect(
      planDraft,
      'Should NOT draft the plan file before informal agreement',
    ).toBeUndefined();

    assertModelHasOutput(result);
  },
});
|
|
});
|