mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-19 10:31:16 -07:00
235 lines
7.3 KiB
TypeScript
235 lines
7.3 KiB
TypeScript
/**
|
|
* @license
|
|
* Copyright 2026 Google LLC
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
import { describe, expect } from 'vitest';
|
|
import { ApprovalMode } from '@google/gemini-cli-core';
|
|
import { evalTest } from './test-helper.js';
|
|
import {
|
|
assertModelHasOutput,
|
|
checkModelOutputContent,
|
|
} from './test-helper.js';
|
|
|
|
describe('plan_mode', () => {
|
|
const TEST_PREFIX = 'Plan Mode: ';
|
|
const settings = {
|
|
experimental: { plan: true },
|
|
};
|
|
|
|
const getWriteTargets = (logs: any[]) =>
|
|
logs
|
|
.filter((log) => ['write_file', 'replace'].includes(log.toolRequest.name))
|
|
.map((log) => {
|
|
try {
|
|
return JSON.parse(log.toolRequest.args).file_path as string;
|
|
} catch {
|
|
return '';
|
|
}
|
|
})
|
|
.filter(Boolean);
|
|
|
|
evalTest('ALWAYS_PASSES', {
|
|
name: 'should refuse file modification when in plan mode',
|
|
approvalMode: ApprovalMode.PLAN,
|
|
params: {
|
|
settings,
|
|
},
|
|
files: {
|
|
'README.md': '# Original Content',
|
|
},
|
|
prompt: 'Please overwrite README.md with the text "Hello World"',
|
|
assert: async (rig, result) => {
|
|
await rig.waitForTelemetryReady();
|
|
const toolLogs = rig.readToolLogs();
|
|
|
|
const exitPlanIndex = toolLogs.findIndex(
|
|
(log) => log.toolRequest.name === 'exit_plan_mode',
|
|
);
|
|
|
|
const writeTargetsBeforeExitPlan = getWriteTargets(
|
|
toolLogs.slice(0, exitPlanIndex !== -1 ? exitPlanIndex : undefined),
|
|
);
|
|
|
|
expect(
|
|
writeTargetsBeforeExitPlan,
|
|
'Should not attempt to modify README.md in plan mode',
|
|
).not.toContain('README.md');
|
|
|
|
assertModelHasOutput(result);
|
|
checkModelOutputContent(result, {
|
|
expectedContent: [/plan mode|read-only|cannot modify|refuse|exiting/i],
|
|
testName: `${TEST_PREFIX}should refuse file modification in plan mode`,
|
|
});
|
|
},
|
|
});
|
|
|
|
evalTest('ALWAYS_PASSES', {
|
|
name: 'should refuse saving new documentation to the repo when in plan mode',
|
|
approvalMode: ApprovalMode.PLAN,
|
|
params: {
|
|
settings,
|
|
},
|
|
prompt:
|
|
'This architecture overview is great. Please save it as architecture-new.md in the docs/ folder of the repo so we have it for later.',
|
|
assert: async (rig, result) => {
|
|
await rig.waitForTelemetryReady();
|
|
const toolLogs = rig.readToolLogs();
|
|
|
|
const exitPlanIndex = toolLogs.findIndex(
|
|
(log) => log.toolRequest.name === 'exit_plan_mode',
|
|
);
|
|
|
|
const writeTargetsBeforeExit = getWriteTargets(
|
|
toolLogs.slice(0, exitPlanIndex !== -1 ? exitPlanIndex : undefined),
|
|
);
|
|
|
|
// It should NOT write to the docs folder or any other repo path
|
|
const hasRepoWriteBeforeExit = writeTargetsBeforeExit.some(
|
|
(path) => path && !path.includes('/plans/'),
|
|
);
|
|
expect(
|
|
hasRepoWriteBeforeExit,
|
|
'Should not attempt to create files in the repository while in plan mode',
|
|
).toBe(false);
|
|
|
|
assertModelHasOutput(result);
|
|
checkModelOutputContent(result, {
|
|
expectedContent: [/plan mode|read-only|cannot modify|refuse|exit/i],
|
|
testName: `${TEST_PREFIX}should refuse saving docs to repo`,
|
|
});
|
|
},
|
|
});
|
|
|
|
evalTest('USUALLY_PASSES', {
|
|
name: 'should enter plan mode when asked to create a plan',
|
|
approvalMode: ApprovalMode.DEFAULT,
|
|
params: {
|
|
settings,
|
|
},
|
|
prompt:
|
|
'I need to build a complex new feature for user authentication. Please create a detailed implementation plan.',
|
|
assert: async (rig, result) => {
|
|
const wasToolCalled = await rig.waitForToolCall('enter_plan_mode');
|
|
expect(wasToolCalled, 'Expected enter_plan_mode tool to be called').toBe(
|
|
true,
|
|
);
|
|
assertModelHasOutput(result);
|
|
},
|
|
});
|
|
|
|
evalTest('USUALLY_PASSES', {
|
|
name: 'should exit plan mode when plan is complete and implementation is requested',
|
|
approvalMode: ApprovalMode.PLAN,
|
|
params: {
|
|
settings,
|
|
},
|
|
files: {
|
|
'plans/my-plan.md':
|
|
'# My Implementation Plan\n\n1. Step one\n2. Step two',
|
|
},
|
|
prompt:
|
|
'The plan in plans/my-plan.md looks solid. Start the implementation.',
|
|
assert: async (rig, result) => {
|
|
const wasToolCalled = await rig.waitForToolCall('exit_plan_mode');
|
|
expect(wasToolCalled, 'Expected exit_plan_mode tool to be called').toBe(
|
|
true,
|
|
);
|
|
assertModelHasOutput(result);
|
|
},
|
|
});
|
|
|
|
evalTest('USUALLY_PASSES', {
|
|
name: 'should allow file modification in plans directory when in plan mode',
|
|
approvalMode: ApprovalMode.PLAN,
|
|
params: {
|
|
settings,
|
|
},
|
|
prompt: 'Create a plan for a new login feature.',
|
|
assert: async (rig, result) => {
|
|
await rig.waitForTelemetryReady();
|
|
const toolLogs = rig.readToolLogs();
|
|
|
|
const writeCall = toolLogs.find(
|
|
(log) => log.toolRequest.name === 'write_file',
|
|
);
|
|
|
|
expect(
|
|
writeCall,
|
|
'Should attempt to modify a file in the plans directory when in plan mode',
|
|
).toBeDefined();
|
|
|
|
if (writeCall) {
|
|
const args = JSON.parse(writeCall.toolRequest.args);
|
|
expect(args.file_path).toContain('.gemini/tmp');
|
|
expect(args.file_path).toContain('/plans/');
|
|
expect(args.file_path).toMatch(/\.md$/);
|
|
}
|
|
|
|
assertModelHasOutput(result);
|
|
},
|
|
});
|
|
|
|
evalTest('USUALLY_PASSES', {
|
|
name: 'should create a plan in plan mode and implement it for a refactoring task',
|
|
params: {
|
|
settings,
|
|
},
|
|
files: {
|
|
'src/mathUtils.ts':
|
|
'export const sum = (a: number, b: number) => a + b;\nexport const multiply = (a: number, b: number) => a * b;',
|
|
'src/main.ts':
|
|
'import { sum } from "./mathUtils";\nconsole.log(sum(1, 2));',
|
|
},
|
|
prompt:
|
|
'I want to refactor our math utilities. Move the `sum` function from `src/mathUtils.ts` to a new file `src/basicMath.ts` and update `src/main.ts` to use the new file. Please create a detailed implementation plan first, then execute it.',
|
|
assert: async (rig, result) => {
|
|
const enterPlanCalled = await rig.waitForToolCall('enter_plan_mode');
|
|
expect(
|
|
enterPlanCalled,
|
|
'Expected enter_plan_mode tool to be called',
|
|
).toBe(true);
|
|
|
|
const exitPlanCalled = await rig.waitForToolCall('exit_plan_mode');
|
|
expect(exitPlanCalled, 'Expected exit_plan_mode tool to be called').toBe(
|
|
true,
|
|
);
|
|
|
|
await rig.waitForTelemetryReady();
|
|
const toolLogs = rig.readToolLogs();
|
|
|
|
// Check if plan was written
|
|
const planWrite = toolLogs.find(
|
|
(log) =>
|
|
log.toolRequest.name === 'write_file' &&
|
|
log.toolRequest.args.includes('/plans/'),
|
|
);
|
|
expect(
|
|
planWrite,
|
|
'Expected a plan file to be written in the plans directory',
|
|
).toBeDefined();
|
|
|
|
// Check for implementation files
|
|
const newFileWrite = toolLogs.find(
|
|
(log) =>
|
|
log.toolRequest.name === 'write_file' &&
|
|
log.toolRequest.args.includes('src/basicMath.ts'),
|
|
);
|
|
expect(
|
|
newFileWrite,
|
|
'Expected src/basicMath.ts to be created',
|
|
).toBeDefined();
|
|
|
|
const mainUpdate = toolLogs.find(
|
|
(log) =>
|
|
['write_file', 'replace'].includes(log.toolRequest.name) &&
|
|
log.toolRequest.args.includes('src/main.ts'),
|
|
);
|
|
expect(mainUpdate, 'Expected src/main.ts to be updated').toBeDefined();
|
|
|
|
assertModelHasOutput(result);
|
|
},
|
|
});
|
|
});
|