fix(plan): deflake plan mode integration tests (#20477)

This commit is contained in:
Adib234
2026-03-02 14:51:44 -05:00
committed by GitHub
parent 7c9fceba7f
commit 2e1efaebe4
4 changed files with 88 additions and 32 deletions

View File

@@ -117,7 +117,6 @@ jobs:
name: 'Slow E2E - Win' name: 'Slow E2E - Win'
runs-on: 'gemini-cli-windows-16-core' runs-on: 'gemini-cli-windows-16-core'
if: "github.repository == 'google-gemini/gemini-cli'" if: "github.repository == 'google-gemini/gemini-cli'"
steps: steps:
- name: 'Checkout' - name: 'Checkout'
uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v5 uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v5

View File

@@ -4,8 +4,10 @@
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
import { writeFileSync } from 'node:fs';
import { join } from 'node:path';
import { describe, it, expect, beforeEach, afterEach } from 'vitest'; import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { TestRig, checkModelOutputContent } from './test-helper.js'; import { TestRig, checkModelOutputContent, GEMINI_DIR } from './test-helper.js';
describe('Plan Mode', () => { describe('Plan Mode', () => {
let rig: TestRig; let rig: TestRig;
@@ -62,50 +64,98 @@ describe('Plan Mode', () => {
}); });
}); });
it.skip('should allow write_file only in the plans directory in plan mode', async () => { it('should allow write_file to the plans directory in plan mode', async () => {
await rig.setup( const plansDir = '.gemini/tmp/foo/123/plans';
'should allow write_file only in the plans directory in plan mode', const testName =
{ 'should allow write_file to the plans directory in plan mode';
settings: {
experimental: { plan: true }, await rig.setup(testName, {
tools: { settings: {
core: ['write_file', 'read_file', 'list_directory'], experimental: { plan: true },
allowed: ['write_file'], tools: {
core: ['write_file', 'read_file', 'list_directory'],
},
general: {
defaultApprovalMode: 'plan',
plan: {
directory: plansDir,
}, },
general: { defaultApprovalMode: 'plan' },
}, },
}, },
);
// We ask the agent to create a plan for a feature, which should trigger a write_file in the plans directory.
// Verify that write_file outside of plan directory fails
await rig.run({
approvalMode: 'plan',
stdin:
'Create a file called plan.md in the plans directory. Then create a file called hello.txt in the current directory',
}); });
const toolLogs = rig.readToolLogs(); // Disable the interactive terminal setup prompt in tests
const writeLogs = toolLogs.filter( writeFileSync(
(l) => l.toolRequest.name === 'write_file', join(rig.homeDir!, GEMINI_DIR, 'state.json'),
JSON.stringify({ terminalSetupPromptShown: true }, null, 2),
); );
const planWrite = writeLogs.find( const run = await rig.runInteractive({
approvalMode: 'plan',
});
await run.type('Create a file called plan.md in the plans directory.');
await run.type('\r');
await rig.expectToolCallSuccess(['write_file'], 30000, (args) =>
args.includes('plan.md'),
);
const toolLogs = rig.readToolLogs();
const planWrite = toolLogs.find(
(l) => (l) =>
l.toolRequest.name === 'write_file' &&
l.toolRequest.args.includes('plans') && l.toolRequest.args.includes('plans') &&
l.toolRequest.args.includes('plan.md'), l.toolRequest.args.includes('plan.md'),
); );
expect(planWrite?.toolRequest.success).toBe(true);
});
const blockedWrite = writeLogs.find((l) => it('should deny write_file to non-plans directory in plan mode', async () => {
l.toolRequest.args.includes('hello.txt'), const plansDir = '.gemini/tmp/foo/123/plans';
const testName =
'should deny write_file to non-plans directory in plan mode';
await rig.setup(testName, {
settings: {
experimental: { plan: true },
tools: {
core: ['write_file', 'read_file', 'list_directory'],
},
general: {
defaultApprovalMode: 'plan',
plan: {
directory: plansDir,
},
},
},
});
// Disable the interactive terminal setup prompt in tests
writeFileSync(
join(rig.homeDir!, GEMINI_DIR, 'state.json'),
JSON.stringify({ terminalSetupPromptShown: true }, null, 2),
); );
// Model is undeterministic, sometimes a blocked write appears in tool logs and sometimes it doesn't const run = await rig.runInteractive({
if (blockedWrite) { approvalMode: 'plan',
expect(blockedWrite?.toolRequest.success).toBe(false); });
}
expect(planWrite?.toolRequest.success).toBe(true); await run.type('Create a file called hello.txt in the current directory.');
await run.type('\r');
const toolLogs = rig.readToolLogs();
const writeLog = toolLogs.find(
(l) =>
l.toolRequest.name === 'write_file' &&
l.toolRequest.args.includes('hello.txt'),
);
// In Plan Mode, writes outside the plans directory should be blocked.
// The model is nondeterministic: sometimes it doesn't even attempt the write, but if it does, the write must fail.
if (writeLog) {
expect(writeLog.toolRequest.success).toBe(false);
}
}); });
it('should be able to enter plan mode from default mode', async () => { it('should be able to enter plan mode from default mode', async () => {
@@ -119,6 +169,12 @@ describe('Plan Mode', () => {
}, },
}); });
// Disable the interactive terminal setup prompt in tests
writeFileSync(
join(rig.homeDir!, GEMINI_DIR, 'state.json'),
JSON.stringify({ terminalSetupPromptShown: true }, null, 2),
);
// Start in default mode and ask to enter plan mode. // Start in default mode and ask to enter plan mode.
await rig.run({ await rig.run({
approvalMode: 'default', approvalMode: 'default',

View File

@@ -10,10 +10,10 @@ import * as crypto from 'node:crypto';
import { fileURLToPath } from 'node:url'; import { fileURLToPath } from 'node:url';
import { Storage } from '../config/storage.js'; import { Storage } from '../config/storage.js';
import { import {
ApprovalMode,
type PolicyEngineConfig, type PolicyEngineConfig,
PolicyDecision, PolicyDecision,
type PolicyRule, type PolicyRule,
ApprovalMode,
type PolicySettings, type PolicySettings,
type SafetyCheckerRule, type SafetyCheckerRule,
} from './types.js'; } from './types.js';

View File

@@ -12,6 +12,7 @@ import { fileURLToPath } from 'node:url';
import { env } from 'node:process'; import { env } from 'node:process';
import { setTimeout as sleep } from 'node:timers/promises'; import { setTimeout as sleep } from 'node:timers/promises';
import { DEFAULT_GEMINI_MODEL, GEMINI_DIR } from '@google/gemini-cli-core'; import { DEFAULT_GEMINI_MODEL, GEMINI_DIR } from '@google/gemini-cli-core';
export { GEMINI_DIR };
import * as pty from '@lydell/node-pty'; import * as pty from '@lydell/node-pty';
import stripAnsi from 'strip-ansi'; import stripAnsi from 'strip-ansi';
import * as os from 'node:os'; import * as os from 'node:os';