mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-10 22:21:22 -07:00
fix(plan): deflake plan mode integration tests (#20477)
This commit is contained in:
1
.github/workflows/deflake.yml
vendored
1
.github/workflows/deflake.yml
vendored
@@ -117,7 +117,6 @@ jobs:
|
|||||||
name: 'Slow E2E - Win'
|
name: 'Slow E2E - Win'
|
||||||
runs-on: 'gemini-cli-windows-16-core'
|
runs-on: 'gemini-cli-windows-16-core'
|
||||||
if: "github.repository == 'google-gemini/gemini-cli'"
|
if: "github.repository == 'google-gemini/gemini-cli'"
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: 'Checkout'
|
- name: 'Checkout'
|
||||||
uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v5
|
uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v5
|
||||||
|
|||||||
@@ -4,8 +4,10 @@
|
|||||||
* SPDX-License-Identifier: Apache-2.0
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import { writeFileSync } from 'node:fs';
|
||||||
|
import { join } from 'node:path';
|
||||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||||
import { TestRig, checkModelOutputContent } from './test-helper.js';
|
import { TestRig, checkModelOutputContent, GEMINI_DIR } from './test-helper.js';
|
||||||
|
|
||||||
describe('Plan Mode', () => {
|
describe('Plan Mode', () => {
|
||||||
let rig: TestRig;
|
let rig: TestRig;
|
||||||
@@ -62,50 +64,98 @@ describe('Plan Mode', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it.skip('should allow write_file only in the plans directory in plan mode', async () => {
|
it('should allow write_file to the plans directory in plan mode', async () => {
|
||||||
await rig.setup(
|
const plansDir = '.gemini/tmp/foo/123/plans';
|
||||||
'should allow write_file only in the plans directory in plan mode',
|
const testName =
|
||||||
{
|
'should allow write_file to the plans directory in plan mode';
|
||||||
settings: {
|
|
||||||
experimental: { plan: true },
|
await rig.setup(testName, {
|
||||||
tools: {
|
settings: {
|
||||||
core: ['write_file', 'read_file', 'list_directory'],
|
experimental: { plan: true },
|
||||||
allowed: ['write_file'],
|
tools: {
|
||||||
|
core: ['write_file', 'read_file', 'list_directory'],
|
||||||
|
},
|
||||||
|
general: {
|
||||||
|
defaultApprovalMode: 'plan',
|
||||||
|
plan: {
|
||||||
|
directory: plansDir,
|
||||||
},
|
},
|
||||||
general: { defaultApprovalMode: 'plan' },
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
);
|
|
||||||
|
|
||||||
// We ask the agent to create a plan for a feature, which should trigger a write_file in the plans directory.
|
|
||||||
// Verify that write_file outside of plan directory fails
|
|
||||||
await rig.run({
|
|
||||||
approvalMode: 'plan',
|
|
||||||
stdin:
|
|
||||||
'Create a file called plan.md in the plans directory. Then create a file called hello.txt in the current directory',
|
|
||||||
});
|
});
|
||||||
|
|
||||||
const toolLogs = rig.readToolLogs();
|
// Disable the interactive terminal setup prompt in tests
|
||||||
const writeLogs = toolLogs.filter(
|
writeFileSync(
|
||||||
(l) => l.toolRequest.name === 'write_file',
|
join(rig.homeDir!, GEMINI_DIR, 'state.json'),
|
||||||
|
JSON.stringify({ terminalSetupPromptShown: true }, null, 2),
|
||||||
);
|
);
|
||||||
|
|
||||||
const planWrite = writeLogs.find(
|
const run = await rig.runInteractive({
|
||||||
|
approvalMode: 'plan',
|
||||||
|
});
|
||||||
|
|
||||||
|
await run.type('Create a file called plan.md in the plans directory.');
|
||||||
|
await run.type('\r');
|
||||||
|
|
||||||
|
await rig.expectToolCallSuccess(['write_file'], 30000, (args) =>
|
||||||
|
args.includes('plan.md'),
|
||||||
|
);
|
||||||
|
|
||||||
|
const toolLogs = rig.readToolLogs();
|
||||||
|
const planWrite = toolLogs.find(
|
||||||
(l) =>
|
(l) =>
|
||||||
|
l.toolRequest.name === 'write_file' &&
|
||||||
l.toolRequest.args.includes('plans') &&
|
l.toolRequest.args.includes('plans') &&
|
||||||
l.toolRequest.args.includes('plan.md'),
|
l.toolRequest.args.includes('plan.md'),
|
||||||
);
|
);
|
||||||
|
expect(planWrite?.toolRequest.success).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
const blockedWrite = writeLogs.find((l) =>
|
it('should deny write_file to non-plans directory in plan mode', async () => {
|
||||||
l.toolRequest.args.includes('hello.txt'),
|
const plansDir = '.gemini/tmp/foo/123/plans';
|
||||||
|
const testName =
|
||||||
|
'should deny write_file to non-plans directory in plan mode';
|
||||||
|
|
||||||
|
await rig.setup(testName, {
|
||||||
|
settings: {
|
||||||
|
experimental: { plan: true },
|
||||||
|
tools: {
|
||||||
|
core: ['write_file', 'read_file', 'list_directory'],
|
||||||
|
},
|
||||||
|
general: {
|
||||||
|
defaultApprovalMode: 'plan',
|
||||||
|
plan: {
|
||||||
|
directory: plansDir,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// Disable the interactive terminal setup prompt in tests
|
||||||
|
writeFileSync(
|
||||||
|
join(rig.homeDir!, GEMINI_DIR, 'state.json'),
|
||||||
|
JSON.stringify({ terminalSetupPromptShown: true }, null, 2),
|
||||||
);
|
);
|
||||||
|
|
||||||
// Model is undeterministic, sometimes a blocked write appears in tool logs and sometimes it doesn't
|
const run = await rig.runInteractive({
|
||||||
if (blockedWrite) {
|
approvalMode: 'plan',
|
||||||
expect(blockedWrite?.toolRequest.success).toBe(false);
|
});
|
||||||
}
|
|
||||||
|
|
||||||
expect(planWrite?.toolRequest.success).toBe(true);
|
await run.type('Create a file called hello.txt in the current directory.');
|
||||||
|
await run.type('\r');
|
||||||
|
|
||||||
|
const toolLogs = rig.readToolLogs();
|
||||||
|
const writeLog = toolLogs.find(
|
||||||
|
(l) =>
|
||||||
|
l.toolRequest.name === 'write_file' &&
|
||||||
|
l.toolRequest.args.includes('hello.txt'),
|
||||||
|
);
|
||||||
|
|
||||||
|
// In Plan Mode, writes outside the plans directory should be blocked.
|
||||||
|
// Model is undeterministic, sometimes it doesn't even try, but if it does, it must fail.
|
||||||
|
if (writeLog) {
|
||||||
|
expect(writeLog.toolRequest.success).toBe(false);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should be able to enter plan mode from default mode', async () => {
|
it('should be able to enter plan mode from default mode', async () => {
|
||||||
@@ -119,6 +169,12 @@ describe('Plan Mode', () => {
|
|||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Disable the interactive terminal setup prompt in tests
|
||||||
|
writeFileSync(
|
||||||
|
join(rig.homeDir!, GEMINI_DIR, 'state.json'),
|
||||||
|
JSON.stringify({ terminalSetupPromptShown: true }, null, 2),
|
||||||
|
);
|
||||||
|
|
||||||
// Start in default mode and ask to enter plan mode.
|
// Start in default mode and ask to enter plan mode.
|
||||||
await rig.run({
|
await rig.run({
|
||||||
approvalMode: 'default',
|
approvalMode: 'default',
|
||||||
|
|||||||
@@ -10,10 +10,10 @@ import * as crypto from 'node:crypto';
|
|||||||
import { fileURLToPath } from 'node:url';
|
import { fileURLToPath } from 'node:url';
|
||||||
import { Storage } from '../config/storage.js';
|
import { Storage } from '../config/storage.js';
|
||||||
import {
|
import {
|
||||||
|
ApprovalMode,
|
||||||
type PolicyEngineConfig,
|
type PolicyEngineConfig,
|
||||||
PolicyDecision,
|
PolicyDecision,
|
||||||
type PolicyRule,
|
type PolicyRule,
|
||||||
ApprovalMode,
|
|
||||||
type PolicySettings,
|
type PolicySettings,
|
||||||
type SafetyCheckerRule,
|
type SafetyCheckerRule,
|
||||||
} from './types.js';
|
} from './types.js';
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import { fileURLToPath } from 'node:url';
|
|||||||
import { env } from 'node:process';
|
import { env } from 'node:process';
|
||||||
import { setTimeout as sleep } from 'node:timers/promises';
|
import { setTimeout as sleep } from 'node:timers/promises';
|
||||||
import { DEFAULT_GEMINI_MODEL, GEMINI_DIR } from '@google/gemini-cli-core';
|
import { DEFAULT_GEMINI_MODEL, GEMINI_DIR } from '@google/gemini-cli-core';
|
||||||
|
export { GEMINI_DIR };
|
||||||
import * as pty from '@lydell/node-pty';
|
import * as pty from '@lydell/node-pty';
|
||||||
import stripAnsi from 'strip-ansi';
|
import stripAnsi from 'strip-ansi';
|
||||||
import * as os from 'node:os';
|
import * as os from 'node:os';
|
||||||
|
|||||||
Reference in New Issue
Block a user