mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-10 14:10:37 -07:00
feat(plan): refactor TestRig and eval helper to support configurable approval modes (#17171)
This commit is contained in:
@@ -59,7 +59,10 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
|
||||
execSync('git commit --allow-empty -m "Initial commit"', execOptions);
|
||||
}
|
||||
|
||||
const result = await rig.run({ args: evalCase.prompt });
|
||||
const result = await rig.run({
|
||||
args: evalCase.prompt,
|
||||
approvalMode: evalCase.approvalMode ?? 'yolo',
|
||||
});
|
||||
|
||||
const unauthorizedErrorPrefix =
|
||||
createUnauthorizedToolError('').split("'")[0];
|
||||
@@ -91,6 +94,7 @@ export interface EvalCase {
|
||||
params?: Record<string, any>;
|
||||
prompt: string;
|
||||
files?: Record<string, string>;
|
||||
approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
|
||||
assert: (rig: TestRig, result: string) => Promise<void>;
|
||||
}
|
||||
|
||||
|
||||
@@ -626,7 +626,7 @@ console.log(JSON.stringify({
|
||||
},
|
||||
});
|
||||
|
||||
const run = await rig.runInteractive({ yolo: false });
|
||||
const run = await rig.runInteractive({ approvalMode: 'default' });
|
||||
|
||||
// Send prompt that will trigger a permission request
|
||||
await run.type('Run the command "echo test"');
|
||||
|
||||
@@ -164,7 +164,7 @@ describe('run_shell_command', () => {
|
||||
const result = await rig.run({
|
||||
args: [`--allowed-tools=run_shell_command(${tool})`],
|
||||
stdin: prompt,
|
||||
yolo: false,
|
||||
approvalMode: 'default',
|
||||
});
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
|
||||
@@ -207,7 +207,7 @@ describe('run_shell_command', () => {
|
||||
const result = await rig.run({
|
||||
args: '--allowed-tools=run_shell_command',
|
||||
stdin: prompt,
|
||||
yolo: false,
|
||||
approvalMode: 'default',
|
||||
});
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
|
||||
@@ -231,8 +231,8 @@ describe('run_shell_command', () => {
|
||||
expect(toolCall.toolRequest.success).toBe(true);
|
||||
});
|
||||
|
||||
it('should succeed with --yolo mode', async () => {
|
||||
await rig.setup('should succeed with --yolo mode', {
|
||||
it('should succeed in yolo mode', async () => {
|
||||
await rig.setup('should succeed in yolo mode', {
|
||||
settings: { tools: { core: ['run_shell_command'] } },
|
||||
});
|
||||
|
||||
@@ -242,7 +242,7 @@ describe('run_shell_command', () => {
|
||||
|
||||
const result = await rig.run({
|
||||
args: prompt,
|
||||
yolo: true,
|
||||
approvalMode: 'yolo',
|
||||
});
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
|
||||
@@ -276,7 +276,7 @@ describe('run_shell_command', () => {
|
||||
const result = await rig.run({
|
||||
args: `--allowed-tools=ShellTool(${tool})`,
|
||||
stdin: prompt,
|
||||
yolo: false,
|
||||
approvalMode: 'default',
|
||||
});
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
|
||||
@@ -325,7 +325,7 @@ describe('run_shell_command', () => {
|
||||
'--allowed-tools=run_shell_command(ls)',
|
||||
],
|
||||
stdin: prompt,
|
||||
yolo: false,
|
||||
approvalMode: 'default',
|
||||
});
|
||||
|
||||
for (const expected in ['ls', tool]) {
|
||||
@@ -377,7 +377,7 @@ describe('run_shell_command', () => {
|
||||
const result = await rig.run({
|
||||
args: `--allowed-tools=run_shell_command(${allowedCommand})`,
|
||||
stdin: prompt,
|
||||
yolo: false,
|
||||
approvalMode: 'default',
|
||||
});
|
||||
|
||||
if (!result.toLowerCase().includes('fail')) {
|
||||
@@ -438,7 +438,7 @@ describe('run_shell_command', () => {
|
||||
await rig.run({
|
||||
args: `--allowed-tools=ShellTool(${chained.allowPattern})`,
|
||||
stdin: `${shellInjection}\n`,
|
||||
yolo: false,
|
||||
approvalMode: 'default',
|
||||
});
|
||||
|
||||
// CLI should refuse to execute the chained command without scheduling run_shell_command.
|
||||
@@ -470,7 +470,7 @@ describe('run_shell_command', () => {
|
||||
'--allowed-tools=run_shell_command',
|
||||
],
|
||||
stdin: prompt,
|
||||
yolo: false,
|
||||
approvalMode: 'default',
|
||||
});
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
|
||||
|
||||
@@ -400,14 +400,14 @@ export class TestRig {
|
||||
args?: string | string[];
|
||||
stdin?: string;
|
||||
stdinDoesNotEnd?: boolean;
|
||||
yolo?: boolean;
|
||||
approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
|
||||
timeout?: number;
|
||||
env?: Record<string, string | undefined>;
|
||||
}): Promise<string> {
|
||||
const yolo = options.yolo !== false;
|
||||
const { command, initialArgs } = this._getCommandAndArgs(
|
||||
yolo ? ['--yolo'] : [],
|
||||
);
|
||||
const approvalMode = options.approvalMode ?? 'yolo';
|
||||
const { command, initialArgs } = this._getCommandAndArgs([
|
||||
`--approval-mode=${approvalMode}`,
|
||||
]);
|
||||
const commandArgs = [...initialArgs];
|
||||
const execOptions: {
|
||||
cwd: string;
|
||||
@@ -1128,13 +1128,13 @@ export class TestRig {
|
||||
|
||||
async runInteractive(options?: {
|
||||
args?: string | string[];
|
||||
yolo?: boolean;
|
||||
approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
|
||||
env?: Record<string, string | undefined>;
|
||||
}): Promise<InteractiveRun> {
|
||||
const yolo = options?.yolo !== false;
|
||||
const { command, initialArgs } = this._getCommandAndArgs(
|
||||
yolo ? ['--yolo'] : [],
|
||||
);
|
||||
const approvalMode = options?.approvalMode ?? 'yolo';
|
||||
const { command, initialArgs } = this._getCommandAndArgs([
|
||||
`--approval-mode=${approvalMode}`,
|
||||
]);
|
||||
const commandArgs = [...initialArgs];
|
||||
|
||||
const envVars = {
|
||||
|
||||
Reference in New Issue
Block a user