diff --git a/evals/test-helper.ts b/evals/test-helper.ts index 2db9864464..c5fd09091b 100644 --- a/evals/test-helper.ts +++ b/evals/test-helper.ts @@ -59,7 +59,10 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) { execSync('git commit --allow-empty -m "Initial commit"', execOptions); } - const result = await rig.run({ args: evalCase.prompt }); + const result = await rig.run({ + args: evalCase.prompt, + approvalMode: evalCase.approvalMode ?? 'yolo', + }); const unauthorizedErrorPrefix = createUnauthorizedToolError('').split("'")[0]; @@ -91,6 +94,7 @@ export interface EvalCase { params?: Record; prompt: string; files?: Record; + approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan'; assert: (rig: TestRig, result: string) => Promise; } diff --git a/integration-tests/hooks-system.test.ts b/integration-tests/hooks-system.test.ts index 1583f14e77..5526a4e758 100644 --- a/integration-tests/hooks-system.test.ts +++ b/integration-tests/hooks-system.test.ts @@ -626,7 +626,7 @@ console.log(JSON.stringify({ }, }); - const run = await rig.runInteractive({ yolo: false }); + const run = await rig.runInteractive({ approvalMode: 'default' }); // Send prompt that will trigger a permission request await run.type('Run the command "echo test"'); diff --git a/integration-tests/run_shell_command.test.ts b/integration-tests/run_shell_command.test.ts index 9ac0cf0533..1664870cfb 100644 --- a/integration-tests/run_shell_command.test.ts +++ b/integration-tests/run_shell_command.test.ts @@ -164,7 +164,7 @@ describe('run_shell_command', () => { const result = await rig.run({ args: [`--allowed-tools=run_shell_command(${tool})`], stdin: prompt, - yolo: false, + approvalMode: 'default', }); const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000); @@ -207,7 +207,7 @@ describe('run_shell_command', () => { const result = await rig.run({ args: '--allowed-tools=run_shell_command', stdin: prompt, - yolo: false, + approvalMode: 'default', }); const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000); @@ -231,8 +231,8 @@ describe('run_shell_command', () => { expect(toolCall.toolRequest.success).toBe(true); }); - it('should succeed with --yolo mode', async () => { - await rig.setup('should succeed with --yolo mode', { + it('should succeed in yolo mode', async () => { + await rig.setup('should succeed in yolo mode', { settings: { tools: { core: ['run_shell_command'] } }, }); @@ -242,7 +242,7 @@ describe('run_shell_command', () => { const result = await rig.run({ args: prompt, - yolo: true, + approvalMode: 'yolo', }); const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000); @@ -276,7 +276,7 @@ describe('run_shell_command', () => { const result = await rig.run({ args: `--allowed-tools=ShellTool(${tool})`, stdin: prompt, - yolo: false, + approvalMode: 'default', }); const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000); @@ -325,7 +325,7 @@ describe('run_shell_command', () => { '--allowed-tools=run_shell_command(ls)', ], stdin: prompt, - yolo: false, + approvalMode: 'default', }); for (const expected in ['ls', tool]) { @@ -377,7 +377,7 @@ describe('run_shell_command', () => { const result = await rig.run({ args: `--allowed-tools=run_shell_command(${allowedCommand})`, stdin: prompt, - yolo: false, + approvalMode: 'default', }); if (!result.toLowerCase().includes('fail')) { @@ -438,7 +438,7 @@ describe('run_shell_command', () => { await rig.run({ args: `--allowed-tools=ShellTool(${chained.allowPattern})`, stdin: `${shellInjection}\n`, - yolo: false, + approvalMode: 'default', }); // CLI should refuse to execute the chained command without scheduling run_shell_command. @@ -470,7 +470,7 @@ describe('run_shell_command', () => { '--allowed-tools=run_shell_command', ], stdin: prompt, - yolo: false, + approvalMode: 'default', }); const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000); diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts index 911146e0ff..e2b06b9609 100644 --- a/packages/test-utils/src/test-rig.ts +++ b/packages/test-utils/src/test-rig.ts @@ -400,14 +400,14 @@ export class TestRig { args?: string | string[]; stdin?: string; stdinDoesNotEnd?: boolean; - yolo?: boolean; + approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan'; timeout?: number; env?: Record; }): Promise { - const yolo = options.yolo !== false; - const { command, initialArgs } = this._getCommandAndArgs( - yolo ? ['--yolo'] : [], - ); + const approvalMode = options.approvalMode ?? 'yolo'; + const { command, initialArgs } = this._getCommandAndArgs([ + `--approval-mode=${approvalMode}`, + ]); const commandArgs = [...initialArgs]; const execOptions: { cwd: string; @@ -1128,13 +1128,13 @@ export class TestRig { async runInteractive(options?: { args?: string | string[]; - yolo?: boolean; + approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan'; env?: Record; }): Promise { - const yolo = options?.yolo !== false; - const { command, initialArgs } = this._getCommandAndArgs( - yolo ? ['--yolo'] : [], - ); + const approvalMode = options?.approvalMode ?? 'yolo'; + const { command, initialArgs } = this._getCommandAndArgs([ + `--approval-mode=${approvalMode}`, + ]); const commandArgs = [...initialArgs]; const envVars = {