feat(plan): refactor TestRig and eval helper to support configurable approval modes (#17171)

2026-05-14 05:42:54 -07:00 · 2026-01-21 10:43:48 -05:00
parent 0605e6e3e9
commit c21c297133
4 changed files with 26 additions and 22 deletions
@@ -59,7 +59,10 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
        execSync('git commit --allow-empty -m "Initial commit"', execOptions);
      }

-      const result = await rig.run({ args: evalCase.prompt });
+      const result = await rig.run({
+        args: evalCase.prompt,
+        approvalMode: evalCase.approvalMode ?? 'yolo',
+      });

      const unauthorizedErrorPrefix =
        createUnauthorizedToolError('').split("'")[0];
@@ -91,6 +94,7 @@ export interface EvalCase {
  params?: Record<string, any>;
  prompt: string;
  files?: Record<string, string>;
+  approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
  assert: (rig: TestRig, result: string) => Promise<void>;
 }

@@ -626,7 +626,7 @@ console.log(JSON.stringify({
        },
      });

-      const run = await rig.runInteractive({ yolo: false });
+      const run = await rig.runInteractive({ approvalMode: 'default' });

      // Send prompt that will trigger a permission request
      await run.type('Run the command "echo test"');
@@ -164,7 +164,7 @@ describe('run_shell_command', () => {
    const result = await rig.run({
      args: [`--allowed-tools=run_shell_command(${tool})`],
      stdin: prompt,
-      yolo: false,
+      approvalMode: 'default',
    });

    const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
@@ -207,7 +207,7 @@ describe('run_shell_command', () => {
    const result = await rig.run({
      args: '--allowed-tools=run_shell_command',
      stdin: prompt,
-      yolo: false,
+      approvalMode: 'default',
    });

    const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
@@ -231,8 +231,8 @@ describe('run_shell_command', () => {
    expect(toolCall.toolRequest.success).toBe(true);
  });

-  it('should succeed with --yolo mode', async () => {
-    await rig.setup('should succeed with --yolo mode', {
+  it('should succeed in yolo mode', async () => {
+    await rig.setup('should succeed in yolo mode', {
      settings: { tools: { core: ['run_shell_command'] } },
    });

@@ -242,7 +242,7 @@ describe('run_shell_command', () => {

    const result = await rig.run({
      args: prompt,
-      yolo: true,
+      approvalMode: 'yolo',
    });

    const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
@@ -276,7 +276,7 @@ describe('run_shell_command', () => {
    const result = await rig.run({
      args: `--allowed-tools=ShellTool(${tool})`,
      stdin: prompt,
-      yolo: false,
+      approvalMode: 'default',
    });

    const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
@@ -325,7 +325,7 @@ describe('run_shell_command', () => {
        '--allowed-tools=run_shell_command(ls)',
      ],
      stdin: prompt,
-      yolo: false,
+      approvalMode: 'default',
    });

    for (const expected in ['ls', tool]) {
@@ -377,7 +377,7 @@ describe('run_shell_command', () => {
    const result = await rig.run({
      args: `--allowed-tools=run_shell_command(${allowedCommand})`,
      stdin: prompt,
-      yolo: false,
+      approvalMode: 'default',
    });

    if (!result.toLowerCase().includes('fail')) {
@@ -438,7 +438,7 @@ describe('run_shell_command', () => {
    await rig.run({
      args: `--allowed-tools=ShellTool(${chained.allowPattern})`,
      stdin: `${shellInjection}\n`,
-      yolo: false,
+      approvalMode: 'default',
    });

    // CLI should refuse to execute the chained command without scheduling run_shell_command.
@@ -470,7 +470,7 @@ describe('run_shell_command', () => {
        '--allowed-tools=run_shell_command',
      ],
      stdin: prompt,
-      yolo: false,
+      approvalMode: 'default',
    });

    const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
@@ -400,14 +400,14 @@ export class TestRig {
    args?: string | string[];
    stdin?: string;
    stdinDoesNotEnd?: boolean;
-    yolo?: boolean;
+    approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
    timeout?: number;
    env?: Record<string, string | undefined>;
  }): Promise<string> {
-    const yolo = options.yolo !== false;
-    const { command, initialArgs } = this._getCommandAndArgs(
-      yolo ? ['--yolo'] : [],
-    );
+    const approvalMode = options.approvalMode ?? 'yolo';
+    const { command, initialArgs } = this._getCommandAndArgs([
+      `--approval-mode=${approvalMode}`,
+    ]);
    const commandArgs = [...initialArgs];
    const execOptions: {
      cwd: string;
@@ -1128,13 +1128,13 @@ export class TestRig {

  async runInteractive(options?: {
    args?: string | string[];
-    yolo?: boolean;
+    approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
    env?: Record<string, string | undefined>;
  }): Promise<InteractiveRun> {
-    const yolo = options?.yolo !== false;
-    const { command, initialArgs } = this._getCommandAndArgs(
-      yolo ? ['--yolo'] : [],
-    );
+    const approvalMode = options?.approvalMode ?? 'yolo';
+    const { command, initialArgs } = this._getCommandAndArgs([
+      `--approval-mode=${approvalMode}`,
+    ]);
    const commandArgs = [...initialArgs];

    const envVars = {