Fix issue where agent gets stuck at interactive commands. (#18272)

2026-03-10 14:10:37 -07:00 · 2026-02-04 07:02:09 +00:00
parent b39cefe14e
commit a0b6602d09
4 changed files with 73 additions and 18 deletions
--- a/evals/interactive-hang.eval.ts
+++ b/evals/interactive-hang.eval.ts
@@ -0,0 +1,47 @@
+import { describe, expect } from 'vitest';
+import { evalTest } from './test-helper.js';
+
+describe('interactive_commands', () => {
+  // Throwaway ESM project manifest; vitest is its only dev dependency.
+  const manifest = {
+    name: 'example',
+    type: 'module',
+    devDependencies: { vitest: 'latest' },
+  };
+
+  /**
+   * Checks that, given an open-ended request to run the tests, the agent
+   * does not reach for an interactive command on its own initiative. An
+   * interactive command stalls the agent until a user steps in, so the
+   * vitest invocation is expected to carry `run` or `--run`.
+   */
+  evalTest('USUALLY_PASSES', {
+    name: 'should not use interactive commands',
+    prompt: 'Execute tests.',
+    files: {
+      'package.json': JSON.stringify(manifest, null, 2),
+      'example.test.js': `
+        import { test, expect } from 'vitest';
+        test('it works', () => {
+          expect(1 + 1).toBe(2);
+        });
+      `,
+    },
+    assert: async (rig, result) => {
+      // First logged shell tool call whose arguments mention vitest (any case).
+      const firstVitestCall = rig.readToolLogs().find(({ toolRequest }) => {
+        if (toolRequest.name !== 'run_shell_command') {
+          return false;
+        }
+        return toolRequest.args.toLowerCase().includes('vitest');
+      });
+
+      expect(firstVitestCall, 'Agent should have called vitest').toBeDefined();
+      // The matched arguments must contain `run` or `--run` as a whole word.
+      const shellArgs = firstVitestCall?.toolRequest.args;
+      expect(shellArgs, 'Agent should have passed run arg').toMatch(
+        /\b(run|--run)\b/,
+      );
+    },
+  });
+});
--- a/evals/test-helper.ts
+++ b/evals/test-helper.ts
@@ -45,6 +45,14 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
    try {
      rig.setup(evalCase.name, evalCase.params);

+      // Symlink node modules to reduce the amount of time needed to
+      // bootstrap test projects.
+      const rootNodeModules = path.join(process.cwd(), 'node_modules');
+      const testNodeModules = path.join(rig.testDir || '', 'node_modules');
+      if (fs.existsSync(rootNodeModules)) {
+        fs.symlinkSync(rootNodeModules, testNodeModules, 'dir');
+      }
+
      if (evalCase.files) {
        const acknowledgedAgents: Record<string, Record<string, string>> = {};
        const projectRoot = fs.realpathSync(rig.testDir!);