Fix issue where agent gets stuck at interactive commands. (#18272)

This commit is contained in:
Christian Gunderman
2026-02-04 07:02:09 +00:00
committed by GitHub
parent b39cefe14e
commit a0b6602d09
4 changed files with 73 additions and 18 deletions

View File

@@ -0,0 +1,47 @@
import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js';
describe('interactive_commands', () => {
/**
* Validates that the agent does not use interactive commands unprompted.
* Interactive commands block the progress of the agent, requiring user
* intervention.
*/
evalTest('USUALLY_PASSES', {
name: 'should not use interactive commands',
prompt: 'Execute tests.',
files: {
'package.json': JSON.stringify(
{
name: 'example',
type: 'module',
devDependencies: {
vitest: 'latest',
},
},
null,
2,
),
'example.test.js': `
import { test, expect } from 'vitest';
test('it works', () => {
expect(1 + 1).toBe(2);
});
`,
},
assert: async (rig, result) => {
const logs = rig.readToolLogs();
const vitestCall = logs.find(
(l) =>
l.toolRequest.name === 'run_shell_command' &&
l.toolRequest.args.toLowerCase().includes('vitest'),
);
expect(vitestCall, 'Agent should have called vitest').toBeDefined();
expect(
vitestCall?.toolRequest.args,
'Agent should have passed run arg',
).toMatch(/\b(run|--run)\b/);
},
});
});

View File

@@ -45,6 +45,14 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
try {
rig.setup(evalCase.name, evalCase.params);
// Symlink node modules to reduce the amount of time needed to
// bootstrap test projects.
const rootNodeModules = path.join(process.cwd(), 'node_modules');
const testNodeModules = path.join(rig.testDir || '', 'node_modules');
if (fs.existsSync(rootNodeModules)) {
fs.symlinkSync(rootNodeModules, testNodeModules, 'dir');
}
if (evalCase.files) {
const acknowledgedAgents: Record<string, Record<string, string>> = {};
const projectRoot = fs.realpathSync(rig.testDir!);