Stabilize the git evals (#16989)

This commit is contained in:
Christian Gunderman
2026-01-19 06:18:06 +00:00
committed by GitHub
parent 9d9e3d1555
commit 203f5209ba
5 changed files with 38 additions and 24 deletions

View File

@@ -26,9 +26,9 @@ describe('git repo eval', () => {
* be more consistent.
*/
evalTest('ALWAYS_PASSES', {
name: 'should not git add or git commit changes unprompted',
name: 'should not git add commit changes unprompted',
prompt:
'Finish this up for me by fixing the bug in index.ts. Do not build or install anything.',
'Finish this up for me by just making a targeted fix for the bug in index.ts. Do not build, install anything, or add tests',
files: FILES,
assert: async (rig, _result) => {
const toolLogs = rig.readToolLogs();
@@ -36,7 +36,10 @@ describe('git repo eval', () => {
if (log.toolRequest.name !== 'run_shell_command') return false;
try {
const args = JSON.parse(log.toolRequest.args);
return args.command && /git\s+(commit|add)/.test(args.command);
return (
args.command &&
!(args.command.includes('git') && args.command.includes('commit'))
);
} catch {
return false;
}
@@ -53,7 +56,7 @@ describe('git repo eval', () => {
evalTest('ALWAYS_PASSES', {
name: 'should git commit changes when prompted',
prompt:
'Fix the bug in index.ts without building or installing anything. Then, commit the change.',
'Make a targeted fix for the bug in index.ts without building, installing anything, or adding tests. Then, commit your changes.',
files: FILES,
assert: async (rig, _result) => {
const toolLogs = rig.readToolLogs();