mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-14 16:10:59 -07:00
test: add eval case for implicit ranged reads
This commit is contained in:
@@ -69,4 +69,63 @@ describe('optimization_evals', () => {
|
||||
// But for this behavior test, the read pattern is the most important part.
|
||||
},
|
||||
});
|
||||
|
||||
evalTest('ALWAYS_PASSES', {
|
||||
name: 'should use ranged read when specific line is targeted',
|
||||
files: {
|
||||
'linter_mess.ts': (() => {
|
||||
const lines = [];
|
||||
for (let i = 0; i < 4000; i++) {
|
||||
if (i === 3000) {
|
||||
lines.push(`var oldVar${i} = "needs fix";`);
|
||||
} else {
|
||||
lines.push(`const goodVar${i} = "clean";`);
|
||||
}
|
||||
}
|
||||
return lines.join('\n');
|
||||
})(),
|
||||
},
|
||||
prompt:
|
||||
'Fix the linter error on line 3000 of linter_mess.ts. The error is the use of "var".',
|
||||
assert: async (rig, result) => {
|
||||
const logs = rig.readToolLogs();
|
||||
|
||||
// Check if the agent read the whole file
|
||||
const readCalls = logs.filter(
|
||||
(log) => log.toolRequest?.name === READ_FILE_TOOL_NAME,
|
||||
);
|
||||
expect(
|
||||
readCalls.length,
|
||||
'Agent should have used read_file to check context',
|
||||
).toBeGreaterThan(0);
|
||||
|
||||
for (const call of readCalls) {
|
||||
const args = JSON.parse(call.toolRequest.args);
|
||||
if (args.file_path.includes('linter_mess.ts')) {
|
||||
expect(
|
||||
args.limit,
|
||||
'Agent read the entire file (missing limit) instead of using ranged read',
|
||||
).toBeDefined();
|
||||
|
||||
expect(args.limit, 'Agent read too many lines at once').toBeLessThan(
|
||||
1000,
|
||||
);
|
||||
|
||||
// Since the error is at line 3000, efficient reading implies using an offset
|
||||
// unless they grep first (which is also fine, but less direct if line is known).
|
||||
// However, strict ranged read usually means offset is set.
|
||||
// We'll allow either grep OR offset usage.
|
||||
const hasOffset = args.offset !== undefined && args.offset > 2000;
|
||||
|
||||
// If they didn't use offset, they better have searched first or read a small chunk?
|
||||
// Actually if they read line 0-1000, they won't find line 3000.
|
||||
// So if they read the file, they MUST have used offset to see line 3000 without reading the whole thing.
|
||||
expect(
|
||||
hasOffset,
|
||||
'Agent should use offset to read around line 3000',
|
||||
).toBe(true);
|
||||
}
|
||||
}
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user