Stabilize tests. (#20095)

This commit is contained in:
Christian Gunderman
2026-02-24 00:01:39 +00:00
committed by GitHub
parent 2ff7738b5d
commit 56c8d7e985
3 changed files with 16 additions and 15 deletions

View File

@@ -78,22 +78,23 @@ describe('Frugal reads eval', () => {
).toBe(true); ).toBe(true);
let totalLinesRead = 0; let totalLinesRead = 0;
const readRanges: { offset: number; limit: number }[] = []; const readRanges: { start_line: number; end_line: number }[] = [];
for (const call of targetFileReads) { for (const call of targetFileReads) {
const args = JSON.parse(call.toolRequest.args); const args = JSON.parse(call.toolRequest.args);
expect( expect(
args.limit, args.end_line,
'Agent read the entire file (missing limit) instead of using ranged read', 'Agent read the entire file (missing end_line) instead of using ranged read',
).toBeDefined(); ).toBeDefined();
const limit = args.limit; const end_line = args.end_line;
const offset = args.offset ?? 0; const start_line = args.start_line ?? 1;
totalLinesRead += limit; const linesRead = end_line - start_line + 1;
readRanges.push({ offset, limit }); totalLinesRead += linesRead;
readRanges.push({ start_line, end_line });
expect(args.limit, 'Agent read too many lines at once').toBeLessThan( expect(linesRead, 'Agent read too many lines at once').toBeLessThan(
1001, 1001,
); );
} }
@@ -108,7 +109,7 @@ describe('Frugal reads eval', () => {
const errorLines = [500, 510, 520]; const errorLines = [500, 510, 520];
for (const line of errorLines) { for (const line of errorLines) {
const covered = readRanges.some( const covered = readRanges.some(
(range) => line >= range.offset && line < range.offset + range.limit, (range) => line >= range.start_line && line <= range.end_line,
); );
expect(covered, `Agent should have read around line ${line}`).toBe( expect(covered, `Agent should have read around line ${line}`).toBe(
true, true,
@@ -191,8 +192,8 @@ describe('Frugal reads eval', () => {
for (const call of targetFileReads) { for (const call of targetFileReads) {
const args = JSON.parse(call.toolRequest.args); const args = JSON.parse(call.toolRequest.args);
expect( expect(
args.limit, args.end_line,
'Agent should have used ranged read (limit) to save tokens', 'Agent should have used ranged read (end_line) to save tokens',
).toBeDefined(); ).toBeDefined();
} }
}, },
@@ -253,7 +254,7 @@ describe('Frugal reads eval', () => {
// and just read the whole file to be efficient with tool calls. // and just read the whole file to be efficient with tool calls.
const readEntireFile = targetFileReads.some((call) => { const readEntireFile = targetFileReads.some((call) => {
const args = JSON.parse(call.toolRequest.args); const args = JSON.parse(call.toolRequest.args);
return args.limit === undefined; return args.end_line === undefined;
}); });
expect( expect(

View File

@@ -68,7 +68,7 @@ describe('Frugal Search', () => {
const args = getParams(call); const args = getParams(call);
return ( return (
args.file_path === 'src/legacy_processor.ts' && args.file_path === 'src/legacy_processor.ts' &&
(args.limit === undefined || args.limit === null) (args.end_line === undefined || args.end_line === null)
); );
}); });
@@ -87,7 +87,7 @@ describe('Frugal Search', () => {
if ( if (
call.toolRequest.name === 'read_file' && call.toolRequest.name === 'read_file' &&
args.file_path === 'src/legacy_processor.ts' && args.file_path === 'src/legacy_processor.ts' &&
args.limit !== undefined args.end_line !== undefined
) { ) {
return true; return true;
} }

View File

@@ -56,7 +56,7 @@ describe('interactive_commands', () => {
const scaffoldCall = logs.find( const scaffoldCall = logs.find(
(l) => (l) =>
l.toolRequest.name === 'run_shell_command' && l.toolRequest.name === 'run_shell_command' &&
/npm (init|create)|npx create-|yarn create|pnpm create/.test( /npm (init|create)|npx (.*)?create-|yarn create|pnpm create/.test(
l.toolRequest.args, l.toolRequest.args,
), ),
); );