Stabilize tests. (#20095)

This commit is contained in:
Christian Gunderman
2026-02-24 00:01:39 +00:00
committed by GitHub
parent 2ff7738b5d
commit 56c8d7e985
3 changed files with 16 additions and 15 deletions

View File

@@ -78,22 +78,23 @@ describe('Frugal reads eval', () => {
).toBe(true);
let totalLinesRead = 0;
- const readRanges: { offset: number; limit: number }[] = [];
+ const readRanges: { start_line: number; end_line: number }[] = [];
for (const call of targetFileReads) {
const args = JSON.parse(call.toolRequest.args);
expect(
- args.limit,
- 'Agent read the entire file (missing limit) instead of using ranged read',
+ args.end_line,
+ 'Agent read the entire file (missing end_line) instead of using ranged read',
).toBeDefined();
- const limit = args.limit;
- const offset = args.offset ?? 0;
- totalLinesRead += limit;
- readRanges.push({ offset, limit });
+ const end_line = args.end_line;
+ const start_line = args.start_line ?? 1;
+ const linesRead = end_line - start_line + 1;
+ totalLinesRead += linesRead;
+ readRanges.push({ start_line, end_line });
- expect(args.limit, 'Agent read too many lines at once').toBeLessThan(
+ expect(linesRead, 'Agent read too many lines at once').toBeLessThan(
1001,
);
}
@@ -108,7 +109,7 @@ describe('Frugal reads eval', () => {
const errorLines = [500, 510, 520];
for (const line of errorLines) {
const covered = readRanges.some(
- (range) => line >= range.offset && line < range.offset + range.limit,
+ (range) => line >= range.start_line && line <= range.end_line,
);
expect(covered, `Agent should have read around line ${line}`).toBe(
true,
@@ -191,8 +192,8 @@ describe('Frugal reads eval', () => {
for (const call of targetFileReads) {
const args = JSON.parse(call.toolRequest.args);
expect(
- args.limit,
- 'Agent should have used ranged read (limit) to save tokens',
+ args.end_line,
+ 'Agent should have used ranged read (end_line) to save tokens',
).toBeDefined();
}
},
@@ -253,7 +254,7 @@ describe('Frugal reads eval', () => {
// and just read the whole file to be efficient with tool calls.
const readEntireFile = targetFileReads.some((call) => {
const args = JSON.parse(call.toolRequest.args);
- return args.limit === undefined;
+ return args.end_line === undefined;
});
expect(

View File

@@ -68,7 +68,7 @@ describe('Frugal Search', () => {
const args = getParams(call);
return (
args.file_path === 'src/legacy_processor.ts' &&
- (args.limit === undefined || args.limit === null)
+ (args.end_line === undefined || args.end_line === null)
);
});
@@ -87,7 +87,7 @@ describe('Frugal Search', () => {
if (
call.toolRequest.name === 'read_file' &&
args.file_path === 'src/legacy_processor.ts' &&
- args.limit !== undefined
+ args.end_line !== undefined
) {
return true;
}

View File

@@ -56,7 +56,7 @@ describe('interactive_commands', () => {
const scaffoldCall = logs.find(
(l) =>
l.toolRequest.name === 'run_shell_command' &&
- /npm (init|create)|npx create-|yarn create|pnpm create/.test(
+ /npm (init|create)|npx (.*)?create-|yarn create|pnpm create/.test(
l.toolRequest.args,
),
);