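Stabilize tests. (#20095)

Update the eval assertions to check the ranged-read arguments `start_line`/`end_line` (treated by the tests as 1-based and inclusive) instead of `offset`/`limit`, and let the scaffold-command regex match `npx` invocations that have extra text before `create-`.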

2026-04-29 22:44:45 -07:00 · 2026-02-24 00:01:39 +00:00
parent 2ff7738b5d
commit 56c8d7e985
3 changed files with 16 additions and 15 deletions
@@ -78,22 +78,23 @@ describe('Frugal reads eval', () => {
      ).toBe(true);

      let totalLinesRead = 0;
-      const readRanges: { offset: number; limit: number }[] = [];
+      const readRanges: { start_line: number; end_line: number }[] = [];

      for (const call of targetFileReads) {
        const args = JSON.parse(call.toolRequest.args);

        expect(
-          args.limit,
-          'Agent read the entire file (missing limit) instead of using ranged read',
+          args.end_line,
+          'Agent read the entire file (missing end_line) instead of using ranged read',
        ).toBeDefined();

-        const limit = args.limit;
-        const offset = args.offset ?? 0;
-        totalLinesRead += limit;
-        readRanges.push({ offset, limit });
+        const end_line = args.end_line;
+        const start_line = args.start_line ?? 1;
+        const linesRead = end_line - start_line + 1;
+        totalLinesRead += linesRead;
+        readRanges.push({ start_line, end_line });

-        expect(args.limit, 'Agent read too many lines at once').toBeLessThan(
+        expect(linesRead, 'Agent read too many lines at once').toBeLessThan(
          1001,
        );
      }
@@ -108,7 +109,7 @@ describe('Frugal reads eval', () => {
      const errorLines = [500, 510, 520];
      for (const line of errorLines) {
        const covered = readRanges.some(
-          (range) => line >= range.offset && line < range.offset + range.limit,
+          (range) => line >= range.start_line && line <= range.end_line,
        );
        expect(covered, `Agent should have read around line ${line}`).toBe(
          true,
@@ -191,8 +192,8 @@ describe('Frugal reads eval', () => {
      for (const call of targetFileReads) {
        const args = JSON.parse(call.toolRequest.args);
        expect(
-          args.limit,
-          'Agent should have used ranged read (limit) to save tokens',
+          args.end_line,
+          'Agent should have used ranged read (end_line) to save tokens',
        ).toBeDefined();
      }
    },
@@ -253,7 +254,7 @@ describe('Frugal reads eval', () => {
      // and just read the whole file to be efficient with tool calls.
      const readEntireFile = targetFileReads.some((call) => {
        const args = JSON.parse(call.toolRequest.args);
-        return args.limit === undefined;
+        return args.end_line === undefined;
      });

      expect(
@@ -68,7 +68,7 @@ describe('Frugal Search', () => {
        const args = getParams(call);
        return (
          args.file_path === 'src/legacy_processor.ts' &&
-          (args.limit === undefined || args.limit === null)
+          (args.end_line === undefined || args.end_line === null)
        );
      });

@@ -87,7 +87,7 @@ describe('Frugal Search', () => {
        if (
          call.toolRequest.name === 'read_file' &&
          args.file_path === 'src/legacy_processor.ts' &&
-          args.limit !== undefined
+          args.end_line !== undefined
        ) {
          return true;
        }
@@ -56,7 +56,7 @@ describe('interactive_commands', () => {
      const scaffoldCall = logs.find(
        (l) =>
          l.toolRequest.name === 'run_shell_command' &&
-          /npm (init|create)|npx create-|yarn create|pnpm create/.test(
+          /npm (init|create)|npx (.*)?create-|yarn create|pnpm create/.test(
            l.toolRequest.args,
          ),
      );