Steer towards frugal reads.

2026-04-29 14:34:55 -07:00 · 2026-01-27 21:17:06 -08:00
parent 5e41b7d29e
commit 796dfac23c
3 changed files with 79 additions and 3 deletions
@@ -0,0 +1,72 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, expect } from 'vitest';
+import { evalTest } from './test-helper.js';
+import { GREP_TOOL_NAME, READ_FILE_TOOL_NAME } from '@google/gemini-cli-core';
+
+describe('optimization_evals', () => {
+  evalTest('ALWAYS_PASSES', {
+    name: 'should avoid reading entire file when fixing scattered linter issues',
+    files: {
+      'linter_mess.ts': (() => {
+        const lines = [];
+        for (let i = 0; i < 4000; i++) {
+          if (i % 300 === 0) {
+            lines.push(`var oldVar${i} = "needs fix";`); // 'var' is the "linter error"
+          } else {
+            lines.push(`const goodVar${i} = "clean";`);
+          }
+        }
+        return lines.join('\n');
+      })(),
+    },
+    prompt:
+      'Start by searching for "var" in linter_mess.ts. Then, using the line numbers from the search results, read only the 5 lines of context around each match to verify safety before fixing.',
+    assert: async (rig, result) => {
+      const logs = rig.readToolLogs();
+
+      // 1. Check if the agent read the whole file
+      const readCalls = logs.filter(
+        (log) => log.toolRequest?.name === READ_FILE_TOOL_NAME,
+      );
+      expect(
+        readCalls.length,
+        'Agent should have used read_file to check context',
+      ).toBeGreaterThan(0);
+
+      for (const call of readCalls) {
+        const args = JSON.parse(call.toolRequest.args);
+        if (args.file_path.includes('linter_mess.ts')) {
+          // If limit is missing or undefined, it defaults to reading the whole file (or a very large chunk)
+          // We want to force it to be specific.
+          expect(
+            args.limit,
+            'Agent read the entire file (missing limit) instead of searching/partial reading',
+          ).toBeDefined();
+
+          // Even if defined, it shouldn't be the file size (4000)
+          expect(args.limit, 'Agent read too many lines at once').toBeLessThan(
+            1000,
+          );
+        }
+      }
+
+      // 2. Verify search was likely used (good behavior)
+      const searchCalls = logs.filter(
+        (log) => log.toolRequest?.name === GREP_TOOL_NAME,
+      );
+      expect(
+        searchCalls.length,
+        'Agent should have searched for "var" first',
+      ).toBeGreaterThan(0);
+
+      // 3. Verify the file content was actually updated (using read_file to check disk state from test rig)
+      // Since we can't easily check disk state inside assert without 'fs', we rely on the tool execution logs or success.
+      // But for this behavior test, the read pattern is the most important part.
+    },
+  });
+});
@@ -135,6 +135,7 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string {
  return `
 # Core Mandates

+- **Context Efficiency:** Minimize context usage. Do not read entire files unless necessary. Use 'search_file_content' or 'read_file' with 'limit' to inspect large files.
 - **Conventions:** Rigorously adhere to existing project conventions when reading or modifying code. Analyze surrounding code, tests, and configuration first.
 - **Libraries/Frameworks:** NEVER assume a library/framework is available or appropriate. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', 'build.gradle', etc., or observe neighboring files) before employing it.
 - **Style & Structure:** Mimic the style (formatting, naming), structure, framework choices, typing, and architectural patterns of existing code in the project.
@@ -370,7 +371,10 @@ function workflowStepUnderstand(options: PrimaryWorkflowsOptions): string {
    return `1. **Understand & Strategize:** Think about the user's request and the relevant codebase context. When the task involves **complex refactoring, codebase exploration or system-wide analysis**, your **first and primary action** must be to delegate to the 'codebase_investigator' agent using the 'codebase_investigator' tool. Use it to build a comprehensive understanding of the code, its structure, and dependencies. For **simple, targeted searches** (like finding a specific function name, file path, or variable declaration), you should use '${GREP_TOOL_NAME}' or '${GLOB_TOOL_NAME}' directly.`;
  }
  return `1. **Understand:** Think about the user's request and the relevant codebase context. Use '${GREP_TOOL_NAME}' and '${GLOB_TOOL_NAME}' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions.
-Use '${READ_FILE_TOOL_NAME}' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to '${READ_FILE_TOOL_NAME}'.`;
+Use '${READ_FILE_TOOL_NAME}' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to '${READ_FILE_TOOL_NAME}'.
+Keep in mind the following as you explore:
+- Search and/or read enough files to gain a thorough understanding of the problem.
+- However, irrelevant context consumed by unnecessarily reading entire files will degrade the quality of your answer, so use ranged reads if you know which lines you need.`;
 }

 function workflowStepPlan(options: PrimaryWorkflowsOptions): string {
@@ -177,12 +177,12 @@ export class ReadFileTool extends BaseDeclarativeTool<
          },
          offset: {
            description:
-              "Optional: For text files, the 0-based line number to start reading from. Requires 'limit' to be set. Use for paginating through large files.",
+              "Optional: For text files, the 0-based line number to start reading from. Requires 'limit' to be set. Use with 'limit' to target specific lines.",
            type: 'number',
          },
          limit: {
            description:
-              "Optional: For text files, maximum number of lines to read. Use with 'offset' to paginate through large files. If omitted, reads the entire file (if feasible, up to a default limit).",
+              "Optional: For text files, maximum number of lines to read. Use with 'offset' to paginate through large files. When checking for context or verifying changes, always set this to a small value (e.g. 50) to avoid reading the entire file.",
            type: 'number',
          },
        },