From 796dfac23c8dd036a7660bda704707b5ac3ac9a6 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Tue, 27 Jan 2026 21:17:06 -0800 Subject: [PATCH] Steer towards frugal reads. --- evals/read_file_optimization.eval.ts | 72 +++++++++++++++++++++++++++ packages/core/src/prompts/snippets.ts | 6 ++- packages/core/src/tools/read-file.ts | 4 +- 3 files changed, 79 insertions(+), 3 deletions(-) create mode 100644 evals/read_file_optimization.eval.ts diff --git a/evals/read_file_optimization.eval.ts b/evals/read_file_optimization.eval.ts new file mode 100644 index 0000000000..109736cc48 --- /dev/null +++ b/evals/read_file_optimization.eval.ts @@ -0,0 +1,72 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import { evalTest } from './test-helper.js'; +import { GREP_TOOL_NAME, READ_FILE_TOOL_NAME } from '@google/gemini-cli-core'; + +describe('optimization_evals', () => { + evalTest('ALWAYS_PASSES', { + name: 'should avoid reading entire file when fixing scattered linter issues', + files: { + 'linter_mess.ts': (() => { + const lines = []; + for (let i = 0; i < 4000; i++) { + if (i % 300 === 0) { + lines.push(`var oldVar${i} = "needs fix";`); // 'var' is the "linter error" + } else { + lines.push(`const goodVar${i} = "clean";`); + } + } + return lines.join('\n'); + })(), + }, + prompt: + 'Start by searching for "var" in linter_mess.ts. Then, using the line numbers from the search results, read only the 5 lines of context around each match to verify safety before fixing.', + assert: async (rig, result) => { + const logs = rig.readToolLogs(); + + // 1. Check if the agent read the whole file + const readCalls = logs.filter( + (log) => log.toolRequest?.name === READ_FILE_TOOL_NAME, + ); + expect( + readCalls.length, + 'Agent should have used read_file to check context', + ).toBeGreaterThan(0); + + for (const call of readCalls) { + const args = JSON.parse(call.toolRequest.args); + if (args.file_path.includes('linter_mess.ts')) { + // If limit is missing or undefined, it defaults to reading the whole file (or a very large chunk) + // We want to force it to be specific. + expect( + args.limit, + 'Agent read the entire file (missing limit) instead of searching/partial reading', + ).toBeDefined(); + + // Even if defined, it shouldn't be the file size (4000) + expect(args.limit, 'Agent read too many lines at once').toBeLessThan( + 1000, + ); + } + } + + // 2. Verify search was likely used (good behavior) + const searchCalls = logs.filter( + (log) => log.toolRequest?.name === GREP_TOOL_NAME, + ); + expect( + searchCalls.length, + 'Agent should have searched for "var" first', + ).toBeGreaterThan(0); + + // 3. Verify the file content was actually updated (using read_file to check disk state from test rig) + // Since we can't easily check disk state inside assert without 'fs', we rely on the tool execution logs or success. + // But for this behavior test, the read pattern is the most important part. + }, + }); +}); diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 38ba82624e..0f790a6b20 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -135,6 +135,7 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { return ` # Core Mandates +- **Context Efficiency:** Minimize context usage. Do not read entire files unless necessary. Use 'search_file_content' or 'read_file' with 'limit' to inspect large files. - **Conventions:** Rigorously adhere to existing project conventions when reading or modifying code. Analyze surrounding code, tests, and configuration first. - **Libraries/Frameworks:** NEVER assume a library/framework is available or appropriate. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', 'build.gradle', etc., or observe neighboring files) before employing it. - **Style & Structure:** Mimic the style (formatting, naming), structure, framework choices, typing, and architectural patterns of existing code in the project. @@ -370,7 +371,10 @@ function workflowStepUnderstand(options: PrimaryWorkflowsOptions): string { return `1. **Understand & Strategize:** Think about the user's request and the relevant codebase context. When the task involves **complex refactoring, codebase exploration or system-wide analysis**, your **first and primary action** must be to delegate to the 'codebase_investigator' agent using the 'codebase_investigator' tool. Use it to build a comprehensive understanding of the code, its structure, and dependencies. For **simple, targeted searches** (like finding a specific function name, file path, or variable declaration), you should use '${GREP_TOOL_NAME}' or '${GLOB_TOOL_NAME}' directly.`; } return `1. **Understand:** Think about the user's request and the relevant codebase context. Use '${GREP_TOOL_NAME}' and '${GLOB_TOOL_NAME}' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. -Use '${READ_FILE_TOOL_NAME}' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to '${READ_FILE_TOOL_NAME}'.`; +Use '${READ_FILE_TOOL_NAME}' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to '${READ_FILE_TOOL_NAME}'. +Keep in mind the following as you explore: +- Search and/or read enough files to gain a thorough understanding of the problem. +- However, irrelevant context consumed by unnecessarily reading entire files will degrade the quality of your answer, so use ranged reads if you know which lines you need.`; } function workflowStepPlan(options: PrimaryWorkflowsOptions): string { diff --git a/packages/core/src/tools/read-file.ts b/packages/core/src/tools/read-file.ts index d7a3684b51..983d8840e2 100644 --- a/packages/core/src/tools/read-file.ts +++ b/packages/core/src/tools/read-file.ts @@ -177,12 +177,12 @@ export class ReadFileTool extends BaseDeclarativeTool< }, offset: { description: - "Optional: For text files, the 0-based line number to start reading from. Requires 'limit' to be set. Use for paginating through large files.", + "Optional: For text files, the 0-based line number to start reading from. Requires 'limit' to be set. Use with 'limit' to target specific lines.", type: 'number', }, limit: { description: - "Optional: For text files, maximum number of lines to read. Use with 'offset' to paginate through large files. If omitted, reads the entire file (if feasible, up to a default limit).", + "Optional: For text files, maximum number of lines to read. Use with 'offset' to paginate through large files. When checking for context or verifying changes, always set this to a small value (e.g. 50) to avoid reading the entire file.", type: 'number', }, },