Steer towards frugal reads.

This commit is contained in:
Christian Gunderman
2026-01-27 21:17:06 -08:00
parent 5e41b7d29e
commit 796dfac23c
3 changed files with 79 additions and 3 deletions

View File

@@ -0,0 +1,72 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js';
import { GREP_TOOL_NAME, READ_FILE_TOOL_NAME } from '@google/gemini-cli-core';
describe('optimization_evals', () => {
evalTest('ALWAYS_PASSES', {
name: 'should avoid reading entire file when fixing scattered linter issues',
files: {
'linter_mess.ts': (() => {
const lines = [];
for (let i = 0; i < 4000; i++) {
if (i % 300 === 0) {
lines.push(`var oldVar${i} = "needs fix";`); // 'var' is the "linter error"
} else {
lines.push(`const goodVar${i} = "clean";`);
}
}
return lines.join('\n');
})(),
},
prompt:
'Start by searching for "var" in linter_mess.ts. Then, using the line numbers from the search results, read only the 5 lines of context around each match to verify safety before fixing.',
assert: async (rig, result) => {
const logs = rig.readToolLogs();
// 1. Check if the agent read the whole file
const readCalls = logs.filter(
(log) => log.toolRequest?.name === READ_FILE_TOOL_NAME,
);
expect(
readCalls.length,
'Agent should have used read_file to check context',
).toBeGreaterThan(0);
for (const call of readCalls) {
const args = JSON.parse(call.toolRequest.args);
if (args.file_path.includes('linter_mess.ts')) {
// If limit is missing or undefined, it defaults to reading the whole file (or a very large chunk)
// We want to force it to be specific.
expect(
args.limit,
'Agent read the entire file (missing limit) instead of searching/partial reading',
).toBeDefined();
// Even if defined, it shouldn't be the file size (4000)
expect(args.limit, 'Agent read too many lines at once').toBeLessThan(
1000,
);
}
}
// 2. Verify search was likely used (good behavior)
const searchCalls = logs.filter(
(log) => log.toolRequest?.name === GREP_TOOL_NAME,
);
expect(
searchCalls.length,
'Agent should have searched for "var" first',
).toBeGreaterThan(0);
// 3. Verify the file content was actually updated (using read_file to check disk state from test rig)
// Since we can't easily check disk state inside assert without 'fs', we rely on the tool execution logs or success.
// But for this behavior test, the read pattern is the most important part.
},
});
});

View File

@@ -135,6 +135,7 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string {
return `
# Core Mandates
- **Context Efficiency:** Minimize context usage. Do not read entire files unless necessary. Use 'search_file_content' or 'read_file' with 'limit' to inspect large files.
- **Conventions:** Rigorously adhere to existing project conventions when reading or modifying code. Analyze surrounding code, tests, and configuration first.
- **Libraries/Frameworks:** NEVER assume a library/framework is available or appropriate. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', 'build.gradle', etc., or observe neighboring files) before employing it.
- **Style & Structure:** Mimic the style (formatting, naming), structure, framework choices, typing, and architectural patterns of existing code in the project.
@@ -370,7 +371,10 @@ function workflowStepUnderstand(options: PrimaryWorkflowsOptions): string {
return `1. **Understand & Strategize:** Think about the user's request and the relevant codebase context. When the task involves **complex refactoring, codebase exploration or system-wide analysis**, your **first and primary action** must be to delegate to the 'codebase_investigator' agent using the 'codebase_investigator' tool. Use it to build a comprehensive understanding of the code, its structure, and dependencies. For **simple, targeted searches** (like finding a specific function name, file path, or variable declaration), you should use '${GREP_TOOL_NAME}' or '${GLOB_TOOL_NAME}' directly.`;
}
return `1. **Understand:** Think about the user's request and the relevant codebase context. Use '${GREP_TOOL_NAME}' and '${GLOB_TOOL_NAME}' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions.
Use '${READ_FILE_TOOL_NAME}' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to '${READ_FILE_TOOL_NAME}'.`;
Use '${READ_FILE_TOOL_NAME}' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to '${READ_FILE_TOOL_NAME}'.
Keep in mind the following as you explore:
- Search and/or read enough files to gain a thorough understanding of the problem.
- However, irrelevant context consumed by unnecessarily reading entire files will degrade the quality of your answer, so use ranged reads if you know which lines you need.`;
}
function workflowStepPlan(options: PrimaryWorkflowsOptions): string {

View File

@@ -177,12 +177,12 @@ export class ReadFileTool extends BaseDeclarativeTool<
},
offset: {
description:
"Optional: For text files, the 0-based line number to start reading from. Requires 'limit' to be set. Use for paginating through large files.",
"Optional: For text files, the 0-based line number to start reading from. Requires 'limit' to be set. Use with 'limit' to target specific lines.",
type: 'number',
},
limit: {
description:
"Optional: For text files, maximum number of lines to read. Use with 'offset' to paginate through large files. If omitted, reads the entire file (if feasible, up to a default limit).",
"Optional: For text files, maximum number of lines to read. Use with 'offset' to paginate through large files. When checking for context or verifying changes, always set this to a small value (e.g. 50) to avoid reading the entire file.",
type: 'number',
},
},