mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-15 08:31:14 -07:00
Steer towards frugal reads.
This commit is contained in:
72
evals/read_file_optimization.eval.ts
Normal file
72
evals/read_file_optimization.eval.ts
Normal file
@@ -0,0 +1,72 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, expect } from 'vitest';
|
||||
import { evalTest } from './test-helper.js';
|
||||
import { GREP_TOOL_NAME, READ_FILE_TOOL_NAME } from '@google/gemini-cli-core';
|
||||
|
||||
describe('optimization_evals', () => {
|
||||
evalTest('ALWAYS_PASSES', {
|
||||
name: 'should avoid reading entire file when fixing scattered linter issues',
|
||||
files: {
|
||||
'linter_mess.ts': (() => {
|
||||
const lines = [];
|
||||
for (let i = 0; i < 4000; i++) {
|
||||
if (i % 300 === 0) {
|
||||
lines.push(`var oldVar${i} = "needs fix";`); // 'var' is the "linter error"
|
||||
} else {
|
||||
lines.push(`const goodVar${i} = "clean";`);
|
||||
}
|
||||
}
|
||||
return lines.join('\n');
|
||||
})(),
|
||||
},
|
||||
prompt:
|
||||
'Start by searching for "var" in linter_mess.ts. Then, using the line numbers from the search results, read only the 5 lines of context around each match to verify safety before fixing.',
|
||||
assert: async (rig, result) => {
|
||||
const logs = rig.readToolLogs();
|
||||
|
||||
// 1. Check if the agent read the whole file
|
||||
const readCalls = logs.filter(
|
||||
(log) => log.toolRequest?.name === READ_FILE_TOOL_NAME,
|
||||
);
|
||||
expect(
|
||||
readCalls.length,
|
||||
'Agent should have used read_file to check context',
|
||||
).toBeGreaterThan(0);
|
||||
|
||||
for (const call of readCalls) {
|
||||
const args = JSON.parse(call.toolRequest.args);
|
||||
if (args.file_path.includes('linter_mess.ts')) {
|
||||
// If limit is missing or undefined, it defaults to reading the whole file (or a very large chunk)
|
||||
// We want to force it to be specific.
|
||||
expect(
|
||||
args.limit,
|
||||
'Agent read the entire file (missing limit) instead of searching/partial reading',
|
||||
).toBeDefined();
|
||||
|
||||
// Even if defined, it shouldn't be the file size (4000)
|
||||
expect(args.limit, 'Agent read too many lines at once').toBeLessThan(
|
||||
1000,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Verify search was likely used (good behavior)
|
||||
const searchCalls = logs.filter(
|
||||
(log) => log.toolRequest?.name === GREP_TOOL_NAME,
|
||||
);
|
||||
expect(
|
||||
searchCalls.length,
|
||||
'Agent should have searched for "var" first',
|
||||
).toBeGreaterThan(0);
|
||||
|
||||
// 3. Verify the file content was actually updated (using read_file to check disk state from test rig)
|
||||
// Since we can't easily check disk state inside assert without 'fs', we rely on the tool execution logs or success.
|
||||
// But for this behavior test, the read pattern is the most important part.
|
||||
},
|
||||
});
|
||||
});
|
||||
@@ -135,6 +135,7 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string {
|
||||
return `
|
||||
# Core Mandates
|
||||
|
||||
- **Context Efficiency:** Minimize context usage. Do not read entire files unless necessary. Use 'search_file_content' or 'read_file' with 'limit' to inspect large files.
|
||||
- **Conventions:** Rigorously adhere to existing project conventions when reading or modifying code. Analyze surrounding code, tests, and configuration first.
|
||||
- **Libraries/Frameworks:** NEVER assume a library/framework is available or appropriate. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', 'build.gradle', etc., or observe neighboring files) before employing it.
|
||||
- **Style & Structure:** Mimic the style (formatting, naming), structure, framework choices, typing, and architectural patterns of existing code in the project.
|
||||
@@ -370,7 +371,10 @@ function workflowStepUnderstand(options: PrimaryWorkflowsOptions): string {
|
||||
return `1. **Understand & Strategize:** Think about the user's request and the relevant codebase context. When the task involves **complex refactoring, codebase exploration or system-wide analysis**, your **first and primary action** must be to delegate to the 'codebase_investigator' agent using the 'codebase_investigator' tool. Use it to build a comprehensive understanding of the code, its structure, and dependencies. For **simple, targeted searches** (like finding a specific function name, file path, or variable declaration), you should use '${GREP_TOOL_NAME}' or '${GLOB_TOOL_NAME}' directly.`;
|
||||
}
|
||||
return `1. **Understand:** Think about the user's request and the relevant codebase context. Use '${GREP_TOOL_NAME}' and '${GLOB_TOOL_NAME}' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions.
|
||||
Use '${READ_FILE_TOOL_NAME}' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to '${READ_FILE_TOOL_NAME}'.`;
|
||||
Use '${READ_FILE_TOOL_NAME}' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to '${READ_FILE_TOOL_NAME}'.
|
||||
Keep in mind the following as you explore:
|
||||
- Search and/or read enough files to gain a thorough understanding of the problem.
|
||||
- However, irrelevant context consumed by unnecessarily reading entire files will degrade the quality of your answer, so use ranged reads if you know which lines you need.`;
|
||||
}
|
||||
|
||||
function workflowStepPlan(options: PrimaryWorkflowsOptions): string {
|
||||
|
||||
@@ -177,12 +177,12 @@ export class ReadFileTool extends BaseDeclarativeTool<
|
||||
},
|
||||
offset: {
|
||||
description:
|
||||
"Optional: For text files, the 0-based line number to start reading from. Requires 'limit' to be set. Use for paginating through large files.",
|
||||
"Optional: For text files, the 0-based line number to start reading from. Requires 'limit' to be set. Use with 'limit' to target specific lines.",
|
||||
type: 'number',
|
||||
},
|
||||
limit: {
|
||||
description:
|
||||
"Optional: For text files, maximum number of lines to read. Use with 'offset' to paginate through large files. If omitted, reads the entire file (if feasible, up to a default limit).",
|
||||
"Optional: For text files, maximum number of lines to read. Use with 'offset' to paginate through large files. When checking for context or verifying changes, always set this to a small value (e.g. 50) to avoid reading the entire file.",
|
||||
type: 'number',
|
||||
},
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user