From 9f0d2b92f0c20cd778b3639e01ef344cbd8ab191 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Wed, 28 Jan 2026 22:27:49 -0800 Subject: [PATCH] Limit search output. --- evals/frugalReads.eval.ts | 73 +++++++++++++++++++++++++++++- packages/core/src/tools/grep.ts | 16 +++++-- packages/core/src/tools/ripGrep.ts | 11 ++++- 3 files changed, 93 insertions(+), 7 deletions(-) diff --git a/evals/frugalReads.eval.ts b/evals/frugalReads.eval.ts index 0474030502..dc9d947e9a 100644 --- a/evals/frugalReads.eval.ts +++ b/evals/frugalReads.eval.ts @@ -6,7 +6,7 @@ import { describe, expect } from 'vitest'; import { evalTest } from './test-helper.js'; -import { READ_FILE_TOOL_NAME } from '@google/gemini-cli-core'; +import { READ_FILE_TOOL_NAME, GREP_TOOL_NAME } from '@google/gemini-cli-core'; describe('Frugal reads eval', () => { /** @@ -112,4 +112,75 @@ describe('Frugal reads eval', () => { } }, }); + + /** + * Ensures that the agent uses search_file_content effectively when searching + * through large files, and refines its search or uses context to find the + * correct match among many. + */ + evalTest('ALWAYS_PASSES', { + name: 'should use search_file_content with context and limits to find a needle in a haystack', + files: (() => { + const files: Record = {}; + for (let f = 1; f <= 5; f++) { + const lines = []; + for (let i = 0; i < 2000; i++) { + if (f === 3 && i === 1500) { + lines.push('Pattern: TargetMatch'); + lines.push('Metadata: CORRECT_VALUE_42'); + } else if (i % 50 === 0) { + lines.push('Pattern: TargetMatch'); + lines.push('Metadata: WRONG_VALUE'); + } else { + lines.push(`Noise line ${i} in file ${f}`); + } + } + files[`large_file_${f}.txt`] = lines.join('\n'); + } + return files; + })(), + prompt: + 'Find the "Metadata" value associated with the "Pattern: TargetMatch" in the large_file_*.txt files. There are many such patterns, so you MUST set the "limit" parameter of search_file_content to 10 to avoid returning too many results. If you do not find the correct metadata (CORRECT_VALUE_42) in the first batch, refine your search or search file-by-file.', + assert: async (rig) => { + const logs = rig.readToolLogs(); + + const grepCalls = logs.filter( + (log) => log.toolRequest?.name === GREP_TOOL_NAME, + ); + + expect( + grepCalls.length, + 'Agent should have used search_file_content to find the pattern', + ).toBeGreaterThan(0); + + // Check that the agent used the limit parameter + const usedLimit = grepCalls.some((call) => { + const args = JSON.parse(call.toolRequest.args); + return args.limit !== undefined && args.limit <= 20; + }); + expect(usedLimit, 'Agent should have used the limit parameter').toBe( + true, + ); + + // We expect the agent to eventually use context or refine the search. + const usedContext = grepCalls.some((call) => { + const args = JSON.parse(call.toolRequest.args); + return (args.after ?? 0) > 0 || (args.context ?? 0) > 0; + }); + + const usedReadForContext = logs.some((log) => { + if (log.toolRequest?.name !== READ_FILE_TOOL_NAME) return false; + const args = JSON.parse(log.toolRequest.args); + return ( + args.file_path.includes('large_file_3.txt') && + args.offset !== undefined + ); + }); + + expect( + usedContext || usedReadForContext, + 'Agent should have used context (either via grep "after/context" or read_file) to find the metadata', + ).toBe(true); + }, + }); }); diff --git a/packages/core/src/tools/grep.ts b/packages/core/src/tools/grep.ts index f1a0d413fe..695ac004b0 100644 --- a/packages/core/src/tools/grep.ts +++ b/packages/core/src/tools/grep.ts @@ -46,6 +46,11 @@ export interface GrepToolParams { * File pattern to include in the search (e.g. "*.js", "*.{ts,tsx}") */ include?: string; + + /** + * Max number of matches to return. Defaults to 20,000. + */ + limit?: number; } /** @@ -184,7 +189,7 @@ class GrepToolInvocation extends BaseToolInvocation< // Collect matches from all search directories let allMatches: GrepMatch[] = []; - const totalMaxMatches = DEFAULT_TOTAL_MAX_MATCHES; + const totalMaxMatches = this.params.limit ?? DEFAULT_TOTAL_MAX_MATCHES; // Create a timeout controller to prevent indefinitely hanging searches const timeoutController = new AbortController(); @@ -352,10 +357,6 @@ class GrepToolInvocation extends BaseToolInvocation< '--ignore-case', pattern, ]; - if (include) { - gitArgs.push('--', include); - } - try { const generator = execStreaming('git', gitArgs, { cwd: absolutePath, @@ -587,6 +588,11 @@ export class GrepTool extends BaseDeclarativeTool { description: `Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).`, type: 'string', }, + limit: { + description: + 'Optional: Max number of matches to return. Defaults to 20,000.', + type: 'integer', + }, }, required: ['pattern'], type: 'object', diff --git a/packages/core/src/tools/ripGrep.ts b/packages/core/src/tools/ripGrep.ts index 4752edf9b9..26265280f9 100644 --- a/packages/core/src/tools/ripGrep.ts +++ b/packages/core/src/tools/ripGrep.ts @@ -131,6 +131,11 @@ export interface RipGrepToolParams { * If true, does not respect .gitignore or default ignores (like build/dist). */ no_ignore?: boolean; + + /** + * Max number of matches to return. Defaults to 20,000. + */ + limit?: number; } /** @@ -204,7 +209,7 @@ class GrepToolInvocation extends BaseToolInvocation< const searchDirDisplay = pathParam; - const totalMaxMatches = DEFAULT_TOTAL_MAX_MATCHES; + const totalMaxMatches = this.params.limit ?? DEFAULT_TOTAL_MAX_MATCHES; if (this.config.getDebugMode()) { debugLogger.log(`[GrepTool] Total result limit: ${totalMaxMatches}`); } @@ -530,6 +535,10 @@ export class RipGrepTool extends BaseDeclarativeTool< 'If true, searches all files including those usually ignored (like in .gitignore, build/, dist/, etc). Defaults to false if omitted.', type: 'boolean', }, + limit: { + description: 'Max number of matches to return. Defaults to 20,000.', + type: 'integer', + }, }, required: ['pattern'], type: 'object',