diff --git a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap index 3420f3a6bf..a61afbd2b9 100644 --- a/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap +++ b/packages/core/src/tools/definitions/__snapshots__/coreToolsModelSnapshots.test.ts.snap @@ -49,10 +49,20 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps "description": "Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).", "type": "string", }, + "max_matches_per_file": { + "description": "Optional: Maximum number of matches to return per file. Use this to prevent being overwhelmed by repetitive matches in large files.", + "minimum": 1, + "type": "integer", + }, "pattern": { "description": "The regular expression (regex) pattern to search for within file contents (e.g., 'function\\s+myFunction', 'import\\s+\\{.*\\}\\s+from\\s+.*').", "type": "string", }, + "total_max_matches": { + "description": "Optional: Maximum number of total matches to return. Use this to limit the overall size of the response. Defaults to 100 if omitted.", + "minimum": 1, + "type": "integer", + }, }, "required": [ "pattern", @@ -248,10 +258,20 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > "description": "Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).", "type": "string", }, + "max_matches_per_file": { + "description": "Optional: Maximum number of matches to return per file. Use this to prevent being overwhelmed by repetitive matches in large files.", + "minimum": 1, + "type": "integer", + }, "pattern": { "description": "The regular expression (regex) pattern to search for within file contents (e.g., 'function\\s+myFunction', 'import\\s+\\{.*\\}\\s+from\\s+.*').", "type": "string", }, + "total_max_matches": { + "description": "Optional: Maximum number of total matches to return. Use this to limit the overall size of the response. Defaults to 100 if omitted.", + "minimum": 1, + "type": "integer", + }, }, "required": [ "pattern", diff --git a/packages/core/src/tools/definitions/coreTools.ts b/packages/core/src/tools/definitions/coreTools.ts index 71fe1793e9..b87a6a10e4 100644 --- a/packages/core/src/tools/definitions/coreTools.ts +++ b/packages/core/src/tools/definitions/coreTools.ts @@ -98,6 +98,18 @@ export const GREP_DEFINITION: ToolDefinition = { description: `Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).`, type: 'string', }, + max_matches_per_file: { + description: + 'Optional: Maximum number of matches to return per file. Use this to prevent being overwhelmed by repetitive matches in large files.', + type: 'integer', + minimum: 1, + }, + total_max_matches: { + description: + 'Optional: Maximum number of total matches to return. Use this to limit the overall size of the response. Defaults to 100 if omitted.', + type: 'integer', + minimum: 1, + }, }, required: ['pattern'], }, diff --git a/packages/core/src/tools/grep.test.ts b/packages/core/src/tools/grep.test.ts index 3f1f023faf..743116f0f1 100644 --- a/packages/core/src/tools/grep.test.ts +++ b/packages/core/src/tools/grep.test.ts @@ -458,6 +458,46 @@ describe('GrepTool', () => { // Clean up await fs.rm(secondDir, { recursive: true, force: true }); }); + + it('should respect total_max_matches and truncate results', async () => { + // Use 'world' pattern which has 3 matches across fileA.txt and sub/fileC.txt + const params: GrepToolParams = { + pattern: 'world', + total_max_matches: 2, + }; + const invocation = grepTool.build(params); + const result = await invocation.execute(abortSignal); + + expect(result.llmContent).toContain('Found 2 matches'); + expect(result.llmContent).toContain( + 'results limited to 2 matches for performance', + ); + // It should find matches in fileA.txt first (2 matches) + expect(result.llmContent).toContain('File: fileA.txt'); + expect(result.llmContent).toContain('L1: hello world'); + expect(result.llmContent).toContain('L2: second line with world'); + // And sub/fileC.txt should be excluded because limit reached + expect(result.llmContent).not.toContain('File: sub/fileC.txt'); + expect(result.returnDisplay).toBe('Found 2 matches (limited)'); + }); + + it('should respect max_matches_per_file in JS fallback', async () => { + const params: GrepToolParams = { + pattern: 'world', + max_matches_per_file: 1, + }; + const invocation = grepTool.build(params); + const result = await invocation.execute(abortSignal); + + // fileA.txt has 2 worlds, but should only return 1. + // sub/fileC.txt has 1 world, so total matches = 2. + expect(result.llmContent).toContain('Found 2 matches'); + expect(result.llmContent).toContain('File: fileA.txt'); + expect(result.llmContent).toContain('L1: hello world'); + expect(result.llmContent).not.toContain('L2: second line with world'); + expect(result.llmContent).toContain('File: sub/fileC.txt'); + expect(result.llmContent).toContain('L1: another world in sub dir'); + }); }); describe('getDescription', () => { diff --git a/packages/core/src/tools/grep.ts b/packages/core/src/tools/grep.ts index 48f68f9609..fb8fde2bd8 100644 --- a/packages/core/src/tools/grep.ts +++ b/packages/core/src/tools/grep.ts @@ -48,6 +48,16 @@ export interface GrepToolParams { * File pattern to include in the search (e.g. "*.js", "*.{ts,tsx}") */ include?: string; + + /** + * Optional: Maximum number of matches to return per file. Use this to prevent being overwhelmed by repetitive matches in large files. + */ + max_matches_per_file?: number; + + /** + * Optional: Maximum number of total matches to return. Use this to limit the overall size of the response. Defaults to 100 if omitted. + */ + total_max_matches?: number; } /** @@ -189,7 +199,8 @@ class GrepToolInvocation extends BaseToolInvocation< // Collect matches from all search directories let allMatches: GrepMatch[] = []; - const totalMaxMatches = DEFAULT_TOTAL_MAX_MATCHES; + const totalMaxMatches = + this.params.total_max_matches ?? DEFAULT_TOTAL_MAX_MATCHES; // Create a timeout controller to prevent indefinitely hanging searches const timeoutController = new AbortController(); @@ -215,6 +226,7 @@ class GrepToolInvocation extends BaseToolInvocation< path: searchDir, include: this.params.include, maxMatches: remainingLimit, + max_matches_per_file: this.params.max_matches_per_file, signal: timeoutController.signal, }); @@ -343,9 +355,16 @@ class GrepToolInvocation extends BaseToolInvocation< path: string; // Expects absolute path include?: string; maxMatches: number; + max_matches_per_file?: number; signal: AbortSignal; }): Promise { - const { pattern, path: absolutePath, include, maxMatches } = options; + const { + pattern, + path: absolutePath, + include, + maxMatches, + max_matches_per_file, + } = options; let strategyUsed = 'none'; try { @@ -363,6 +382,9 @@ class GrepToolInvocation extends BaseToolInvocation< '--ignore-case', pattern, ]; + if (max_matches_per_file) { + gitArgs.push('--max-count', max_matches_per_file.toString()); + } if (include) { gitArgs.push('--', include); } @@ -425,6 +447,9 @@ class GrepToolInvocation extends BaseToolInvocation< }) .filter((dir): dir is string => !!dir); commonExcludes.forEach((dir) => grepArgs.push(`--exclude-dir=${dir}`)); + if (max_matches_per_file) { + grepArgs.push('--max-count', max_matches_per_file.toString()); + } if (include) { grepArgs.push(`--include=${include}`); } @@ -499,6 +524,7 @@ class GrepToolInvocation extends BaseToolInvocation< try { const content = await fsPromises.readFile(fileAbsolutePath, 'utf8'); const lines = content.split(/\r?\n/); + let matchesInFile = 0; for (let index = 0; index < lines.length; index++) { const line = lines[index]; if (regex.test(line)) { @@ -509,7 +535,14 @@ class GrepToolInvocation extends BaseToolInvocation< lineNumber: index + 1, line, }); + matchesInFile++; if (allMatches.length >= maxMatches) break; + if ( + max_matches_per_file && + matchesInFile >= max_matches_per_file + ) { + break; + } } } } catch (readError: unknown) { @@ -604,6 +637,20 @@ export class GrepTool extends BaseDeclarativeTool { return `Invalid regular expression pattern provided: ${params.pattern}. Error: ${getErrorMessage(error)}`; } + if ( + params.max_matches_per_file !== undefined && + params.max_matches_per_file < 1 + ) { + return 'max_matches_per_file must be at least 1.'; + } + + if ( + params.total_max_matches !== undefined && + params.total_max_matches < 1 + ) { + return 'total_max_matches must be at least 1.'; + } + // Only validate dir_path if one is provided if (params.dir_path) { const resolvedPath = path.resolve( diff --git a/packages/core/src/tools/ripGrep.test.ts b/packages/core/src/tools/ripGrep.test.ts index f3c780603d..be3c298d76 100644 --- a/packages/core/src/tools/ripGrep.test.ts +++ b/packages/core/src/tools/ripGrep.test.ts @@ -1848,6 +1848,89 @@ describe('RipGrepTool', () => { expect(invocation.getDescription()).toContain(path.join('src', 'app')); }); }); + + describe('new parameters', () => { + it('should pass --max-count when max_matches_per_file is provided', async () => { + mockSpawn.mockImplementationOnce( + createMockSpawn({ + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileA.txt' }, + line_number: 1, + lines: { text: 'hello world\n' }, + }, + }) + '\n', + exitCode: 0, + }), + ); + + const params: RipGrepToolParams = { + pattern: 'world', + max_matches_per_file: 1, + }; + const invocation = grepTool.build(params); + await invocation.execute(abortSignal); + + const spawnArgs = mockSpawn.mock.calls[0][1]; + expect(spawnArgs).toContain('--max-count'); + expect(spawnArgs).toContain('1'); + }); + + it('should respect total_max_matches and truncate results', async () => { + // Return 3 matches, but set total_max_matches to 2 + mockSpawn.mockImplementationOnce( + createMockSpawn({ + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileA.txt' }, + line_number: 1, + lines: { text: 'match 1\n' }, + }, + }) + + '\n' + + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileA.txt' }, + line_number: 2, + lines: { text: 'match 2\n' }, + }, + }) + + '\n' + + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileA.txt' }, + line_number: 3, + lines: { text: 'match 3\n' }, + }, + }) + + '\n', + exitCode: 0, + }), + ); + + const params: RipGrepToolParams = { + pattern: 'match', + total_max_matches: 2, + }; + const invocation = grepTool.build(params); + const result = await invocation.execute(abortSignal); + + expect(result.llmContent).toContain('Found 2 matches'); + expect(result.llmContent).toContain( + 'results limited to 2 matches for performance', + ); + expect(result.llmContent).toContain('L1: match 1'); + expect(result.llmContent).toContain('L2: match 2'); + expect(result.llmContent).not.toContain('L3: match 3'); + expect(result.returnDisplay).toBe('Found 2 matches (limited)'); + }); + }); }); afterAll(() => { diff --git a/packages/core/src/tools/ripGrep.ts b/packages/core/src/tools/ripGrep.ts index ebf022472c..c2e27bcdbe 100644 --- a/packages/core/src/tools/ripGrep.ts +++ b/packages/core/src/tools/ripGrep.ts @@ -131,6 +131,16 @@ export interface RipGrepToolParams { * If true, does not respect .gitignore or default ignores (like build/dist). */ no_ignore?: boolean; + + /** + * Optional: Maximum number of matches to return per file. Use this to prevent being overwhelmed by repetitive matches in large files. + */ + max_matches_per_file?: number; + + /** + * Optional: Maximum number of total matches to return. Use this to limit the overall size of the response. Defaults to 100 if omitted. + */ + total_max_matches?: number; } /** @@ -208,7 +218,8 @@ class GrepToolInvocation extends BaseToolInvocation< const searchDirDisplay = pathParam; - const totalMaxMatches = DEFAULT_TOTAL_MAX_MATCHES; + const totalMaxMatches = + this.params.total_max_matches ?? DEFAULT_TOTAL_MAX_MATCHES; if (this.config.getDebugMode()) { debugLogger.log(`[GrepTool] Total result limit: ${totalMaxMatches}`); } @@ -240,6 +251,7 @@ class GrepToolInvocation extends BaseToolInvocation< before: this.params.before, no_ignore: this.params.no_ignore, maxMatches: totalMaxMatches, + max_matches_per_file: this.params.max_matches_per_file, signal: timeoutController.signal, }); } finally { @@ -325,6 +337,7 @@ class GrepToolInvocation extends BaseToolInvocation< before?: number; no_ignore?: boolean; maxMatches: number; + max_matches_per_file?: number; signal: AbortSignal; }): Promise { const { @@ -338,6 +351,7 @@ class GrepToolInvocation extends BaseToolInvocation< before, no_ignore, maxMatches, + max_matches_per_file, } = options; const rgArgs = ['--json']; @@ -366,6 +380,10 @@ class GrepToolInvocation extends BaseToolInvocation< rgArgs.push('--no-ignore'); } + if (max_matches_per_file) { + rgArgs.push('--max-count', max_matches_per_file.toString()); + } + if (include) { rgArgs.push('--glob', include); } @@ -558,6 +576,18 @@ export class RipGrepTool extends BaseDeclarativeTool< 'If true, searches all files including those usually ignored (like in .gitignore, build/, dist/, etc). Defaults to false if omitted.', type: 'boolean', }, + max_matches_per_file: { + description: + 'Optional: Maximum number of matches to return per file. Use this to prevent being overwhelmed by repetitive matches in large files.', + type: 'integer', + minimum: 1, + }, + total_max_matches: { + description: + 'Optional: Maximum number of total matches to return. Use this to limit the overall size of the response. Defaults to 100 if omitted.', + type: 'integer', + minimum: 1, + }, }, required: ['pattern'], type: 'object', @@ -588,6 +618,20 @@ export class RipGrepTool extends BaseDeclarativeTool< } } + if ( + params.max_matches_per_file !== undefined && + params.max_matches_per_file < 1 + ) { + return 'max_matches_per_file must be at least 1.'; + } + + if ( + params.total_max_matches !== undefined && + params.total_max_matches < 1 + ) { + return 'total_max_matches must be at least 1.'; + } + // Only validate path if one is provided if (params.dir_path) { const resolvedPath = path.resolve(