diff --git a/packages/core/src/tools/grep.test.ts b/packages/core/src/tools/grep.test.ts index 3f1f023faf..5b0ca9442e 100644 --- a/packages/core/src/tools/grep.test.ts +++ b/packages/core/src/tools/grep.test.ts @@ -310,6 +310,29 @@ describe('GrepTool', () => { expect(result.error?.type).toBe(ToolErrorType.GREP_EXECUTION_ERROR); vi.mocked(glob.globStream).mockReset(); }, 30000); + + it('should limit matches per file when max_matches_per_file is set', async () => { + // fileA.txt has 2 matches for "world" + // sub/fileC.txt has 1 match for "world" + const params: GrepToolParams = { + pattern: 'world', + max_matches_per_file: 1, + }; + const invocation = grepTool.build(params); + const result = await invocation.execute(abortSignal); + + // Should find 1 match in fileA.txt (instead of 2) + // And 1 match in sub/fileC.txt + // Total 2 matches (was 3) + + expect(result.llmContent).toContain('Found 2 matches'); + expect(result.llmContent).toContain('File: fileA.txt'); + + // Count occurrences of match lines in the output + // Matches lines start with L: + const matches = result.llmContent.match(/^L\d+:.*world/gm); + expect(matches?.length).toBe(2); + }, 30000); }); describe('multi-directory workspace', () => { diff --git a/packages/core/src/tools/grep.ts b/packages/core/src/tools/grep.ts index 06278910bb..e5c90c366f 100644 --- a/packages/core/src/tools/grep.ts +++ b/packages/core/src/tools/grep.ts @@ -46,6 +46,11 @@ export interface GrepToolParams { * File pattern to include in the search (e.g. "*.js", "*.{ts,tsx}") */ include?: string; + + /** + * Optional: Maximum number of matches to return per file. + */ + max_matches_per_file?: number; } /** @@ -210,6 +215,7 @@ class GrepToolInvocation extends BaseToolInvocation< path: searchDir, include: this.params.include, maxMatches: remainingLimit, + maxMatchesPerFile: this.params.max_matches_per_file, signal: timeoutController.signal, }); @@ -338,9 +344,16 @@ class GrepToolInvocation extends BaseToolInvocation< path: string; // Expects absolute path include?: string; maxMatches: number; + maxMatchesPerFile?: number; signal: AbortSignal; }): Promise { - const { pattern, path: absolutePath, include, maxMatches } = options; + const { + pattern, + path: absolutePath, + include, + maxMatches, + maxMatchesPerFile, + } = options; let strategyUsed = 'none'; try { @@ -370,9 +383,19 @@ class GrepToolInvocation extends BaseToolInvocation< }); const results: GrepMatch[] = []; + const matchesPerFile = new Map(); + for await (const line of generator) { const match = this.parseGrepLine(line, absolutePath); if (match) { + if (maxMatchesPerFile) { + const count = matchesPerFile.get(match.filePath) || 0; + if (count >= maxMatchesPerFile) { + continue; + } + matchesPerFile.set(match.filePath, count + 1); + } + results.push(match); if (results.length >= maxMatches) { break; @@ -423,6 +446,11 @@ class GrepToolInvocation extends BaseToolInvocation< if (include) { grepArgs.push(`--include=${include}`); } + + if (maxMatchesPerFile) { + grepArgs.push(`-m`, maxMatchesPerFile.toString()); + } + grepArgs.push(pattern); grepArgs.push('.'); @@ -494,6 +522,7 @@ class GrepToolInvocation extends BaseToolInvocation< try { const content = await fsPromises.readFile(fileAbsolutePath, 'utf8'); const lines = content.split(/\r?\n/); + let fileMatchCount = 0; for (let index = 0; index < lines.length; index++) { const line = lines[index]; if (regex.test(line)) { @@ -504,7 +533,10 @@ class GrepToolInvocation extends BaseToolInvocation< lineNumber: index + 1, line, }); + fileMatchCount++; if (allMatches.length >= maxMatches) break; + if (maxMatchesPerFile && fileMatchCount >= maxMatchesPerFile) + break; } } } catch (readError: unknown) { @@ -593,6 +625,12 @@ export class GrepTool extends BaseDeclarativeTool { description: `Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).`, type: 'string', }, + max_matches_per_file: { + description: + 'Optional: Maximum number of matches to return per file. Use this to prevent being overwhelmed by repetitive matches in large files.', + type: 'integer', + minimum: 1, + }, }, required: ['pattern'], type: 'object', diff --git a/packages/core/src/tools/ripGrep.test.ts b/packages/core/src/tools/ripGrep.test.ts index 944a320fa4..f25835e967 100644 --- a/packages/core/src/tools/ripGrep.test.ts +++ b/packages/core/src/tools/ripGrep.test.ts @@ -1655,6 +1655,36 @@ describe('RipGrepTool', () => { // Note: Ripgrep JSON output for context lines doesn't include line numbers for context lines directly // The current parsing only extracts the matched line, so we only assert on that. }); + + it('should handle max_matches_per_file parameter', async () => { + mockSpawn.mockImplementationOnce( + createMockSpawn({ + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileA.txt' }, + line_number: 1, + lines: { text: 'match 1\n' }, + }, + }) + '\n', + exitCode: 0, + }), + ); + + const params: RipGrepToolParams = { + pattern: 'match', + max_matches_per_file: 5, + }; + const invocation = grepTool.build(params); + await invocation.execute(abortSignal); + + expect(mockSpawn).toHaveBeenLastCalledWith( + expect.anything(), + expect.arrayContaining(['--max-count', '5']), + expect.anything(), + ); + }); }); describe('getDescription', () => { diff --git a/packages/core/src/tools/ripGrep.ts b/packages/core/src/tools/ripGrep.ts index e905f2f404..96154cb219 100644 --- a/packages/core/src/tools/ripGrep.ts +++ b/packages/core/src/tools/ripGrep.ts @@ -131,6 +131,11 @@ export interface RipGrepToolParams { * If true, does not respect .gitignore or default ignores (like build/dist). */ no_ignore?: boolean; + + /** + * Optional: Maximum number of matches to return per file. + */ + max_matches_per_file?: number; } /** @@ -236,6 +241,7 @@ class GrepToolInvocation extends BaseToolInvocation< before: this.params.before, no_ignore: this.params.no_ignore, maxMatches: totalMaxMatches, + maxMatchesPerFile: this.params.max_matches_per_file, signal: timeoutController.signal, }); } finally { @@ -320,6 +326,7 @@ class GrepToolInvocation extends BaseToolInvocation< before?: number; no_ignore?: boolean; maxMatches: number; + maxMatchesPerFile?: number; signal: AbortSignal; }): Promise { const { @@ -333,6 +340,7 @@ class GrepToolInvocation extends BaseToolInvocation< before, no_ignore, maxMatches, + maxMatchesPerFile, } = options; const rgArgs = ['--json']; @@ -361,6 +369,10 @@ class GrepToolInvocation extends BaseToolInvocation< rgArgs.push('--no-ignore'); } + if (maxMatchesPerFile) { + rgArgs.push('--max-count', maxMatchesPerFile.toString()); + } + if (include) { rgArgs.push('--glob', include); } @@ -544,6 +556,12 @@ export class RipGrepTool extends BaseDeclarativeTool< 'If true, searches all files including those usually ignored (like in .gitignore, build/, dist/, etc). Defaults to false if omitted.', type: 'boolean', }, + max_matches_per_file: { + description: + 'Optional: Maximum number of matches to return per file. Use this to prevent being overwhelmed by repetitive matches in large files.', + type: 'integer', + minimum: 1, + }, }, required: ['pattern'], type: 'object',