Frugal search

This commit is contained in:
Christian Gunderman
2026-02-02 22:23:58 -08:00
parent 1b274b081d
commit 28afd9bbff
4 changed files with 110 additions and 1 deletions

View File

@@ -310,6 +310,29 @@ describe('GrepTool', () => {
expect(result.error?.type).toBe(ToolErrorType.GREP_EXECUTION_ERROR);
vi.mocked(glob.globStream).mockReset();
}, 30000);
it('should limit matches per file when max_matches_per_file is set', async () => {
  // Fixture recap: fileA.txt holds two "world" lines, sub/fileC.txt holds one,
  // so an uncapped search reports three matches in total.
  const cappedSearch: GrepToolParams = {
    pattern: 'world',
    max_matches_per_file: 1,
  };

  const { llmContent } = await grepTool.build(cappedSearch).execute(abortSignal);

  // With a cap of one match per file we expect a single hit from fileA.txt
  // (down from two) plus the single hit from sub/fileC.txt: two in total.
  expect(llmContent).toContain('Found 2 matches');
  expect(llmContent).toContain('File: fileA.txt');

  // Match lines in the rendered output start with "L<number>:", so counting
  // those lines verifies how many matches were actually emitted.
  const matchLines = llmContent.match(/^L\d+:.*world/gm);
  expect(matchLines?.length).toBe(2);
}, 30000);
});
describe('multi-directory workspace', () => {

View File

@@ -46,6 +46,11 @@ export interface GrepToolParams {
* File pattern to include in the search (e.g. "*.js", "*.{ts,tsx}")
*/
include?: string;
/**
* Optional: Maximum number of matches to return per file.
*/
max_matches_per_file?: number;
}
/**
@@ -210,6 +215,7 @@ class GrepToolInvocation extends BaseToolInvocation<
path: searchDir,
include: this.params.include,
maxMatches: remainingLimit,
maxMatchesPerFile: this.params.max_matches_per_file,
signal: timeoutController.signal,
});
@@ -338,9 +344,16 @@ class GrepToolInvocation extends BaseToolInvocation<
path: string; // Expects absolute path
include?: string;
maxMatches: number;
maxMatchesPerFile?: number;
signal: AbortSignal;
}): Promise<GrepMatch[]> {
const { pattern, path: absolutePath, include, maxMatches } = options;
const {
pattern,
path: absolutePath,
include,
maxMatches,
maxMatchesPerFile,
} = options;
let strategyUsed = 'none';
try {
@@ -370,9 +383,19 @@ class GrepToolInvocation extends BaseToolInvocation<
});
const results: GrepMatch[] = [];
const matchesPerFile = new Map<string, number>();
for await (const line of generator) {
const match = this.parseGrepLine(line, absolutePath);
if (match) {
if (maxMatchesPerFile) {
const count = matchesPerFile.get(match.filePath) || 0;
if (count >= maxMatchesPerFile) {
continue;
}
matchesPerFile.set(match.filePath, count + 1);
}
results.push(match);
if (results.length >= maxMatches) {
break;
@@ -423,6 +446,11 @@ class GrepToolInvocation extends BaseToolInvocation<
if (include) {
grepArgs.push(`--include=${include}`);
}
if (maxMatchesPerFile) {
grepArgs.push(`-m`, maxMatchesPerFile.toString());
}
grepArgs.push(pattern);
grepArgs.push('.');
@@ -494,6 +522,7 @@ class GrepToolInvocation extends BaseToolInvocation<
try {
const content = await fsPromises.readFile(fileAbsolutePath, 'utf8');
const lines = content.split(/\r?\n/);
let fileMatchCount = 0;
for (let index = 0; index < lines.length; index++) {
const line = lines[index];
if (regex.test(line)) {
@@ -504,7 +533,10 @@ class GrepToolInvocation extends BaseToolInvocation<
lineNumber: index + 1,
line,
});
fileMatchCount++;
if (allMatches.length >= maxMatches) break;
if (maxMatchesPerFile && fileMatchCount >= maxMatchesPerFile)
break;
}
}
} catch (readError: unknown) {
@@ -593,6 +625,12 @@ export class GrepTool extends BaseDeclarativeTool<GrepToolParams, ToolResult> {
description: `Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).`,
type: 'string',
},
max_matches_per_file: {
description:
'Optional: Maximum number of matches to return per file. Use this to prevent being overwhelmed by repetitive matches in large files.',
type: 'integer',
minimum: 1,
},
},
required: ['pattern'],
type: 'object',

View File

@@ -1655,6 +1655,36 @@ describe('RipGrepTool', () => {
// Note: Ripgrep JSON output for context lines doesn't include line numbers for context lines directly
// The current parsing only extracts the matched line, so we only assert on that.
});
it('should handle max_matches_per_file parameter', async () => {
  // A single ripgrep JSON "match" event, newline-terminated, used as the
  // mocked process output.
  const matchEvent = {
    type: 'match',
    data: {
      path: { text: 'fileA.txt' },
      line_number: 1,
      lines: { text: 'match 1\n' },
    },
  };
  const spawnStub = createMockSpawn({
    outputData: `${JSON.stringify(matchEvent)}\n`,
    exitCode: 0,
  });
  mockSpawn.mockImplementationOnce(spawnStub);

  const searchParams: RipGrepToolParams = {
    pattern: 'match',
    max_matches_per_file: 5,
  };
  await grepTool.build(searchParams).execute(abortSignal);

  // The per-file cap must be forwarded to ripgrep as `--max-count <n>`;
  // the command and spawn options are not constrained here.
  const expectedArgs = expect.arrayContaining(['--max-count', '5']);
  expect(mockSpawn).toHaveBeenLastCalledWith(
    expect.anything(),
    expectedArgs,
    expect.anything(),
  );
});
});
describe('getDescription', () => {

View File

@@ -131,6 +131,11 @@ export interface RipGrepToolParams {
* If true, does not respect .gitignore or default ignores (like build/dist).
*/
no_ignore?: boolean;
/**
* Optional: Maximum number of matches to return per file.
*/
max_matches_per_file?: number;
}
/**
@@ -236,6 +241,7 @@ class GrepToolInvocation extends BaseToolInvocation<
before: this.params.before,
no_ignore: this.params.no_ignore,
maxMatches: totalMaxMatches,
maxMatchesPerFile: this.params.max_matches_per_file,
signal: timeoutController.signal,
});
} finally {
@@ -320,6 +326,7 @@ class GrepToolInvocation extends BaseToolInvocation<
before?: number;
no_ignore?: boolean;
maxMatches: number;
maxMatchesPerFile?: number;
signal: AbortSignal;
}): Promise<GrepMatch[]> {
const {
@@ -333,6 +340,7 @@ class GrepToolInvocation extends BaseToolInvocation<
before,
no_ignore,
maxMatches,
maxMatchesPerFile,
} = options;
const rgArgs = ['--json'];
@@ -361,6 +369,10 @@ class GrepToolInvocation extends BaseToolInvocation<
rgArgs.push('--no-ignore');
}
if (maxMatchesPerFile) {
rgArgs.push('--max-count', maxMatchesPerFile.toString());
}
if (include) {
rgArgs.push('--glob', include);
}
@@ -544,6 +556,12 @@ export class RipGrepTool extends BaseDeclarativeTool<
'If true, searches all files including those usually ignored (like in .gitignore, build/, dist/, etc). Defaults to false if omitted.',
type: 'boolean',
},
max_matches_per_file: {
description:
'Optional: Maximum number of matches to return per file. Use this to prevent being overwhelmed by repetitive matches in large files.',
type: 'integer',
minimum: 1,
},
},
required: ['pattern'],
type: 'object',