Introduce limits for search results. (#18767)

This commit is contained in:
Christian Gunderman
2026-02-11 03:50:10 +00:00
committed by GitHub
parent 49d55d972e
commit 0d034b8c18
6 changed files with 249 additions and 3 deletions

View File

@@ -49,10 +49,20 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
"description": "Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).",
"type": "string",
},
"max_matches_per_file": {
"description": "Optional: Maximum number of matches to return per file. Use this to prevent being overwhelmed by repetitive matches in large files.",
"minimum": 1,
"type": "integer",
},
"pattern": {
"description": "The regular expression (regex) pattern to search for within file contents (e.g., 'function\\s+myFunction', 'import\\s+\\{.*\\}\\s+from\\s+.*').",
"type": "string",
},
"total_max_matches": {
"description": "Optional: Maximum number of total matches to return. Use this to limit the overall size of the response. Defaults to 100 if omitted.",
"minimum": 1,
"type": "integer",
},
},
"required": [
"pattern",
@@ -248,10 +258,20 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
"description": "Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).",
"type": "string",
},
"max_matches_per_file": {
"description": "Optional: Maximum number of matches to return per file. Use this to prevent being overwhelmed by repetitive matches in large files.",
"minimum": 1,
"type": "integer",
},
"pattern": {
"description": "The regular expression (regex) pattern to search for within file contents (e.g., 'function\\s+myFunction', 'import\\s+\\{.*\\}\\s+from\\s+.*').",
"type": "string",
},
"total_max_matches": {
"description": "Optional: Maximum number of total matches to return. Use this to limit the overall size of the response. Defaults to 100 if omitted.",
"minimum": 1,
"type": "integer",
},
},
"required": [
"pattern",

View File

@@ -98,6 +98,18 @@ export const GREP_DEFINITION: ToolDefinition = {
description: `Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).`,
type: 'string',
},
max_matches_per_file: {
description:
'Optional: Maximum number of matches to return per file. Use this to prevent being overwhelmed by repetitive matches in large files.',
type: 'integer',
minimum: 1,
},
total_max_matches: {
description:
'Optional: Maximum number of total matches to return. Use this to limit the overall size of the response. Defaults to 100 if omitted.',
type: 'integer',
minimum: 1,
},
},
required: ['pattern'],
},

View File

@@ -458,6 +458,46 @@ describe('GrepTool', () => {
// Clean up
await fs.rm(secondDir, { recursive: true, force: true });
});
it('should respect total_max_matches and truncate results', async () => {
  // Per the fixture, 'world' matches 3 times across fileA.txt and
  // sub/fileC.txt, so a cap of 2 must cut the result set short.
  const cappedSearch: GrepToolParams = {
    pattern: 'world',
    total_max_matches: 2,
  };
  const { llmContent, returnDisplay } = await grepTool
    .build(cappedSearch)
    .execute(abortSignal);

  // The summary reports the capped count and tells the caller it was limited.
  expect(llmContent).toContain('Found 2 matches');
  expect(llmContent).toContain('results limited to 2 matches for performance');

  // Both fileA.txt matches are reported first and use up the whole budget...
  expect(llmContent).toContain('File: fileA.txt');
  expect(llmContent).toContain('L1: hello world');
  expect(llmContent).toContain('L2: second line with world');

  // ...so sub/fileC.txt must not appear at all.
  expect(llmContent).not.toContain('File: sub/fileC.txt');
  expect(returnDisplay).toBe('Found 2 matches (limited)');
});
it('should respect max_matches_per_file in JS fallback', async () => {
  const perFileCap: GrepToolParams = {
    pattern: 'world',
    max_matches_per_file: 1,
  };
  const { llmContent } = await grepTool.build(perFileCap).execute(abortSignal);

  // fileA.txt holds two 'world' lines but may contribute only one;
  // sub/fileC.txt holds one, so the overall total is 2.
  expect(llmContent).toContain('Found 2 matches');

  // fileA.txt: first hit kept, second hit dropped by the per-file cap.
  expect(llmContent).toContain('File: fileA.txt');
  expect(llmContent).toContain('L1: hello world');
  expect(llmContent).not.toContain('L2: second line with world');

  // sub/fileC.txt: its single hit is still within the cap.
  expect(llmContent).toContain('File: sub/fileC.txt');
  expect(llmContent).toContain('L1: another world in sub dir');
});
});
describe('getDescription', () => {

View File

@@ -48,6 +48,16 @@ export interface GrepToolParams {
* File pattern to include in the search (e.g. "*.js", "*.{ts,tsx}")
*/
include?: string;
/**
* Optional: Maximum number of matches to return per file. Use this to prevent being overwhelmed by repetitive matches in large files.
*/
max_matches_per_file?: number;
/**
* Optional: Maximum number of total matches to return. Use this to limit the overall size of the response. Defaults to 100 if omitted.
*/
total_max_matches?: number;
}
/**
@@ -189,7 +199,8 @@ class GrepToolInvocation extends BaseToolInvocation<
// Collect matches from all search directories
let allMatches: GrepMatch[] = [];
const totalMaxMatches = DEFAULT_TOTAL_MAX_MATCHES;
const totalMaxMatches =
this.params.total_max_matches ?? DEFAULT_TOTAL_MAX_MATCHES;
// Create a timeout controller to prevent indefinitely hanging searches
const timeoutController = new AbortController();
@@ -215,6 +226,7 @@ class GrepToolInvocation extends BaseToolInvocation<
path: searchDir,
include: this.params.include,
maxMatches: remainingLimit,
max_matches_per_file: this.params.max_matches_per_file,
signal: timeoutController.signal,
});
@@ -343,9 +355,16 @@ class GrepToolInvocation extends BaseToolInvocation<
path: string; // Expects absolute path
include?: string;
maxMatches: number;
max_matches_per_file?: number;
signal: AbortSignal;
}): Promise<GrepMatch[]> {
const { pattern, path: absolutePath, include, maxMatches } = options;
const {
pattern,
path: absolutePath,
include,
maxMatches,
max_matches_per_file,
} = options;
let strategyUsed = 'none';
try {
@@ -363,6 +382,9 @@ class GrepToolInvocation extends BaseToolInvocation<
'--ignore-case',
pattern,
];
if (max_matches_per_file) {
gitArgs.push('--max-count', max_matches_per_file.toString());
}
if (include) {
gitArgs.push('--', include);
}
@@ -425,6 +447,9 @@ class GrepToolInvocation extends BaseToolInvocation<
})
.filter((dir): dir is string => !!dir);
commonExcludes.forEach((dir) => grepArgs.push(`--exclude-dir=${dir}`));
if (max_matches_per_file) {
grepArgs.push('--max-count', max_matches_per_file.toString());
}
if (include) {
grepArgs.push(`--include=${include}`);
}
@@ -499,6 +524,7 @@ class GrepToolInvocation extends BaseToolInvocation<
try {
const content = await fsPromises.readFile(fileAbsolutePath, 'utf8');
const lines = content.split(/\r?\n/);
let matchesInFile = 0;
for (let index = 0; index < lines.length; index++) {
const line = lines[index];
if (regex.test(line)) {
@@ -509,7 +535,14 @@ class GrepToolInvocation extends BaseToolInvocation<
lineNumber: index + 1,
line,
});
matchesInFile++;
if (allMatches.length >= maxMatches) break;
if (
max_matches_per_file &&
matchesInFile >= max_matches_per_file
) {
break;
}
}
}
} catch (readError: unknown) {
@@ -604,6 +637,20 @@ export class GrepTool extends BaseDeclarativeTool<GrepToolParams, ToolResult> {
return `Invalid regular expression pattern provided: ${params.pattern}. Error: ${getErrorMessage(error)}`;
}
if (
params.max_matches_per_file !== undefined &&
params.max_matches_per_file < 1
) {
return 'max_matches_per_file must be at least 1.';
}
if (
params.total_max_matches !== undefined &&
params.total_max_matches < 1
) {
return 'total_max_matches must be at least 1.';
}
// Only validate dir_path if one is provided
if (params.dir_path) {
const resolvedPath = path.resolve(

View File

@@ -1848,6 +1848,89 @@ describe('RipGrepTool', () => {
expect(invocation.getDescription()).toContain(path.join('src', 'app'));
});
});
describe('new parameters', () => {
  /**
   * Builds one newline-terminated JSON `match` event for fileA.txt, in the
   * shape these tests feed to the mocked spawn as its output.
   *
   * @param lineNumber 1-based line number reported for the match.
   * @param text Matched line content, without the trailing newline.
   * @returns The serialized event followed by '\n'.
   */
  const matchEvent = (lineNumber: number, text: string): string =>
    JSON.stringify({
      type: 'match',
      data: {
        path: { text: 'fileA.txt' },
        line_number: lineNumber,
        lines: { text: `${text}\n` },
      },
    }) + '\n';

  it('should pass --max-count when max_matches_per_file is provided', async () => {
    mockSpawn.mockImplementationOnce(
      createMockSpawn({
        outputData: matchEvent(1, 'hello world'),
        exitCode: 0,
      }),
    );

    const params: RipGrepToolParams = {
      pattern: 'world',
      max_matches_per_file: 1,
    };
    const invocation = grepTool.build(params);
    await invocation.execute(abortSignal);

    const spawnArgs = mockSpawn.mock.calls[0][1];
    // Assert the value sits directly after the flag. A bare toContain('1')
    // would also pass if some unrelated argument happened to be '1'.
    const flagIndex = spawnArgs.indexOf('--max-count');
    expect(flagIndex).toBeGreaterThanOrEqual(0);
    expect(spawnArgs[flagIndex + 1]).toBe('1');
  });

  it('should respect total_max_matches and truncate results', async () => {
    // The mocked process emits 3 matches, but total_max_matches is 2.
    mockSpawn.mockImplementationOnce(
      createMockSpawn({
        outputData:
          matchEvent(1, 'match 1') +
          matchEvent(2, 'match 2') +
          matchEvent(3, 'match 3'),
        exitCode: 0,
      }),
    );

    const params: RipGrepToolParams = {
      pattern: 'match',
      total_max_matches: 2,
    };
    const invocation = grepTool.build(params);
    const result = await invocation.execute(abortSignal);

    // Only the first two matches survive, and the output flags the truncation.
    expect(result.llmContent).toContain('Found 2 matches');
    expect(result.llmContent).toContain(
      'results limited to 2 matches for performance',
    );
    expect(result.llmContent).toContain('L1: match 1');
    expect(result.llmContent).toContain('L2: match 2');
    expect(result.llmContent).not.toContain('L3: match 3');
    expect(result.returnDisplay).toBe('Found 2 matches (limited)');
  });
});
});
afterAll(() => {

View File

@@ -131,6 +131,16 @@ export interface RipGrepToolParams {
* If true, does not respect .gitignore or default ignores (like build/dist).
*/
no_ignore?: boolean;
/**
* Optional: Maximum number of matches to return per file. Use this to prevent being overwhelmed by repetitive matches in large files.
*/
max_matches_per_file?: number;
/**
* Optional: Maximum number of total matches to return. Use this to limit the overall size of the response. Defaults to 100 if omitted.
*/
total_max_matches?: number;
}
/**
@@ -208,7 +218,8 @@ class GrepToolInvocation extends BaseToolInvocation<
const searchDirDisplay = pathParam;
const totalMaxMatches = DEFAULT_TOTAL_MAX_MATCHES;
const totalMaxMatches =
this.params.total_max_matches ?? DEFAULT_TOTAL_MAX_MATCHES;
if (this.config.getDebugMode()) {
debugLogger.log(`[GrepTool] Total result limit: ${totalMaxMatches}`);
}
@@ -240,6 +251,7 @@ class GrepToolInvocation extends BaseToolInvocation<
before: this.params.before,
no_ignore: this.params.no_ignore,
maxMatches: totalMaxMatches,
max_matches_per_file: this.params.max_matches_per_file,
signal: timeoutController.signal,
});
} finally {
@@ -325,6 +337,7 @@ class GrepToolInvocation extends BaseToolInvocation<
before?: number;
no_ignore?: boolean;
maxMatches: number;
max_matches_per_file?: number;
signal: AbortSignal;
}): Promise<GrepMatch[]> {
const {
@@ -338,6 +351,7 @@ class GrepToolInvocation extends BaseToolInvocation<
before,
no_ignore,
maxMatches,
max_matches_per_file,
} = options;
const rgArgs = ['--json'];
@@ -366,6 +380,10 @@ class GrepToolInvocation extends BaseToolInvocation<
rgArgs.push('--no-ignore');
}
if (max_matches_per_file) {
rgArgs.push('--max-count', max_matches_per_file.toString());
}
if (include) {
rgArgs.push('--glob', include);
}
@@ -558,6 +576,18 @@ export class RipGrepTool extends BaseDeclarativeTool<
'If true, searches all files including those usually ignored (like in .gitignore, build/, dist/, etc). Defaults to false if omitted.',
type: 'boolean',
},
max_matches_per_file: {
description:
'Optional: Maximum number of matches to return per file. Use this to prevent being overwhelmed by repetitive matches in large files.',
type: 'integer',
minimum: 1,
},
total_max_matches: {
description:
'Optional: Maximum number of total matches to return. Use this to limit the overall size of the response. Defaults to 100 if omitted.',
type: 'integer',
minimum: 1,
},
},
required: ['pattern'],
type: 'object',
@@ -588,6 +618,20 @@ export class RipGrepTool extends BaseDeclarativeTool<
}
}
if (
params.max_matches_per_file !== undefined &&
params.max_matches_per_file < 1
) {
return 'max_matches_per_file must be at least 1.';
}
if (
params.total_max_matches !== undefined &&
params.total_max_matches < 1
) {
return 'total_max_matches must be at least 1.';
}
// Only validate path if one is provided
if (params.dir_path) {
const resolvedPath = path.resolve(