Secondary filter.

This commit is contained in:
Christian Gunderman
2026-02-02 22:41:09 -08:00
parent aaa2661917
commit 13251f1e27
2 changed files with 245 additions and 54 deletions

View File

@@ -330,9 +330,34 @@ describe('GrepTool', () => {
// Count occurrences of match lines in the output
// Matches lines start with L<number>:
const matches = result.llmContent.match(/^L\d+:.*world/gm);
const content =
typeof result.llmContent === 'string' ? result.llmContent : '';
const matches = content.match(/^L\d+:.*world/gm);
expect(matches?.length).toBe(2);
}, 30000);
it('should filter files based on the filter parameter', async () => {
// fileA.txt contains "hello" and "world"
// fileB.js contains "hello" but NOT "world"
// sub/fileC.txt contains "world" but NOT "hello"
const params: GrepToolParams = {
pattern: 'hello',
filter: 'world',
};
const invocation = grepTool.build(params);
const result = await invocation.execute(abortSignal);
// Should find matches in fileA.txt because it has both
// Should NOT find matches in fileB.js (has hello, missing world)
// Should NOT find matches in sub/fileC.txt (missing hello, has world)
const content =
typeof result.llmContent === 'string' ? result.llmContent : '';
expect(content).toContain('File: fileA.txt');
expect(content).not.toContain('File: fileB.js');
expect(content).not.toContain('File: fileC.txt');
}, 30000);
});
describe('multi-directory workspace', () => {

View File

@@ -47,6 +47,11 @@ export interface GrepToolParams {
*/
include?: string;
/**
* Optional: A string that must be present in the file for it to be included (logical AND).
*/
filter?: string;
/**
* Optional: Maximum number of matches to return per file.
*/
@@ -214,6 +219,7 @@ class GrepToolInvocation extends BaseToolInvocation<
pattern: this.params.pattern,
path: searchDir,
include: this.params.include,
filter: this.params.filter,
maxMatches: remainingLimit,
maxMatchesPerFile: this.params.max_matches_per_file,
signal: timeoutController.signal,
@@ -343,6 +349,7 @@ class GrepToolInvocation extends BaseToolInvocation<
pattern: string;
path: string; // Expects absolute path
include?: string;
filter?: string;
maxMatches: number;
maxMatchesPerFile?: number;
signal: AbortSignal;
@@ -351,6 +358,7 @@ class GrepToolInvocation extends BaseToolInvocation<
pattern,
path: absolutePath,
include,
filter,
maxMatches,
maxMatchesPerFile,
} = options;
@@ -376,33 +384,116 @@ class GrepToolInvocation extends BaseToolInvocation<
}
try {
const generator = execStreaming('git', gitArgs, {
cwd: absolutePath,
signal: options.signal,
allowedExitCodes: [0, 1],
});
// If a filter is provided, first find files matching the filter.
// This simulates "grep -l 'filter' | xargs grep 'pattern'".
if (filter) {
const filterArgs = [
'grep',
'--untracked',
'-l', // List filenames only
'--ignore-case',
'-E', // Extended regex (optional, but consistent)
filter,
];
if (include) {
filterArgs.push('--', include);
}
const results: GrepMatch[] = [];
const matchesPerFile = new Map<string, number>();
const filesGenerator = execStreaming('git', filterArgs, {
cwd: absolutePath,
signal: options.signal,
allowedExitCodes: [0, 1],
});
for await (const line of generator) {
const match = this.parseGrepLine(line, absolutePath);
if (match) {
if (maxMatchesPerFile) {
const count = matchesPerFile.get(match.filePath) || 0;
if (count >= maxMatchesPerFile) {
continue;
}
matchesPerFile.set(match.filePath, count + 1);
}
results.push(match);
if (results.length >= maxMatches) {
break;
const matchingFiles: string[] = [];
for await (const fileLine of filesGenerator) {
if (fileLine.trim()) {
matchingFiles.push(fileLine.trim());
}
}
if (matchingFiles.length === 0) {
return [];
}
// If we have files, search ONLY within those files.
// We append the list of files to the main search command.
// Note: This could hit command line length limits if matchingFiles is huge.
// For now, we assume the filter reduces the set significantly.
// git grep expects paths after '--' or just as arguments.
// We need to be careful with '--' if include was used.
// Actually, if we provide explicit file paths, include glob is irrelevant for the second grep.
// So we can drop the include glob and just pass the files.
// Reset args for the second command
const filteredGitArgs = [
'grep',
'--untracked',
'-n',
'-E',
'--ignore-case',
pattern,
'--',
...matchingFiles,
];
const generator = execStreaming('git', filteredGitArgs, {
cwd: absolutePath,
signal: options.signal,
allowedExitCodes: [0, 1],
});
const results: GrepMatch[] = [];
const matchesPerFile = new Map<string, number>();
for await (const line of generator) {
const match = this.parseGrepLine(line, absolutePath);
if (match) {
if (maxMatchesPerFile) {
const count = matchesPerFile.get(match.filePath) || 0;
if (count >= maxMatchesPerFile) {
continue;
}
matchesPerFile.set(match.filePath, count + 1);
}
results.push(match);
if (results.length >= maxMatches) {
break;
}
}
}
return results;
} else {
// Normal execution without filter
const generator = execStreaming('git', gitArgs, {
cwd: absolutePath,
signal: options.signal,
allowedExitCodes: [0, 1],
});
const results: GrepMatch[] = [];
const matchesPerFile = new Map<string, number>();
for await (const line of generator) {
const match = this.parseGrepLine(line, absolutePath);
if (match) {
if (maxMatchesPerFile) {
const count = matchesPerFile.get(match.filePath) || 0;
if (count >= maxMatchesPerFile) {
continue;
}
matchesPerFile.set(match.filePath, count + 1);
}
results.push(match);
if (results.length >= maxMatches) {
break;
}
}
}
return results;
}
return results;
} catch (gitError: unknown) {
debugLogger.debug(
`GrepLogic: git grep failed: ${getErrorMessage(
@@ -443,47 +534,115 @@ class GrepToolInvocation extends BaseToolInvocation<
})
.filter((dir): dir is string => !!dir);
commonExcludes.forEach((dir) => grepArgs.push(`--exclude-dir=${dir}`));
if (include) {
grepArgs.push(`--include=${include}`);
}
if (maxMatchesPerFile) {
grepArgs.push(`-m`, maxMatchesPerFile.toString());
}
// If filter is present, we first find matching files using grep -l
if (filter) {
const filterArgs = ['-r', '-l', '-E', '-I'];
commonExcludes.forEach((dir) =>
filterArgs.push(`--exclude-dir=${dir}`),
);
if (include) {
filterArgs.push(`--include=${include}`);
}
filterArgs.push(filter);
filterArgs.push('.');
grepArgs.push(pattern);
grepArgs.push('.');
const results: GrepMatch[] = [];
try {
const generator = execStreaming('grep', grepArgs, {
const filesGenerator = execStreaming('grep', filterArgs, {
cwd: absolutePath,
signal: options.signal,
allowedExitCodes: [0, 1],
});
for await (const line of generator) {
const match = this.parseGrepLine(line, absolutePath);
if (match) {
results.push(match);
if (results.length >= maxMatches) {
break;
}
const matchingFiles: string[] = [];
for await (const fileLine of filesGenerator) {
if (fileLine.trim()) {
matchingFiles.push(fileLine.trim());
}
}
return results;
} catch (grepError: unknown) {
if (
grepError instanceof Error &&
/Permission denied|Is a directory/i.test(grepError.message)
) {
return results;
if (matchingFiles.length === 0) {
return [];
}
// Search within the filtered files
const filteredGrepArgs = ['-n', '-H', '-E', '-I'];
if (maxMatchesPerFile) {
filteredGrepArgs.push(`-m`, maxMatchesPerFile.toString());
}
filteredGrepArgs.push(pattern);
filteredGrepArgs.push(...matchingFiles);
const results: GrepMatch[] = [];
// execStreaming will handle potentially large arg list by failing if too large,
// but for now we assume it fits or rely on fallback.
// Ideally we'd chunk it.
try {
const generator = execStreaming('grep', filteredGrepArgs, {
cwd: absolutePath,
signal: options.signal,
allowedExitCodes: [0, 1],
});
for await (const line of generator) {
const match = this.parseGrepLine(line, absolutePath);
if (match) {
results.push(match);
if (results.length >= maxMatches) {
break;
}
}
}
return results;
} catch (e) {
debugLogger.debug(
`GrepLogic: System grep (second pass) failed: ${getErrorMessage(e)}`,
);
// Fallback to JS if second pass fails (e.g. arg list too long)
}
} else {
// Normal system grep execution
if (include) {
grepArgs.push(`--include=${include}`);
}
if (maxMatchesPerFile) {
grepArgs.push(`-m`, maxMatchesPerFile.toString());
}
grepArgs.push(pattern);
grepArgs.push('.');
const results: GrepMatch[] = [];
try {
const generator = execStreaming('grep', grepArgs, {
cwd: absolutePath,
signal: options.signal,
allowedExitCodes: [0, 1],
});
for await (const line of generator) {
const match = this.parseGrepLine(line, absolutePath);
if (match) {
results.push(match);
if (results.length >= maxMatches) {
break;
}
}
}
return results;
} catch (grepError: unknown) {
if (
grepError instanceof Error &&
/Permission denied|Is a directory/i.test(grepError.message)
) {
return results;
}
debugLogger.debug(
`GrepLogic: System grep failed: ${getErrorMessage(
grepError,
)}. Falling back...`,
);
}
debugLogger.debug(
`GrepLogic: System grep failed: ${getErrorMessage(
grepError,
)}. Falling back...`,
);
}
}
@@ -505,6 +664,7 @@ class GrepToolInvocation extends BaseToolInvocation<
});
const regex = new RegExp(pattern, 'i');
const filterRegex = filter ? new RegExp(filter, 'i') : null;
const allMatches: GrepMatch[] = [];
for await (const filePath of filesStream) {
@@ -521,6 +681,12 @@ class GrepToolInvocation extends BaseToolInvocation<
try {
const content = await fsPromises.readFile(fileAbsolutePath, 'utf8');
// Check filter first if present
if (filterRegex && !filterRegex.test(content)) {
continue;
}
const lines = content.split(/\r?\n/);
let fileMatchCount = 0;
for (let index = 0; index < lines.length; index++) {