From 13251f1e278efa2adf9f68732eb746febe2ecaa2 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Mon, 2 Feb 2026 22:41:09 -0800 Subject: [PATCH] Secondary filter. --- packages/core/src/tools/grep.test.ts | 27 ++- packages/core/src/tools/grep.ts | 272 +++++++++++++++++++++------ 2 files changed, 245 insertions(+), 54 deletions(-) diff --git a/packages/core/src/tools/grep.test.ts b/packages/core/src/tools/grep.test.ts index 5b0ca9442e..24022c5d0f 100644 --- a/packages/core/src/tools/grep.test.ts +++ b/packages/core/src/tools/grep.test.ts @@ -330,9 +330,34 @@ describe('GrepTool', () => { // Count occurrences of match lines in the output // Matches lines start with L: - const matches = result.llmContent.match(/^L\d+:.*world/gm); + const content = + typeof result.llmContent === 'string' ? result.llmContent : ''; + const matches = content.match(/^L\d+:.*world/gm); expect(matches?.length).toBe(2); }, 30000); + + it('should filter files based on the filter parameter', async () => { + // fileA.txt contains "hello" and "world" + // fileB.js contains "hello" but NOT "world" + // sub/fileC.txt contains "world" but NOT "hello" + + const params: GrepToolParams = { + pattern: 'hello', + filter: 'world', + }; + const invocation = grepTool.build(params); + const result = await invocation.execute(abortSignal); + + // Should find matches in fileA.txt because it has both + // Should NOT find matches in fileB.js (has hello, missing world) + // Should NOT find matches in sub/fileC.txt (missing hello, has world) + + const content = + typeof result.llmContent === 'string' ? result.llmContent : ''; + expect(content).toContain('File: fileA.txt'); + expect(content).not.toContain('File: fileB.js'); + expect(content).not.toContain('File: fileC.txt'); + }, 30000); }); describe('multi-directory workspace', () => { diff --git a/packages/core/src/tools/grep.ts b/packages/core/src/tools/grep.ts index e5c90c366f..52cb7475fb 100644 --- a/packages/core/src/tools/grep.ts +++ b/packages/core/src/tools/grep.ts @@ -47,6 +47,11 @@ export interface GrepToolParams { */ include?: string; + /** + * Optional: A string that must be present in the file for it to be included (logical AND). + */ + filter?: string; + /** * Optional: Maximum number of matches to return per file. */ @@ -214,6 +219,7 @@ class GrepToolInvocation extends BaseToolInvocation< pattern: this.params.pattern, path: searchDir, include: this.params.include, + filter: this.params.filter, maxMatches: remainingLimit, maxMatchesPerFile: this.params.max_matches_per_file, signal: timeoutController.signal, @@ -343,6 +349,7 @@ class GrepToolInvocation extends BaseToolInvocation< pattern: string; path: string; // Expects absolute path include?: string; + filter?: string; maxMatches: number; maxMatchesPerFile?: number; signal: AbortSignal; @@ -351,6 +358,7 @@ class GrepToolInvocation extends BaseToolInvocation< pattern, path: absolutePath, include, + filter, maxMatches, maxMatchesPerFile, } = options; @@ -376,33 +384,116 @@ class GrepToolInvocation extends BaseToolInvocation< } try { - const generator = execStreaming('git', gitArgs, { - cwd: absolutePath, - signal: options.signal, - allowedExitCodes: [0, 1], - }); + // If a filter is provided, first find files matching the filter. + // This simulates "grep -l 'filter' | xargs grep 'pattern'". + if (filter) { + const filterArgs = [ + 'grep', + '--untracked', + '-l', // List filenames only + '--ignore-case', + '-E', // Extended regex (optional, but consistent) + filter, + ]; + if (include) { + filterArgs.push('--', include); + } - const results: GrepMatch[] = []; - const matchesPerFile = new Map(); + const filesGenerator = execStreaming('git', filterArgs, { + cwd: absolutePath, + signal: options.signal, + allowedExitCodes: [0, 1], + }); - for await (const line of generator) { - const match = this.parseGrepLine(line, absolutePath); - if (match) { - if (maxMatchesPerFile) { - const count = matchesPerFile.get(match.filePath) || 0; - if (count >= maxMatchesPerFile) { - continue; - } - matchesPerFile.set(match.filePath, count + 1); - } - - results.push(match); - if (results.length >= maxMatches) { - break; + const matchingFiles: string[] = []; + for await (const fileLine of filesGenerator) { + if (fileLine.trim()) { + matchingFiles.push(fileLine.trim()); } } + + if (matchingFiles.length === 0) { + return []; + } + + // If we have files, search ONLY within those files. + // We append the list of files to the main search command. + // Note: This could hit command line length limits if matchingFiles is huge. + // For now, we assume the filter reduces the set significantly. + // git grep expects paths after '--' or just as arguments. + // We need to be careful with '--' if include was used. + // Actually, if we provide explicit file paths, include glob is irrelevant for the second grep. + // So we can drop the include glob and just pass the files. + + // Reset args for the second command + const filteredGitArgs = [ + 'grep', + '--untracked', + '-n', + '-E', + '--ignore-case', + pattern, + '--', + ...matchingFiles, + ]; + + const generator = execStreaming('git', filteredGitArgs, { + cwd: absolutePath, + signal: options.signal, + allowedExitCodes: [0, 1], + }); + + const results: GrepMatch[] = []; + const matchesPerFile = new Map(); + + for await (const line of generator) { + const match = this.parseGrepLine(line, absolutePath); + if (match) { + if (maxMatchesPerFile) { + const count = matchesPerFile.get(match.filePath) || 0; + if (count >= maxMatchesPerFile) { + continue; + } + matchesPerFile.set(match.filePath, count + 1); + } + + results.push(match); + if (results.length >= maxMatches) { + break; + } + } + } + return results; + } else { + // Normal execution without filter + const generator = execStreaming('git', gitArgs, { + cwd: absolutePath, + signal: options.signal, + allowedExitCodes: [0, 1], + }); + + const results: GrepMatch[] = []; + const matchesPerFile = new Map(); + + for await (const line of generator) { + const match = this.parseGrepLine(line, absolutePath); + if (match) { + if (maxMatchesPerFile) { + const count = matchesPerFile.get(match.filePath) || 0; + if (count >= maxMatchesPerFile) { + continue; + } + matchesPerFile.set(match.filePath, count + 1); + } + + results.push(match); + if (results.length >= maxMatches) { + break; + } + } + } + return results; } - return results; } catch (gitError: unknown) { debugLogger.debug( `GrepLogic: git grep failed: ${getErrorMessage( @@ -443,47 +534,115 @@ class GrepToolInvocation extends BaseToolInvocation< }) .filter((dir): dir is string => !!dir); commonExcludes.forEach((dir) => grepArgs.push(`--exclude-dir=${dir}`)); - if (include) { - grepArgs.push(`--include=${include}`); - } - if (maxMatchesPerFile) { - grepArgs.push(`-m`, maxMatchesPerFile.toString()); - } + // If filter is present, we first find matching files using grep -l + if (filter) { + const filterArgs = ['-r', '-l', '-E', '-I']; + commonExcludes.forEach((dir) => + filterArgs.push(`--exclude-dir=${dir}`), + ); + if (include) { + filterArgs.push(`--include=${include}`); + } + filterArgs.push(filter); + filterArgs.push('.'); - grepArgs.push(pattern); - grepArgs.push('.'); - - const results: GrepMatch[] = []; - try { - const generator = execStreaming('grep', grepArgs, { + const filesGenerator = execStreaming('grep', filterArgs, { cwd: absolutePath, signal: options.signal, allowedExitCodes: [0, 1], }); - for await (const line of generator) { - const match = this.parseGrepLine(line, absolutePath); - if (match) { - results.push(match); - if (results.length >= maxMatches) { - break; - } + const matchingFiles: string[] = []; + for await (const fileLine of filesGenerator) { + if (fileLine.trim()) { + matchingFiles.push(fileLine.trim()); } } - return results; - } catch (grepError: unknown) { - if ( - grepError instanceof Error && - /Permission denied|Is a directory/i.test(grepError.message) - ) { - return results; + + if (matchingFiles.length === 0) { + return []; + } + + // Search within the filtered files + const filteredGrepArgs = ['-n', '-H', '-E', '-I']; + if (maxMatchesPerFile) { + filteredGrepArgs.push(`-m`, maxMatchesPerFile.toString()); + } + filteredGrepArgs.push(pattern); + filteredGrepArgs.push(...matchingFiles); + + const results: GrepMatch[] = []; + // execStreaming will handle potentially large arg list by failing if too large, + // but for now we assume it fits or rely on fallback. + // Ideally we'd chunk it. + try { + const generator = execStreaming('grep', filteredGrepArgs, { + cwd: absolutePath, + signal: options.signal, + allowedExitCodes: [0, 1], + }); + + for await (const line of generator) { + const match = this.parseGrepLine(line, absolutePath); + if (match) { + results.push(match); + if (results.length >= maxMatches) { + break; + } + } + } + return results; + } catch (e) { + debugLogger.debug( + `GrepLogic: System grep (second pass) failed: ${getErrorMessage(e)}`, + ); + // Fallback to JS if second pass fails (e.g. arg list too long) + } + } else { + // Normal system grep execution + if (include) { + grepArgs.push(`--include=${include}`); + } + + if (maxMatchesPerFile) { + grepArgs.push(`-m`, maxMatchesPerFile.toString()); + } + + grepArgs.push(pattern); + grepArgs.push('.'); + + const results: GrepMatch[] = []; + try { + const generator = execStreaming('grep', grepArgs, { + cwd: absolutePath, + signal: options.signal, + allowedExitCodes: [0, 1], + }); + + for await (const line of generator) { + const match = this.parseGrepLine(line, absolutePath); + if (match) { + results.push(match); + if (results.length >= maxMatches) { + break; + } + } + } + return results; + } catch (grepError: unknown) { + if ( + grepError instanceof Error && + /Permission denied|Is a directory/i.test(grepError.message) + ) { + return results; + } + debugLogger.debug( + `GrepLogic: System grep failed: ${getErrorMessage( + grepError, + )}. Falling back...`, + ); } - debugLogger.debug( - `GrepLogic: System grep failed: ${getErrorMessage( - grepError, - )}. Falling back...`, - ); } } @@ -505,6 +664,7 @@ class GrepToolInvocation extends BaseToolInvocation< }); const regex = new RegExp(pattern, 'i'); + const filterRegex = filter ? new RegExp(filter, 'i') : null; const allMatches: GrepMatch[] = []; for await (const filePath of filesStream) { @@ -521,6 +681,12 @@ class GrepToolInvocation extends BaseToolInvocation< try { const content = await fsPromises.readFile(fileAbsolutePath, 'utf8'); + + // Check filter first if present + if (filterRegex && !filterRegex.test(content)) { + continue; + } + const lines = content.split(/\r?\n/); let fileMatchCount = 0; for (let index = 0; index < lines.length; index++) {