From 6ca188ad2f00a215110fa8c5c00cdcd1f1acf20c Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Wed, 4 Feb 2026 11:55:38 -0800 Subject: [PATCH] revert: remove filter parameter from grep and ripgrep tools --- packages/core/src/tools/grep.test.ts | 23 --- packages/core/src/tools/grep.ts | 272 ++++++--------------------- packages/core/src/tools/ripGrep.ts | 145 ++------------ 3 files changed, 75 insertions(+), 365 deletions(-) diff --git a/packages/core/src/tools/grep.test.ts b/packages/core/src/tools/grep.test.ts index 24022c5d0f..857a960642 100644 --- a/packages/core/src/tools/grep.test.ts +++ b/packages/core/src/tools/grep.test.ts @@ -335,29 +335,6 @@ describe('GrepTool', () => { const matches = content.match(/^L\d+:.*world/gm); expect(matches?.length).toBe(2); }, 30000); - - it('should filter files based on the filter parameter', async () => { - // fileA.txt contains "hello" and "world" - // fileB.js contains "hello" but NOT "world" - // sub/fileC.txt contains "world" but NOT "hello" - - const params: GrepToolParams = { - pattern: 'hello', - filter: 'world', - }; - const invocation = grepTool.build(params); - const result = await invocation.execute(abortSignal); - - // Should find matches in fileA.txt because it has both - // Should NOT find matches in fileB.js (has hello, missing world) - // Should NOT find matches in sub/fileC.txt (missing hello, has world) - - const content = - typeof result.llmContent === 'string' ? result.llmContent : ''; - expect(content).toContain('File: fileA.txt'); - expect(content).not.toContain('File: fileB.js'); - expect(content).not.toContain('File: fileC.txt'); - }, 30000); }); describe('multi-directory workspace', () => { diff --git a/packages/core/src/tools/grep.ts b/packages/core/src/tools/grep.ts index 1012340170..4834b5c04a 100644 --- a/packages/core/src/tools/grep.ts +++ b/packages/core/src/tools/grep.ts @@ -47,11 +47,6 @@ export interface GrepToolParams { */ include?: string; - /** - * Optional: A string that must be present in the file for it to be included (logical AND). - */ - filter?: string; - /** * Optional: Maximum number of matches to return per file. */ @@ -225,7 +220,6 @@ class GrepToolInvocation extends BaseToolInvocation< pattern: this.params.pattern, path: searchDir, include: this.params.include, - filter: this.params.filter, maxMatches: remainingLimit, maxMatchesPerFile: this.params.max_matches_per_file, signal: timeoutController.signal, @@ -355,7 +349,6 @@ class GrepToolInvocation extends BaseToolInvocation< pattern: string; path: string; // Expects absolute path include?: string; - filter?: string; maxMatches: number; maxMatchesPerFile?: number; signal: AbortSignal; @@ -364,7 +357,6 @@ class GrepToolInvocation extends BaseToolInvocation< pattern, path: absolutePath, include, - filter, maxMatches, maxMatchesPerFile, } = options; @@ -390,116 +382,34 @@ class GrepToolInvocation extends BaseToolInvocation< } try { - // If a filter is provided, first find files matching the filter. - // This simulates "grep -l 'filter' | xargs grep 'pattern'". - if (filter) { - const filterArgs = [ - 'grep', - '--untracked', - '-l', // List filenames only - '--ignore-case', - '-E', // Extended regex (optional, but consistent) - filter, - ]; - if (include) { - filterArgs.push('--', include); - } + // Normal execution without filter + const generator = execStreaming('git', gitArgs, { + cwd: absolutePath, + signal: options.signal, + allowedExitCodes: [0, 1], + }); - const filesGenerator = execStreaming('git', filterArgs, { - cwd: absolutePath, - signal: options.signal, - allowedExitCodes: [0, 1], - }); + const results: GrepMatch[] = []; + const matchesPerFile = new Map(); - const matchingFiles: string[] = []; - for await (const fileLine of filesGenerator) { - if (fileLine.trim()) { - matchingFiles.push(fileLine.trim()); + for await (const line of generator) { + const match = this.parseGrepLine(line, absolutePath); + if (match) { + if (maxMatchesPerFile) { + const count = matchesPerFile.get(match.filePath) || 0; + if (count >= maxMatchesPerFile) { + continue; + } + matchesPerFile.set(match.filePath, count + 1); + } + + results.push(match); + if (results.length >= maxMatches) { + break; } } - - if (matchingFiles.length === 0) { - return []; - } - - // If we have files, search ONLY within those files. - // We append the list of files to the main search command. - // Note: This could hit command line length limits if matchingFiles is huge. - // For now, we assume the filter reduces the set significantly. - // git grep expects paths after '--' or just as arguments. - // We need to be careful with '--' if include was used. - // Actually, if we provide explicit file paths, include glob is irrelevant for the second grep. - // So we can drop the include glob and just pass the files. - - // Reset args for the second command - const filteredGitArgs = [ - 'grep', - '--untracked', - '-n', - '-E', - '--ignore-case', - pattern, - '--', - ...matchingFiles, - ]; - - const generator = execStreaming('git', filteredGitArgs, { - cwd: absolutePath, - signal: options.signal, - allowedExitCodes: [0, 1], - }); - - const results: GrepMatch[] = []; - const matchesPerFile = new Map(); - - for await (const line of generator) { - const match = this.parseGrepLine(line, absolutePath); - if (match) { - if (maxMatchesPerFile) { - const count = matchesPerFile.get(match.filePath) || 0; - if (count >= maxMatchesPerFile) { - continue; - } - matchesPerFile.set(match.filePath, count + 1); - } - - results.push(match); - if (results.length >= maxMatches) { - break; - } - } - } - return results; - } else { - // Normal execution without filter - const generator = execStreaming('git', gitArgs, { - cwd: absolutePath, - signal: options.signal, - allowedExitCodes: [0, 1], - }); - - const results: GrepMatch[] = []; - const matchesPerFile = new Map(); - - for await (const line of generator) { - const match = this.parseGrepLine(line, absolutePath); - if (match) { - if (maxMatchesPerFile) { - const count = matchesPerFile.get(match.filePath) || 0; - if (count >= maxMatchesPerFile) { - continue; - } - matchesPerFile.set(match.filePath, count + 1); - } - - results.push(match); - if (results.length >= maxMatches) { - break; - } - } - } - return results; } + return results; } catch (gitError: unknown) { debugLogger.debug( `GrepLogic: git grep failed: ${getErrorMessage( @@ -541,114 +451,48 @@ class GrepToolInvocation extends BaseToolInvocation< .filter((dir): dir is string => !!dir); commonExcludes.forEach((dir) => grepArgs.push(`--exclude-dir=${dir}`)); - // If filter is present, we first find matching files using grep -l - if (filter) { - const filterArgs = ['-r', '-l', '-E', '-I']; - commonExcludes.forEach((dir) => - filterArgs.push(`--exclude-dir=${dir}`), - ); - if (include) { - filterArgs.push(`--include=${include}`); - } - filterArgs.push(filter); - filterArgs.push('.'); + // Normal system grep execution + if (include) { + grepArgs.push(`--include=${include}`); + } - const filesGenerator = execStreaming('grep', filterArgs, { + if (maxMatchesPerFile) { + grepArgs.push(`-m`, maxMatchesPerFile.toString()); + } + + grepArgs.push(pattern); + grepArgs.push('.'); + + const results: GrepMatch[] = []; + try { + const generator = execStreaming('grep', grepArgs, { cwd: absolutePath, signal: options.signal, allowedExitCodes: [0, 1], }); - const matchingFiles: string[] = []; - for await (const fileLine of filesGenerator) { - if (fileLine.trim()) { - matchingFiles.push(fileLine.trim()); - } - } - - if (matchingFiles.length === 0) { - return []; - } - - // Search within the filtered files - const filteredGrepArgs = ['-n', '-H', '-E', '-I']; - if (maxMatchesPerFile) { - filteredGrepArgs.push(`-m`, maxMatchesPerFile.toString()); - } - filteredGrepArgs.push(pattern); - filteredGrepArgs.push(...matchingFiles); - - const results: GrepMatch[] = []; - // execStreaming will handle potentially large arg list by failing if too large, - // but for now we assume it fits or rely on fallback. - // Ideally we'd chunk it. - try { - const generator = execStreaming('grep', filteredGrepArgs, { - cwd: absolutePath, - signal: options.signal, - allowedExitCodes: [0, 1], - }); - - for await (const line of generator) { - const match = this.parseGrepLine(line, absolutePath); - if (match) { - results.push(match); - if (results.length >= maxMatches) { - break; - } + for await (const line of generator) { + const match = this.parseGrepLine(line, absolutePath); + if (match) { + results.push(match); + if (results.length >= maxMatches) { + break; } } + } + return results; + } catch (grepError: unknown) { + if ( + grepError instanceof Error && + /Permission denied|Is a directory/i.test(grepError.message) + ) { return results; - } catch (e) { - debugLogger.debug( - `GrepLogic: System grep (second pass) failed: ${getErrorMessage(e)}`, - ); - // Fallback to JS if second pass fails (e.g. arg list too long) - } - } else { - // Normal system grep execution - if (include) { - grepArgs.push(`--include=${include}`); - } - - if (maxMatchesPerFile) { - grepArgs.push(`-m`, maxMatchesPerFile.toString()); - } - - grepArgs.push(pattern); - grepArgs.push('.'); - - const results: GrepMatch[] = []; - try { - const generator = execStreaming('grep', grepArgs, { - cwd: absolutePath, - signal: options.signal, - allowedExitCodes: [0, 1], - }); - - for await (const line of generator) { - const match = this.parseGrepLine(line, absolutePath); - if (match) { - results.push(match); - if (results.length >= maxMatches) { - break; - } - } - } - return results; - } catch (grepError: unknown) { - if ( - grepError instanceof Error && - /Permission denied|Is a directory/i.test(grepError.message) - ) { - return results; - } - debugLogger.debug( - `GrepLogic: System grep failed: ${getErrorMessage( - grepError, - )}. Falling back...`, - ); } + debugLogger.debug( + `GrepLogic: System grep failed: ${getErrorMessage( + grepError, + )}. Falling back...`, + ); } } @@ -670,7 +514,6 @@ class GrepToolInvocation extends BaseToolInvocation< }); const regex = new RegExp(pattern, 'i'); - const filterRegex = filter ? new RegExp(filter, 'i') : null; const allMatches: GrepMatch[] = []; for await (const filePath of filesStream) { @@ -688,11 +531,6 @@ class GrepToolInvocation extends BaseToolInvocation< try { const content = await fsPromises.readFile(fileAbsolutePath, 'utf8'); - // Check filter first if present - if (filterRegex && !filterRegex.test(content)) { - continue; - } - const lines = content.split(/\r?\n/); let fileMatchCount = 0; for (let index = 0; index < lines.length; index++) { diff --git a/packages/core/src/tools/ripGrep.ts b/packages/core/src/tools/ripGrep.ts index 2937112846..be7fbb1665 100644 --- a/packages/core/src/tools/ripGrep.ts +++ b/packages/core/src/tools/ripGrep.ts @@ -102,11 +102,6 @@ export interface RipGrepToolParams { */ include?: string; - /** - * Optional: A string that must be present in the file for it to be included (logical AND). - */ - filter?: string; - /** * If true, searches case-sensitively. Defaults to false. */ @@ -245,7 +240,6 @@ class GrepToolInvocation extends BaseToolInvocation< pattern: this.params.pattern, path: searchDirAbs, include: this.params.include, - filter: this.params.filter, case_sensitive: this.params.case_sensitive, fixed_strings: this.params.fixed_strings, context: this.params.context, @@ -331,7 +325,6 @@ class GrepToolInvocation extends BaseToolInvocation< pattern: string; path: string; include?: string; - filter?: string; case_sensitive?: boolean; fixed_strings?: boolean; context?: number; @@ -346,7 +339,6 @@ class GrepToolInvocation extends BaseToolInvocation< pattern, path: absolutePath, include, - filter, case_sensitive, fixed_strings, context, @@ -359,86 +351,6 @@ class GrepToolInvocation extends BaseToolInvocation< const rgPath = await ensureRgPath(); - // If filter is provided, first find files matching the filter. - // This simulates "rg -l 'filter' | xargs rg 'pattern'". - let matchingFiles: string[] | undefined; - - if (filter) { - const filterArgs = ['--files-with-matches', '--json']; - if (!case_sensitive) { - filterArgs.push('--ignore-case'); - } - // Treat filter as literal string if fixed_strings is true, or maybe we should always treat filter as regex? - // The prompt says "must_contain" or "pattern_b". - // Let's assume filter follows same rules as pattern (regex unless fixed_strings). - if (fixed_strings) { - filterArgs.push('--fixed-strings'); - filterArgs.push(filter); - } else { - filterArgs.push('--regexp', filter); - } - - if (no_ignore) { - filterArgs.push('--no-ignore'); - } - if (include) { - filterArgs.push('--glob', include); - } - - if (!no_ignore) { - // Add ignore patterns - const fileExclusions = new FileExclusions(this.config); - const excludes = fileExclusions.getGlobExcludes([ - ...COMMON_DIRECTORY_EXCLUDES, - '*.log', - '*.tmp', - ]); - excludes.forEach((exclude) => { - filterArgs.push('--glob', `!${exclude}`); - }); - - const geminiIgnorePaths = - this.fileDiscoveryService.getIgnoreFilePaths(); - for (const ignorePath of geminiIgnorePaths) { - filterArgs.push('--ignore-file', ignorePath); - } - } - - filterArgs.push('--threads', '4'); - filterArgs.push(absolutePath); - - try { - const generator = execStreaming(rgPath, filterArgs, { - signal: options.signal, - allowedExitCodes: [0, 1], - }); - - matchingFiles = []; - for await (const line of generator) { - try { - const json = JSON.parse(line); - if (json.type === 'match') { - const match = json.data; - if (match.path?.text) { - matchingFiles.push(match.path.text); - } - } - } catch (_e) { - // Ignore parse errors - } - } - - if (matchingFiles.length === 0) { - return []; - } - } catch (error: unknown) { - debugLogger.debug( - `GrepLogic: ripgrep filter failed: ${getErrorMessage(error)}`, - ); - throw error; - } - } - const rgArgs = ['--json']; if (!case_sensitive) { @@ -469,40 +381,28 @@ class GrepToolInvocation extends BaseToolInvocation< rgArgs.push('--max-count', maxMatchesPerFile.toString()); } - // If we have matchingFiles, we pass them as arguments. - // Otherwise we use include/exclude args and the path. - if (matchingFiles) { - // We don't need --glob include here as we are passing specific files that already matched the include in the filter step. - // We also don't need ignore files/patterns as we are passing explicit files. - // However, we should check command line length. - // If matchingFiles is very large, we might need to batch? - // For now, let's just spread them. - rgArgs.push(...matchingFiles); - } else { - if (include) { - rgArgs.push('--glob', include); - } - - if (!no_ignore) { - const fileExclusions = new FileExclusions(this.config); - const excludes = fileExclusions.getGlobExcludes([ - ...COMMON_DIRECTORY_EXCLUDES, - '*.log', - '*.tmp', - ]); - excludes.forEach((exclude) => { - rgArgs.push('--glob', `!${exclude}`); - }); - - const geminiIgnorePaths = - this.fileDiscoveryService.getIgnoreFilePaths(); - for (const ignorePath of geminiIgnorePaths) { - rgArgs.push('--ignore-file', ignorePath); - } - } - rgArgs.push(absolutePath); + if (include) { + rgArgs.push('--glob', include); } + if (!no_ignore) { + const fileExclusions = new FileExclusions(this.config); + const excludes = fileExclusions.getGlobExcludes([ + ...COMMON_DIRECTORY_EXCLUDES, + '*.log', + '*.tmp', + ]); + excludes.forEach((exclude) => { + rgArgs.push('--glob', `!${exclude}`); + }); + + const geminiIgnorePaths = this.fileDiscoveryService.getIgnoreFilePaths(); + for (const ignorePath of geminiIgnorePaths) { + rgArgs.push('--ignore-file', ignorePath); + } + } + rgArgs.push(absolutePath); + // Add threads rgArgs.push('--threads', '4'); @@ -632,11 +532,6 @@ export class RipGrepTool extends BaseDeclarativeTool< "Glob pattern to filter files (e.g., '*.ts', 'src/**'). Recommended for large repositories to reduce noise. Defaults to all files if omitted.", type: 'string', }, - filter: { - description: - 'Optional: A string that must be present in the file for it to be included (logical AND). Use this to filter results to files containing both the `pattern` and this filter.', - type: 'string', - }, case_sensitive: { description: 'If true, search is case-sensitive. Defaults to false (ignore case) if omitted.',