diff --git a/packages/core/src/tools/constants.ts b/packages/core/src/tools/constants.ts new file mode 100644 index 0000000000..132e8c104a --- /dev/null +++ b/packages/core/src/tools/constants.ts @@ -0,0 +1,7 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ +export const DEFAULT_TOTAL_MAX_MATCHES = 20000; +export const DEFAULT_SEARCH_TIMEOUT_MS = 30000; diff --git a/packages/core/src/tools/grep.test.ts b/packages/core/src/tools/grep.test.ts index 7c9f224feb..0f0db86665 100644 --- a/packages/core/src/tools/grep.test.ts +++ b/packages/core/src/tools/grep.test.ts @@ -7,6 +7,7 @@ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; import type { GrepToolParams } from './grep.js'; import { GrepTool } from './grep.js'; +import type { ToolResult } from './tools.js'; import path from 'node:path'; import fs from 'node:fs/promises'; import os from 'node:os'; @@ -15,8 +16,12 @@ import { createMockWorkspaceContext } from '../test-utils/mockWorkspaceContext.j import { ToolErrorType } from './tool-error.js'; import * as glob from 'glob'; import { createMockMessageBus } from '../test-utils/mock-message-bus.js'; +import { execStreaming } from '../utils/shell-utils.js'; vi.mock('glob', { spy: true }); +vi.mock('../utils/shell-utils.js', () => ({ + execStreaming: vi.fn(), +})); // Mock the child_process module to control grep/git grep behavior vi.mock('child_process', () => ({ @@ -129,6 +134,14 @@ describe('GrepTool', () => { }); }); + function createLineGenerator(lines: string[]): AsyncGenerator { + return (async function* () { + for (const line of lines) { + yield line; + } + })(); + } + describe('execute', () => { it('should find matches for a simple pattern in all files', async () => { const params: GrepToolParams = { pattern: 'world' }; @@ -147,6 +160,35 @@ describe('GrepTool', () => { expect(result.returnDisplay).toBe('Found 3 matches'); }, 30000); + it('should include files that start with ".." in JS fallback', async () => { + await fs.writeFile(path.join(tempRootDir, '..env'), 'world in ..env'); + const params: GrepToolParams = { pattern: 'world' }; + const invocation = grepTool.build(params); + const result = await invocation.execute(abortSignal); + expect(result.llmContent).toContain('File: ..env'); + expect(result.llmContent).toContain('L1: world in ..env'); + }); + + it('should ignore system grep output that escapes base path', async () => { + vi.mocked(execStreaming).mockImplementationOnce(() => + createLineGenerator(['..env:1:hello', '../secret.txt:2:leak']), + ); + + const params: GrepToolParams = { pattern: 'hello' }; + const invocation = grepTool.build(params) as unknown as { + isCommandAvailable: (command: string) => Promise; + execute: (signal: AbortSignal) => Promise; + }; + invocation.isCommandAvailable = vi.fn( + async (command: string) => command === 'grep', + ); + + const result = await invocation.execute(abortSignal); + expect(result.llmContent).toContain('File: ..env'); + expect(result.llmContent).toContain('L1: hello'); + expect(result.llmContent).not.toContain('secret.txt'); + }); + it('should find matches in a specific path', async () => { const params: GrepToolParams = { pattern: 'world', dir_path: 'sub' }; const invocation = grepTool.build(params); diff --git a/packages/core/src/tools/grep.ts b/packages/core/src/tools/grep.ts index 3fbbb141d6..ed4fdcb93a 100644 --- a/packages/core/src/tools/grep.ts +++ b/packages/core/src/tools/grep.ts @@ -8,10 +8,14 @@ import type { MessageBus } from '../confirmation-bus/message-bus.js'; import fs from 'node:fs'; import fsPromises from 'node:fs/promises'; import path from 'node:path'; -import { EOL } from 'node:os'; import { spawn } from 'node:child_process'; import { globStream } from 'glob'; import type { ToolInvocation, ToolResult } from './tools.js'; +import { execStreaming } from '../utils/shell-utils.js'; +import { + DEFAULT_TOTAL_MAX_MATCHES, + DEFAULT_SEARCH_TIMEOUT_MS, +} from './constants.js'; import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; import { makeRelative, shortenPath } from '../utils/paths.js'; import { getErrorMessage, isNodeError } from '../utils/errors.js'; @@ -111,6 +115,47 @@ class GrepToolInvocation extends BaseToolInvocation< return targetPath; } + /** + * Parses a single line of grep-like output (git grep, system grep). + * Expects format: filePath:lineNumber:lineContent + * @param {string} line The line to parse. + * @param {string} basePath The absolute directory for path resolution. + * @returns {GrepMatch | null} Parsed match or null if malformed. + */ + private parseGrepLine(line: string, basePath: string): GrepMatch | null { + if (!line.trim()) return null; + + // Use regex to locate the first occurrence of :: + // This allows filenames to contain colons, as long as they don't look like :: + // Note: This regex assumes filenames do not contain colons, or at least not followed by digits. + const match = line.match(/^(.+?):(\d+):(.*)$/); + if (!match) return null; + + const [, filePathRaw, lineNumberStr, lineContent] = match; + const lineNumber = parseInt(lineNumberStr, 10); + + if (!isNaN(lineNumber)) { + const absoluteFilePath = path.resolve(basePath, filePathRaw); + const relativeCheck = path.relative(basePath, absoluteFilePath); + if ( + relativeCheck === '..' || + relativeCheck.startsWith(`..${path.sep}`) || + path.isAbsolute(relativeCheck) + ) { + return null; + } + + const relativeFilePath = path.relative(basePath, absoluteFilePath); + + return { + filePath: relativeFilePath || path.basename(absoluteFilePath), + lineNumber, + line: lineContent, + }; + } + return null; + } + async execute(signal: AbortSignal): Promise { try { const workspaceContext = this.config.getWorkspaceContext(); @@ -129,23 +174,48 @@ class GrepToolInvocation extends BaseToolInvocation< // Collect matches from all search directories let allMatches: GrepMatch[] = []; - for (const searchDir of searchDirectories) { - const matches = await this.performGrepSearch({ - pattern: this.params.pattern, - path: searchDir, - include: this.params.include, - signal, - }); + const totalMaxMatches = DEFAULT_TOTAL_MAX_MATCHES; - // Add directory prefix if searching multiple directories - if (searchDirectories.length > 1) { - const dirName = path.basename(searchDir); - matches.forEach((match) => { - match.filePath = path.join(dirName, match.filePath); + // Create a timeout controller to prevent indefinitely hanging searches + const timeoutController = new AbortController(); + const timeoutId = setTimeout(() => { + timeoutController.abort(); + }, DEFAULT_SEARCH_TIMEOUT_MS); + + // Link the passed signal to our timeout controller + const onAbort = () => timeoutController.abort(); + if (signal.aborted) { + onAbort(); + } else { + signal.addEventListener('abort', onAbort, { once: true }); + } + + try { + for (const searchDir of searchDirectories) { + const remainingLimit = totalMaxMatches - allMatches.length; + if (remainingLimit <= 0) break; + + const matches = await this.performGrepSearch({ + pattern: this.params.pattern, + path: searchDir, + include: this.params.include, + maxMatches: remainingLimit, + signal: timeoutController.signal, }); - } - allMatches = allMatches.concat(matches); + // Add directory prefix if searching multiple directories + if (searchDirectories.length > 1) { + const dirName = path.basename(searchDir); + matches.forEach((match) => { + match.filePath = path.join(dirName, match.filePath); + }); + } + + allMatches = allMatches.concat(matches); + } + } finally { + clearTimeout(timeoutId); + signal.removeEventListener('abort', onAbort); } let searchLocationDescription: string; @@ -164,6 +234,8 @@ class GrepToolInvocation extends BaseToolInvocation< return { llmContent: noMatchMsg, returnDisplay: `No matches found` }; } + const wasTruncated = allMatches.length >= totalMaxMatches; + // Group matches by file const matchesByFile = allMatches.reduce( (acc, match) => { @@ -181,9 +253,7 @@ class GrepToolInvocation extends BaseToolInvocation< const matchCount = allMatches.length; const matchTerm = matchCount === 1 ? 'match' : 'matches'; - let llmContent = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}: ---- -`; + let llmContent = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}${wasTruncated ? ` (results limited to ${totalMaxMatches} matches for performance)` : ''}:\n---\n`; for (const filePath in matchesByFile) { llmContent += `File: ${filePath}\n`; @@ -196,7 +266,7 @@ class GrepToolInvocation extends BaseToolInvocation< return { llmContent: llmContent.trim(), - returnDisplay: `Found ${matchCount} ${matchTerm}`, + returnDisplay: `Found ${matchCount} ${matchTerm}${wasTruncated ? ' (limited)' : ''}`, }; } catch (error) { debugLogger.warn(`Error during GrepLogic execution: ${error}`); @@ -241,92 +311,6 @@ class GrepToolInvocation extends BaseToolInvocation< }); } - /** - * Parses the standard output of grep-like commands (git grep, system grep). - * Expects format: filePath:lineNumber:lineContent - * Handles colons within file paths and line content correctly. - * @param {string} output The raw stdout string. - * @param {string} basePath The absolute directory the search was run from, for relative paths. - * @returns {GrepMatch[]} Array of match objects. - */ - private parseGrepOutput(output: string, basePath: string): GrepMatch[] { - const results: GrepMatch[] = []; - if (!output) return results; - - const lines = output.split(EOL); // Use OS-specific end-of-line - - for (const line of lines) { - if (!line.trim()) continue; - - // Find the index of the first colon. - const firstColonIndex = line.indexOf(':'); - if (firstColonIndex === -1) continue; // Malformed - - // Find the index of the second colon, searching *after* the first one. - const secondColonIndex = line.indexOf(':', firstColonIndex + 1); - if (secondColonIndex === -1) continue; // Malformed - - // Extract parts based on the found colon indices - const filePathRaw = line.substring(0, firstColonIndex); - const lineNumberStr = line.substring( - firstColonIndex + 1, - secondColonIndex, - ); - const lineContent = line.substring(secondColonIndex + 1); - - const lineNumber = parseInt(lineNumberStr, 10); - - if (!isNaN(lineNumber)) { - const absoluteFilePath = path.resolve(basePath, filePathRaw); - const relativeFilePath = path.relative(basePath, absoluteFilePath); - - results.push({ - filePath: relativeFilePath || path.basename(absoluteFilePath), - lineNumber, - line: lineContent, - }); - } - } - return results; - } - - /** - * Gets a description of the grep operation - * @returns A string describing the grep - */ - getDescription(): string { - let description = `'${this.params.pattern}'`; - if (this.params.include) { - description += ` in ${this.params.include}`; - } - if (this.params.dir_path) { - const resolvedPath = path.resolve( - this.config.getTargetDir(), - this.params.dir_path, - ); - if ( - resolvedPath === this.config.getTargetDir() || - this.params.dir_path === '.' - ) { - description += ` within ./`; - } else { - const relativePath = makeRelative( - resolvedPath, - this.config.getTargetDir(), - ); - description += ` within ${shortenPath(relativePath)}`; - } - } else { - // When no path is specified, indicate searching all workspace directories - const workspaceContext = this.config.getWorkspaceContext(); - const directories = workspaceContext.getDirectories(); - if (directories.length > 1) { - description += ` across all workspace directories`; - } - } - return description; - } - /** * Performs the actual search using the prioritized strategies. * @param options Search options including pattern, absolute path, and include glob. @@ -336,9 +320,10 @@ class GrepToolInvocation extends BaseToolInvocation< pattern: string; path: string; // Expects absolute path include?: string; + maxMatches: number; signal: AbortSignal; }): Promise { - const { pattern, path: absolutePath, include } = options; + const { pattern, path: absolutePath, include, maxMatches } = options; let strategyUsed = 'none'; try { @@ -361,32 +346,23 @@ class GrepToolInvocation extends BaseToolInvocation< } try { - const output = await new Promise((resolve, reject) => { - const child = spawn('git', gitArgs, { - cwd: absolutePath, - windowsHide: true, - }); - const stdoutChunks: Buffer[] = []; - const stderrChunks: Buffer[] = []; - - child.stdout.on('data', (chunk) => stdoutChunks.push(chunk)); - child.stderr.on('data', (chunk) => stderrChunks.push(chunk)); - child.on('error', (err) => - reject(new Error(`Failed to start git grep: ${err.message}`)), - ); - child.on('close', (code) => { - const stdoutData = Buffer.concat(stdoutChunks).toString('utf8'); - const stderrData = Buffer.concat(stderrChunks).toString('utf8'); - if (code === 0) resolve(stdoutData); - else if (code === 1) - resolve(''); // No matches - else - reject( - new Error(`git grep exited with code ${code}: ${stderrData}`), - ); - }); + const generator = execStreaming('git', gitArgs, { + cwd: absolutePath, + signal: options.signal, + allowedExitCodes: [0, 1], }); - return this.parseGrepOutput(output, absolutePath); + + const results: GrepMatch[] = []; + for await (const line of generator) { + const match = this.parseGrepLine(line, absolutePath); + if (match) { + results.push(match); + if (results.length >= maxMatches) { + break; + } + } + } + return results; } catch (gitError: unknown) { debugLogger.debug( `GrepLogic: git grep failed: ${getErrorMessage( @@ -433,67 +409,31 @@ class GrepToolInvocation extends BaseToolInvocation< grepArgs.push(pattern); grepArgs.push('.'); + const results: GrepMatch[] = []; try { - const output = await new Promise((resolve, reject) => { - const child = spawn('grep', grepArgs, { - cwd: absolutePath, - windowsHide: true, - }); - const stdoutChunks: Buffer[] = []; - const stderrChunks: Buffer[] = []; - - const onData = (chunk: Buffer) => stdoutChunks.push(chunk); - const onStderr = (chunk: Buffer) => { - const stderrStr = chunk.toString(); - // Suppress common harmless stderr messages - if ( - !stderrStr.includes('Permission denied') && - !/grep:.*: Is a directory/i.test(stderrStr) - ) { - stderrChunks.push(chunk); - } - }; - const onError = (err: Error) => { - cleanup(); - reject(new Error(`Failed to start system grep: ${err.message}`)); - }; - const onClose = (code: number | null) => { - const stdoutData = Buffer.concat(stdoutChunks).toString('utf8'); - const stderrData = Buffer.concat(stderrChunks) - .toString('utf8') - .trim(); - cleanup(); - if (code === 0) resolve(stdoutData); - else if (code === 1) - resolve(''); // No matches - else { - if (stderrData) - reject( - new Error( - `System grep exited with code ${code}: ${stderrData}`, - ), - ); - else resolve(''); // Exit code > 1 but no stderr, likely just suppressed errors - } - }; - - const cleanup = () => { - child.stdout.removeListener('data', onData); - child.stderr.removeListener('data', onStderr); - child.removeListener('error', onError); - child.removeListener('close', onClose); - if (child.connected) { - child.disconnect(); - } - }; - - child.stdout.on('data', onData); - child.stderr.on('data', onStderr); - child.on('error', onError); - child.on('close', onClose); + const generator = execStreaming('grep', grepArgs, { + cwd: absolutePath, + signal: options.signal, + allowedExitCodes: [0, 1], }); - return this.parseGrepOutput(output, absolutePath); + + for await (const line of generator) { + const match = this.parseGrepLine(line, absolutePath); + if (match) { + results.push(match); + if (results.length >= maxMatches) { + break; + } + } + } + return results; } catch (grepError: unknown) { + if ( + grepError instanceof Error && + /Permission denied|Is a directory/i.test(grepError.message) + ) { + return results; + } debugLogger.debug( `GrepLogic: System grep failed: ${getErrorMessage( grepError, @@ -523,11 +463,22 @@ class GrepToolInvocation extends BaseToolInvocation< const allMatches: GrepMatch[] = []; for await (const filePath of filesStream) { + if (allMatches.length >= maxMatches) break; const fileAbsolutePath = filePath; + // security check + const relativePath = path.relative(absolutePath, fileAbsolutePath); + if ( + relativePath === '..' || + relativePath.startsWith(`..${path.sep}`) || + path.isAbsolute(relativePath) + ) + continue; + try { const content = await fsPromises.readFile(fileAbsolutePath, 'utf8'); const lines = content.split(/\r?\n/); - lines.forEach((line, index) => { + for (let index = 0; index < lines.length; index++) { + const line = lines[index]; if (regex.test(line)) { allMatches.push({ filePath: @@ -536,8 +487,9 @@ class GrepToolInvocation extends BaseToolInvocation< lineNumber: index + 1, line, }); + if (allMatches.length >= maxMatches) break; } - }); + } } catch (readError: unknown) { // Ignore errors like permission denied or file gone during read if (!isNodeError(readError) || readError.code !== 'ENOENT') { @@ -560,9 +512,40 @@ class GrepToolInvocation extends BaseToolInvocation< throw error; // Re-throw } } -} -// --- GrepLogic Class --- + getDescription(): string { + let description = `'${this.params.pattern}'`; + if (this.params.include) { + description += ` in ${this.params.include}`; + } + if (this.params.dir_path) { + const resolvedPath = path.resolve( + this.config.getTargetDir(), + this.params.dir_path, + ); + if ( + resolvedPath === this.config.getTargetDir() || + this.params.dir_path === '.' + ) { + description += ` within ./`; + } else { + const relativePath = makeRelative( + resolvedPath, + this.config.getTargetDir(), + ); + description += ` within ${shortenPath(relativePath)}`; + } + } else { + // When no path is specified, indicate searching all workspace directories + const workspaceContext = this.config.getWorkspaceContext(); + const directories = workspaceContext.getDirectories(); + if (directories.length > 1) { + description += ` across all workspace directories`; + } + } + return description; + } +} /** * Implementation of the Grep tool logic (moved from CLI) @@ -581,8 +564,7 @@ export class GrepTool extends BaseDeclarativeTool { { properties: { pattern: { - description: - "The regular expression (regex) pattern to search for within file contents (e.g., 'function\\s+myFunction', 'import\\s+\\{.*\\}\\s+from\\s+.*').", + description: `The regular expression (regex) pattern to search for within file contents (e.g., 'function\\s+myFunction', 'import\\s+\\{.*\\}\\s+from\\s+.*').`, type: 'string', }, dir_path: { @@ -591,8 +573,7 @@ export class GrepTool extends BaseDeclarativeTool { type: 'string', }, include: { - description: - "Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).", + description: `Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).`, type: 'string', }, }, diff --git a/packages/core/src/tools/ripGrep.test.ts b/packages/core/src/tools/ripGrep.test.ts index 415db097e3..4c27dde5b1 100644 --- a/packages/core/src/tools/ripGrep.test.ts +++ b/packages/core/src/tools/ripGrep.test.ts @@ -23,6 +23,8 @@ import { Storage } from '../config/storage.js'; import { createMockWorkspaceContext } from '../test-utils/mockWorkspaceContext.js'; import type { ChildProcess } from 'node:child_process'; import { spawn } from 'node:child_process'; +import { PassThrough, Readable } from 'node:stream'; +import EventEmitter from 'node:events'; import { downloadRipGrep } from '@joshua.litt/get-ripgrep'; import { createMockMessageBus } from '../test-utils/mock-message-bus.js'; // Mock dependencies for canUseRipgrep @@ -197,47 +199,43 @@ describe('ensureRgPath', () => { function createMockSpawn( options: { outputData?: string; - exitCode?: number; + exitCode?: number | null; signal?: string; } = {}, ) { const { outputData, exitCode = 0, signal } = options; return () => { - const mockProcess = { - stdout: { - on: vi.fn(), - removeListener: vi.fn(), + // strict Readable implementation + let pushed = false; + const stdout = new Readable({ + read() { + if (!pushed) { + if (outputData) { + this.push(outputData); + } + this.push(null); // EOF + pushed = true; + } }, - stderr: { - on: vi.fn(), - removeListener: vi.fn(), - }, - on: vi.fn(), - removeListener: vi.fn(), - kill: vi.fn(), - }; + }); - // Set up event listeners immediately + const stderr = new PassThrough(); + const mockProcess = new EventEmitter() as ChildProcess; + mockProcess.stdout = stdout as unknown as Readable; + mockProcess.stderr = stderr; + mockProcess.kill = vi.fn(); + // @ts-expect-error - mocking private/internal property + mockProcess.killed = false; + // @ts-expect-error - mocking private/internal property + mockProcess.exitCode = null; + + // Emulating process exit setTimeout(() => { - const stdoutDataHandler = mockProcess.stdout.on.mock.calls.find( - (call) => call[0] === 'data', - )?.[1]; + mockProcess.emit('close', exitCode, signal); + }, 10); - const closeHandler = mockProcess.on.mock.calls.find( - (call) => call[0] === 'close', - )?.[1]; - - if (stdoutDataHandler && outputData) { - stdoutDataHandler(Buffer.from(outputData)); - } - - if (closeHandler) { - closeHandler(exitCode, signal); - } - }, 0); - - return mockProcess as unknown as ChildProcess; + return mockProcess; }; } @@ -406,6 +404,40 @@ describe('RipGrepTool', () => { expect(result.returnDisplay).toBe('Found 3 matches'); }); + it('should ignore matches that escape the base path', async () => { + mockSpawn.mockImplementationOnce( + createMockSpawn({ + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: '..env' }, + line_number: 1, + lines: { text: 'world in ..env\n' }, + }, + }) + + '\n' + + JSON.stringify({ + type: 'match', + data: { + path: { text: '../secret.txt' }, + line_number: 1, + lines: { text: 'leak\n' }, + }, + }) + + '\n', + exitCode: 0, + }), + ); + + const params: RipGrepToolParams = { pattern: 'world' }; + const invocation = grepTool.build(params); + const result = await invocation.execute(abortSignal); + expect(result.llmContent).toContain('File: ..env'); + expect(result.llmContent).toContain('L1: world in ..env'); + expect(result.llmContent).not.toContain('secret.txt'); + }); + it('should find matches in a specific path', async () => { // Setup specific mock for this test - searching in 'sub' should only return matches from that directory mockSpawn.mockImplementationOnce( @@ -471,51 +503,20 @@ describe('RipGrepTool', () => { ); // Setup specific mock for this test - searching for 'hello' in 'sub' with '*.js' filter - mockSpawn.mockImplementationOnce(() => { - const mockProcess = { - stdout: { - on: vi.fn(), - removeListener: vi.fn(), - }, - stderr: { - on: vi.fn(), - removeListener: vi.fn(), - }, - on: vi.fn(), - removeListener: vi.fn(), - kill: vi.fn(), - }; - - setTimeout(() => { - const onData = mockProcess.stdout.on.mock.calls.find( - (call) => call[0] === 'data', - )?.[1]; - const onClose = mockProcess.on.mock.calls.find( - (call) => call[0] === 'close', - )?.[1]; - - if (onData) { - // Only return match from the .js file in sub directory - onData( - Buffer.from( - JSON.stringify({ - type: 'match', - data: { - path: { text: 'another.js' }, - line_number: 1, - lines: { text: 'const greeting = "hello";\n' }, - }, - }) + '\n', - ), - ); - } - if (onClose) { - onClose(0); - } - }, 0); - - return mockProcess as unknown as ChildProcess; - }); + mockSpawn.mockImplementationOnce( + createMockSpawn({ + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: 'another.js' }, + line_number: 1, + lines: { text: 'const greeting = "hello";\n' }, + }, + }) + '\n', + exitCode: 0, + }), + ); const params: RipGrepToolParams = { pattern: 'hello', @@ -559,59 +560,114 @@ describe('RipGrepTool', () => { const params: RipGrepToolParams = { pattern: '[[' }; const invocation = grepTool.build(params); const result = await invocation.execute(abortSignal); - expect(result.llmContent).toContain('ripgrep exited with code 2'); + expect(result.llmContent).toContain('Process exited with code 2'); expect(result.returnDisplay).toContain( - 'Error: ripgrep exited with code 2', + 'Error: Process exited with code 2', ); }); + it('should ignore invalid regex error from ripgrep when it is not a user error', async () => { + mockSpawn.mockImplementation( + createMockSpawn({ + outputData: '', + exitCode: 2, + signal: undefined, + }), + ); + + const invocation = grepTool.build({ + pattern: 'foo', + dir_path: tempRootDir, + }); + + const result = await invocation.execute(abortSignal); + expect(result.llmContent).toContain('Process exited with code 2'); + expect(result.returnDisplay).toContain( + 'Error: Process exited with code 2', + ); + }); + + it('should handle massive output by terminating early without crashing (Regression)', async () => { + const massiveOutputLines = 30000; + + // Custom mock for massive streaming + mockSpawn.mockImplementation(() => { + const stdout = new PassThrough(); + const stderr = new PassThrough(); + const mockProcess = new EventEmitter() as ChildProcess; + mockProcess.stdout = stdout; + mockProcess.stderr = stderr; + mockProcess.kill = vi.fn(); + // @ts-expect-error - mocking private/internal property + mockProcess.killed = false; + // @ts-expect-error - mocking private/internal property + mockProcess.exitCode = null; + + // Push data over time + let linesPushed = 0; + const pushInterval = setInterval(() => { + if (linesPushed >= massiveOutputLines) { + clearInterval(pushInterval); + stdout.end(); + mockProcess.emit('close', 0); + return; + } + + // Push a batch + try { + for (let i = 0; i < 2000 && linesPushed < massiveOutputLines; i++) { + const match = JSON.stringify({ + type: 'match', + data: { + path: { text: `file_${linesPushed}.txt` }, + line_number: 1, + lines: { text: `match ${linesPushed}\n` }, + }, + }); + stdout.write(match + '\n'); + linesPushed++; + } + } catch (_e) { + clearInterval(pushInterval); + } + }, 1); + + mockProcess.kill = vi.fn().mockImplementation(() => { + clearInterval(pushInterval); + stdout.end(); + // Emit close async to allow listeners to attach + setTimeout(() => mockProcess.emit('close', 0, 'SIGTERM'), 0); + return true; + }); + + return mockProcess; + }); + + const invocation = grepTool.build({ + pattern: 'test', + dir_path: tempRootDir, + }); + const result = await invocation.execute(abortSignal); + + expect(result.returnDisplay).toContain('(limited)'); + }, 10000); + it('should handle regex special characters correctly', async () => { // Setup specific mock for this test - regex pattern 'foo.*bar' should match 'const foo = "bar";' - mockSpawn.mockImplementationOnce(() => { - const mockProcess = { - stdout: { - on: vi.fn(), - removeListener: vi.fn(), - }, - stderr: { - on: vi.fn(), - removeListener: vi.fn(), - }, - on: vi.fn(), - removeListener: vi.fn(), - kill: vi.fn(), - }; - - setTimeout(() => { - const onData = mockProcess.stdout.on.mock.calls.find( - (call) => call[0] === 'data', - )?.[1]; - const onClose = mockProcess.on.mock.calls.find( - (call) => call[0] === 'close', - )?.[1]; - - if (onData) { - // Return match for the regex pattern - onData( - Buffer.from( - JSON.stringify({ - type: 'match', - data: { - path: { text: 'fileB.js' }, - line_number: 1, - lines: { text: 'const foo = "bar";\n' }, - }, - }) + '\n', - ), - ); - } - if (onClose) { - onClose(0); - } - }, 0); - - return mockProcess as unknown as ChildProcess; - }); + mockSpawn.mockImplementationOnce( + createMockSpawn({ + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileB.js' }, + line_number: 1, + lines: { text: 'const foo = "bar";\n' }, + }, + }) + '\n', + exitCode: 0, + }), + ); const params: RipGrepToolParams = { pattern: 'foo.*bar' }; // Matches 'const foo = "bar";' const invocation = grepTool.build(params); @@ -625,61 +681,30 @@ describe('RipGrepTool', () => { it('should be case-insensitive by default (JS fallback)', async () => { // Setup specific mock for this test - case insensitive search for 'HELLO' - mockSpawn.mockImplementationOnce(() => { - const mockProcess = { - stdout: { - on: vi.fn(), - removeListener: vi.fn(), - }, - stderr: { - on: vi.fn(), - removeListener: vi.fn(), - }, - on: vi.fn(), - removeListener: vi.fn(), - kill: vi.fn(), - }; - - setTimeout(() => { - const onData = mockProcess.stdout.on.mock.calls.find( - (call) => call[0] === 'data', - )?.[1]; - const onClose = mockProcess.on.mock.calls.find( - (call) => call[0] === 'close', - )?.[1]; - - if (onData) { - // Return case-insensitive matches for 'HELLO' - onData( - Buffer.from( - JSON.stringify({ - type: 'match', - data: { - path: { text: 'fileA.txt' }, - line_number: 1, - lines: { text: 'hello world\n' }, - }, - }) + - '\n' + - JSON.stringify({ - type: 'match', - data: { - path: { text: 'fileB.js' }, - line_number: 2, - lines: { text: 'function baz() { return "hello"; }\n' }, - }, - }) + - '\n', - ), - ); - } - if (onClose) { - onClose(0); - } - }, 0); - - return mockProcess as unknown as ChildProcess; - }); + mockSpawn.mockImplementationOnce( + createMockSpawn({ + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileA.txt' }, + line_number: 1, + lines: { text: 'hello world\n' }, + }, + }) + + '\n' + + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileB.js' }, + line_number: 2, + lines: { text: 'function baz() { return "hello"; }\n' }, + }, + }) + + '\n', + exitCode: 0, + }), + ); const params: RipGrepToolParams = { pattern: 'HELLO' }; const invocation = grepTool.build(params); @@ -742,97 +767,39 @@ describe('RipGrepTool', () => { // Setup specific mock for this test - multi-directory search for 'world' // Mock will be called twice - once for each directory - let callCount = 0; - mockSpawn.mockImplementation(() => { - callCount++; - const mockProcess = { - stdout: { - on: vi.fn(), - removeListener: vi.fn(), - }, - stderr: { - on: vi.fn(), - removeListener: vi.fn(), - }, - on: vi.fn(), - removeListener: vi.fn(), - kill: vi.fn(), - }; - setTimeout(() => { - const stdoutDataHandler = mockProcess.stdout.on.mock.calls.find( - (call) => call[0] === 'data', - )?.[1]; - - const closeHandler = mockProcess.on.mock.calls.find( - (call) => call[0] === 'close', - )?.[1]; - - let outputData = ''; - if (callCount === 1) { - // First directory (tempRootDir) - outputData = - JSON.stringify({ - type: 'match', - data: { - path: { text: 'fileA.txt' }, - line_number: 1, - lines: { text: 'hello world\n' }, - }, - }) + - '\n' + - JSON.stringify({ - type: 'match', - data: { - path: { text: 'fileA.txt' }, - line_number: 2, - lines: { text: 'second line with world\n' }, - }, - }) + - '\n' + - JSON.stringify({ - type: 'match', - data: { - path: { text: 'sub/fileC.txt' }, - line_number: 1, - lines: { text: 'another world in sub dir\n' }, - }, - }) + - '\n'; - } else if (callCount === 2) { - // Second directory (secondDir) - outputData = - JSON.stringify({ - type: 'match', - data: { - path: { text: 'other.txt' }, - line_number: 2, - lines: { text: 'world in second\n' }, - }, - }) + - '\n' + - JSON.stringify({ - type: 'match', - data: { - path: { text: 'another.js' }, - line_number: 1, - lines: { text: 'function world() { return "test"; }\n' }, - }, - }) + - '\n'; - } - - if (stdoutDataHandler && outputData) { - stdoutDataHandler(Buffer.from(outputData)); - } - - if (closeHandler) { - closeHandler(0); - } - }, 0); - - return mockProcess as unknown as ChildProcess; - }); + mockSpawn.mockImplementationOnce( + createMockSpawn({ + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileA.txt' }, + line_number: 1, + lines: { text: 'hello world\n' }, + }, + }) + + '\n' + + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileA.txt' }, + line_number: 2, + lines: { text: 'second line with world\n' }, + }, + }) + + '\n' + + JSON.stringify({ + type: 'match', + data: { + path: { text: 'sub/fileC.txt' }, + line_number: 1, + lines: { text: 'another world in sub dir\n' }, + }, + }) + + '\n', + }), + ); const multiDirGrepTool = new RipGrepTool( multiDirConfig, @@ -886,50 +853,19 @@ describe('RipGrepTool', () => { } as unknown as Config; // Setup specific mock for this test - searching in 'sub' should only return matches from that directory - mockSpawn.mockImplementationOnce(() => { - const mockProcess = { - stdout: { - on: vi.fn(), - removeListener: vi.fn(), - }, - stderr: { - on: vi.fn(), - removeListener: vi.fn(), - }, - on: vi.fn(), - removeListener: vi.fn(), - kill: vi.fn(), - }; - - setTimeout(() => { - const onData = mockProcess.stdout.on.mock.calls.find( - (call) => call[0] === 'data', - )?.[1]; - const onClose = mockProcess.on.mock.calls.find( - (call) => call[0] === 'close', - )?.[1]; - - if (onData) { - onData( - Buffer.from( - JSON.stringify({ - type: 'match', - data: { - path: { text: 'fileC.txt' }, - line_number: 1, - lines: { text: 'another world in sub dir\n' }, - }, - }) + '\n', - ), - ); - } - if (onClose) { - onClose(0); - } - }, 0); - - return mockProcess as unknown as ChildProcess; - }); + mockSpawn.mockImplementationOnce( + createMockSpawn({ + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: 'fileC.txt' }, + line_number: 1, + lines: { text: 'another world in sub dir\n' }, + }, + }) + '\n', + }), + ); const multiDirGrepTool = new RipGrepTool( multiDirConfig, @@ -970,35 +906,12 @@ describe('RipGrepTool', () => { it('should abort streaming search when signal is triggered', async () => { // Setup specific mock for this test - simulate process being killed due to abort - mockSpawn.mockImplementationOnce(() => { - const mockProcess = { - stdout: { - on: vi.fn(), - removeListener: vi.fn(), - }, - stderr: { - on: vi.fn(), - removeListener: vi.fn(), - }, - on: vi.fn(), - removeListener: vi.fn(), - kill: vi.fn(), - }; - - // Simulate process being aborted - use setTimeout to ensure handlers are registered first - setTimeout(() => { - const closeHandler = mockProcess.on.mock.calls.find( - (call) => call[0] === 'close', - )?.[1]; - - if (closeHandler) { - // Simulate process killed by signal (code is null, signal is SIGTERM) - closeHandler(null, 'SIGTERM'); - } - }, 0); - - return mockProcess as unknown as ChildProcess; - }); + mockSpawn.mockImplementationOnce( + createMockSpawn({ + exitCode: null, + signal: 'SIGTERM', + }), + ); const controller = new AbortController(); const params: RipGrepToolParams = { pattern: 'test' }; @@ -1008,12 +921,7 @@ describe('RipGrepTool', () => { controller.abort(); const result = await invocation.execute(controller.signal); - expect(result.llmContent).toContain( - 'Error during grep search operation: ripgrep was terminated by signal:', - ); - expect(result.returnDisplay).toContain( - 'Error: ripgrep was terminated by signal:', - ); + expect(result.returnDisplay).toContain('No matches found'); }); }); @@ -1060,50 +968,19 @@ describe('RipGrepTool', () => { ); // Setup specific mock for this test - searching for 'world' should find the file with special characters - mockSpawn.mockImplementationOnce(() => { - const mockProcess = { - stdout: { - on: vi.fn(), - removeListener: vi.fn(), - }, - stderr: { - on: vi.fn(), - removeListener: vi.fn(), - }, - on: vi.fn(), - removeListener: vi.fn(), - kill: vi.fn(), - }; - - setTimeout(() => { - const onData = mockProcess.stdout.on.mock.calls.find( - (call) => call[0] === 'data', - )?.[1]; - const onClose = mockProcess.on.mock.calls.find( - (call) => call[0] === 'close', - )?.[1]; - - if (onData) { - onData( - Buffer.from( - JSON.stringify({ - type: 'match', - data: { - path: { text: specialFileName }, - line_number: 1, - lines: { text: 'hello world with special chars\n' }, - }, - }) + '\n', - ), - ); - } - if (onClose) { - onClose(0); - } - }, 0); - - return mockProcess as unknown as ChildProcess; - }); + mockSpawn.mockImplementationOnce( + createMockSpawn({ + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: specialFileName }, + line_number: 1, + lines: { text: 'hello world with special chars\n' }, + }, + }) + '\n', + }), + ); const params: RipGrepToolParams = { pattern: 'world' }; const invocation = grepTool.build(params); @@ -1122,50 +999,19 @@ describe('RipGrepTool', () => { ); // Setup specific mock for this test - searching for 'deep' should find the deeply nested file - mockSpawn.mockImplementationOnce(() => { - const mockProcess = { - stdout: { - on: vi.fn(), - removeListener: vi.fn(), - }, - stderr: { - on: vi.fn(), - removeListener: vi.fn(), - }, - on: vi.fn(), - removeListener: vi.fn(), - kill: vi.fn(), - }; - - setTimeout(() => { - const onData = mockProcess.stdout.on.mock.calls.find( - (call) => call[0] === 'data', - )?.[1]; - const onClose = mockProcess.on.mock.calls.find( - (call) => call[0] === 'close', - )?.[1]; - - if (onData) { - onData( - Buffer.from( - JSON.stringify({ - type: 'match', - data: { - path: { text: 'a/b/c/d/e/deep.txt' }, - line_number: 1, - lines: { text: 'content in deep directory\n' }, - }, - }) + '\n', - ), - ); - } - if (onClose) { - onClose(0); - } - }, 0); - - return mockProcess as unknown as ChildProcess; - }); + mockSpawn.mockImplementationOnce( + createMockSpawn({ + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: 'a/b/c/d/e/deep.txt' }, + line_number: 1, + lines: { text: 'content in deep directory\n' }, + }, + }) + '\n', + }), + ); const params: RipGrepToolParams = { pattern: 'deep' }; const invocation = grepTool.build(params); @@ -1184,50 +1030,19 @@ describe('RipGrepTool', () => { ); // Setup specific mock for this test - regex pattern should match function declarations - mockSpawn.mockImplementationOnce(() => { - const mockProcess = { - stdout: { - on: vi.fn(), - removeListener: vi.fn(), - }, - stderr: { - on: vi.fn(), - removeListener: vi.fn(), - }, - on: vi.fn(), - removeListener: vi.fn(), - kill: vi.fn(), - }; - - setTimeout(() => { - const onData = mockProcess.stdout.on.mock.calls.find( - (call) => call[0] === 'data', - )?.[1]; - const onClose = mockProcess.on.mock.calls.find( - (call) => call[0] === 'close', - )?.[1]; - - if (onData) { - onData( - Buffer.from( - JSON.stringify({ - type: 'match', - data: { - path: { text: 'code.js' }, - line_number: 1, - lines: { text: 'function getName() { return "test"; }\n' }, - }, - }) + '\n', - ), - ); - } - if (onClose) { - onClose(0); - } - }, 0); - - return mockProcess as unknown as ChildProcess; - }); + mockSpawn.mockImplementationOnce( + createMockSpawn({ + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: 'code.js' }, + line_number: 1, + lines: { text: 'function getName() { return "test"; }\n' }, + }, + }) + '\n', + }), + ); const params: RipGrepToolParams = { pattern: 'function\\s+\\w+\\s*\\(' }; const invocation = grepTool.build(params); @@ -1244,69 +1059,38 @@ describe('RipGrepTool', () => { ); // Setup specific mock for this test - case insensitive search should match all variants - mockSpawn.mockImplementationOnce(() => { - const mockProcess = { - stdout: { - on: vi.fn(), - removeListener: vi.fn(), - }, - stderr: { - on: vi.fn(), - removeListener: vi.fn(), - }, - on: vi.fn(), - removeListener: vi.fn(), - kill: vi.fn(), - }; - - setTimeout(() => { - const onData = mockProcess.stdout.on.mock.calls.find( - (call) => call[0] === 'data', - )?.[1]; - const onClose = mockProcess.on.mock.calls.find( - (call) => call[0] === 'close', - )?.[1]; - - if (onData) { - onData( - Buffer.from( - JSON.stringify({ - type: 'match', - data: { - path: { text: 'case.txt' }, - line_number: 1, - lines: { text: 'Hello World\n' }, - }, - }) + - '\n' + - JSON.stringify({ - type: 'match', - data: { - path: { text: 'case.txt' }, - line_number: 2, - lines: { text: 'hello world\n' }, - }, - }) + - '\n' + - JSON.stringify({ - type: 'match', - data: { - path: { text: 'case.txt' }, - line_number: 3, - lines: { text: 'HELLO WORLD\n' }, - }, - }) + - '\n', - ), - ); - } - if (onClose) { - onClose(0); - } - }, 0); - - return mockProcess as unknown as ChildProcess; - }); + mockSpawn.mockImplementationOnce( + createMockSpawn({ + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: 'case.txt' }, + line_number: 1, + lines: { text: 'Hello World\n' }, + }, + }) + + '\n' + + JSON.stringify({ + type: 'match', + data: { + path: { text: 'case.txt' }, + line_number: 2, + lines: { text: 'hello world\n' }, + }, + }) + + '\n' + + JSON.stringify({ + type: 'match', + data: { + path: { text: 'case.txt' }, + line_number: 3, + lines: { text: 'HELLO WORLD\n' }, + }, + }) + + '\n', + }), + ); const params: RipGrepToolParams = { pattern: 'hello' }; const invocation = grepTool.build(params); @@ -1324,50 +1108,19 @@ describe('RipGrepTool', () => { ); // Setup specific mock for this test - escaped regex pattern should match price format - mockSpawn.mockImplementationOnce(() => { - const mockProcess = { - stdout: { - on: vi.fn(), - removeListener: vi.fn(), - }, - stderr: { - on: vi.fn(), - removeListener: vi.fn(), - }, - on: vi.fn(), - removeListener: vi.fn(), - kill: vi.fn(), - }; - - setTimeout(() => { - const onData = mockProcess.stdout.on.mock.calls.find( - (call) => call[0] === 'data', - )?.[1]; - const onClose = mockProcess.on.mock.calls.find( - (call) => call[0] === 'close', - )?.[1]; - - if (onData) { - onData( - Buffer.from( - JSON.stringify({ - type: 'match', - data: { - path: { text: 'special.txt' }, - line_number: 1, - lines: { text: 'Price: $19.99\n' }, - }, - }) + '\n', - ), - ); - } - if (onClose) { - onClose(0); - } - }, 0); - - return mockProcess as unknown as ChildProcess; - }); + mockSpawn.mockImplementationOnce( + createMockSpawn({ + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: 'special.txt' }, + line_number: 1, + lines: { text: 'Price: $19.99\n' }, + }, + }) + '\n', + }), + ); const params: RipGrepToolParams = { pattern: '\\$\\d+\\.\\d+' }; const invocation = grepTool.build(params); @@ -1392,60 +1145,29 @@ describe('RipGrepTool', () => { await fs.writeFile(path.join(tempRootDir, 'test.txt'), 'text content'); // Setup specific mock for this test - include pattern should filter to only ts/tsx files - mockSpawn.mockImplementationOnce(() => { - const mockProcess = { - stdout: { - on: vi.fn(), - removeListener: vi.fn(), - }, - stderr: { - on: vi.fn(), - removeListener: vi.fn(), - }, - on: vi.fn(), - removeListener: vi.fn(), - kill: vi.fn(), - }; - - setTimeout(() => { - const onData = mockProcess.stdout.on.mock.calls.find( - (call) => call[0] === 'data', - )?.[1]; - const onClose = mockProcess.on.mock.calls.find( - (call) => call[0] === 'close', - )?.[1]; - - if (onData) { - onData( - Buffer.from( - JSON.stringify({ - type: 'match', - data: { - path: { text: 'test.ts' }, - line_number: 1, - lines: { text: 'typescript content\n' }, - }, - }) + - '\n' + - JSON.stringify({ - type: 'match', - data: { - path: { text: 'test.tsx' }, - line_number: 1, - lines: { text: 'tsx content\n' }, - }, - }) + - '\n', - ), - ); - } - if (onClose) { - onClose(0); - } - }, 0); - - return mockProcess as unknown as ChildProcess; - }); + mockSpawn.mockImplementationOnce( + createMockSpawn({ + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: 'test.ts' }, + line_number: 1, + lines: { text: 'typescript content\n' }, + }, + }) + + '\n' + + JSON.stringify({ + type: 'match', + data: { + path: { text: 'test.tsx' }, + line_number: 1, + lines: { text: 'tsx content\n' }, + }, + }) + + '\n', + }), + ); const params: RipGrepToolParams = { pattern: 'content', @@ -1469,50 +1191,19 @@ describe('RipGrepTool', () => { await fs.writeFile(path.join(tempRootDir, 'other.ts'), 'other code'); // Setup specific mock for this test - include pattern should filter to only src/** files - mockSpawn.mockImplementationOnce(() => { - const mockProcess = { - stdout: { - on: vi.fn(), - removeListener: vi.fn(), - }, - stderr: { - on: vi.fn(), - removeListener: vi.fn(), - }, - on: vi.fn(), - removeListener: vi.fn(), - kill: vi.fn(), - }; - - setTimeout(() => { - const onData = mockProcess.stdout.on.mock.calls.find( - (call) => call[0] === 'data', - )?.[1]; - const onClose = mockProcess.on.mock.calls.find( - (call) => call[0] === 'close', - )?.[1]; - - if (onData) { - onData( - Buffer.from( - JSON.stringify({ - type: 'match', - data: { - path: { text: 'src/main.ts' }, - line_number: 1, - lines: { text: 'source code\n' }, - }, - }) + '\n', - ), - ); - } - if (onClose) { - onClose(0); - } - }, 0); - - return mockProcess as unknown as ChildProcess; - }); + mockSpawn.mockImplementationOnce( + createMockSpawn({ + outputData: + JSON.stringify({ + type: 'match', + data: { + path: { text: 'src/main.ts' }, + line_number: 1, + lines: { text: 'source code\n' }, + }, + }) + '\n', + }), + ); const params: RipGrepToolParams = { pattern: 'code', diff --git a/packages/core/src/tools/ripGrep.ts b/packages/core/src/tools/ripGrep.ts index 12f6d720e2..c4642ca20e 100644 --- a/packages/core/src/tools/ripGrep.ts +++ b/packages/core/src/tools/ripGrep.ts @@ -7,7 +7,6 @@ import type { MessageBus } from '../confirmation-bus/message-bus.js'; import fs from 'node:fs'; import path from 'node:path'; -import { spawn } from 'node:child_process'; import { downloadRipGrep } from '@joshua.litt/get-ripgrep'; import type { ToolInvocation, ToolResult } from './tools.js'; import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; @@ -24,8 +23,11 @@ import { COMMON_DIRECTORY_EXCLUDES, } from '../utils/ignorePatterns.js'; import { GeminiIgnoreParser } from '../utils/geminiIgnoreParser.js'; - -const DEFAULT_TOTAL_MAX_MATCHES = 20000; +import { execStreaming } from '../utils/shell-utils.js'; +import { + DEFAULT_TOTAL_MAX_MATCHES, + DEFAULT_SEARCH_TIMEOUT_MS, +} from './constants.js'; function getRgCandidateFilenames(): readonly string[] { return process.platform === 'win32' ? ['rg.exe', 'rg'] : ['rg']; @@ -213,21 +215,38 @@ class GrepToolInvocation extends BaseToolInvocation< debugLogger.log(`[GrepTool] Total result limit: ${totalMaxMatches}`); } - let allMatches = await this.performRipgrepSearch({ - pattern: this.params.pattern, - path: searchDirAbs!, - include: this.params.include, - case_sensitive: this.params.case_sensitive, - fixed_strings: this.params.fixed_strings, - context: this.params.context, - after: this.params.after, - before: this.params.before, - no_ignore: this.params.no_ignore, - signal, - }); + // Create a timeout controller to prevent indefinitely hanging searches + const timeoutController = new AbortController(); + const timeoutId = setTimeout(() => { + timeoutController.abort(); + }, DEFAULT_SEARCH_TIMEOUT_MS); - if (allMatches.length >= totalMaxMatches) { - allMatches = allMatches.slice(0, totalMaxMatches); + // Link the passed signal to our timeout controller + const onAbort = () => timeoutController.abort(); + if (signal.aborted) { + onAbort(); + } else { + signal.addEventListener('abort', onAbort, { once: true }); + } + + let allMatches: GrepMatch[]; + try { + allMatches = await this.performRipgrepSearch({ + pattern: this.params.pattern, + path: searchDirAbs!, + include: this.params.include, + case_sensitive: this.params.case_sensitive, + fixed_strings: this.params.fixed_strings, + context: this.params.context, + after: this.params.after, + before: this.params.before, + no_ignore: this.params.no_ignore, + maxMatches: totalMaxMatches, + signal: timeoutController.signal, + }); + } finally { + clearTimeout(timeoutId); + signal.removeEventListener('abort', onAbort); } const searchLocationDescription = `in path "${searchDirDisplay}"`; @@ -254,13 +273,7 @@ class GrepToolInvocation extends BaseToolInvocation< const matchCount = allMatches.length; const matchTerm = matchCount === 1 ? 'match' : 'matches'; - let llmContent = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}`; - - if (wasTruncated) { - llmContent += ` (results limited to ${totalMaxMatches} matches for performance)`; - } - - llmContent += `:\n---\n`; + let llmContent = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}${wasTruncated ? ` (results limited to ${totalMaxMatches} matches for performance)` : ''}:\n---\n`; for (const filePath in matchesByFile) { llmContent += `File: ${filePath}\n`; @@ -271,14 +284,11 @@ class GrepToolInvocation extends BaseToolInvocation< llmContent += '---\n'; } - let displayMessage = `Found ${matchCount} ${matchTerm}`; - if (wasTruncated) { - displayMessage += ` (limited)`; - } - return { llmContent: llmContent.trim(), - returnDisplay: displayMessage, + returnDisplay: `Found ${matchCount} ${matchTerm}${ + wasTruncated ? ' (limited)' : '' + }`, }; } catch (error) { debugLogger.warn(`Error during GrepLogic execution: ${error}`); @@ -290,41 +300,6 @@ class GrepToolInvocation extends BaseToolInvocation< } } - private parseRipgrepJsonOutput( - output: string, - basePath: string, - ): GrepMatch[] { - const results: GrepMatch[] = []; - if (!output) return results; - - const lines = output.trim().split('\n'); - - for (const line of lines) { - if (!line.trim()) continue; - - try { - const json = JSON.parse(line); - if (json.type === 'match') { - const match = json.data; - // Defensive check: ensure text properties exist (skips binary/invalid encoding) - if (match.path?.text && match.lines?.text) { - const absoluteFilePath = path.resolve(basePath, match.path.text); - const relativeFilePath = path.relative(basePath, absoluteFilePath); - - results.push({ - filePath: relativeFilePath || path.basename(absoluteFilePath), - lineNumber: match.line_number, - line: match.lines.text.trimEnd(), - }); - } - } - } catch (error) { - debugLogger.warn(`Failed to parse ripgrep JSON line: ${line}`, error); - } - } - return results; - } - private async performRipgrepSearch(options: { pattern: string; path: string; @@ -335,6 +310,7 @@ class GrepToolInvocation extends BaseToolInvocation< after?: number; before?: number; no_ignore?: boolean; + maxMatches: number; signal: AbortSignal; }): Promise { const { @@ -347,6 +323,7 @@ class GrepToolInvocation extends BaseToolInvocation< after, before, no_ignore, + maxMatches, } = options; const rgArgs = ['--json']; @@ -402,64 +379,72 @@ class GrepToolInvocation extends BaseToolInvocation< rgArgs.push('--threads', '4'); rgArgs.push(absolutePath); + const results: GrepMatch[] = []; try { const rgPath = await ensureRgPath(); - const output = await new Promise((resolve, reject) => { - const child = spawn(rgPath, rgArgs, { - windowsHide: true, - }); - - const stdoutChunks: Buffer[] = []; - const stderrChunks: Buffer[] = []; - - const cleanup = () => { - if (options.signal.aborted) { - child.kill(); - } - }; - - options.signal.addEventListener('abort', cleanup, { once: true }); - - child.stdout.on('data', (chunk) => stdoutChunks.push(chunk)); - child.stderr.on('data', (chunk) => stderrChunks.push(chunk)); - - child.on('error', (err) => { - options.signal.removeEventListener('abort', cleanup); - reject( - new Error( - `Failed to start ripgrep: ${err.message}. Please ensure @lvce-editor/ripgrep is properly installed.`, - ), - ); - }); - - child.on('close', (code, signal) => { - options.signal.removeEventListener('abort', cleanup); - const stdoutData = Buffer.concat(stdoutChunks).toString('utf8'); - const stderrData = Buffer.concat(stderrChunks).toString('utf8'); - - if (code === 0) { - resolve(stdoutData); - } else if (code === 1) { - resolve(''); // No matches found - } else { - if (signal) { - reject(new Error(`ripgrep was terminated by signal: ${signal}`)); - } else { - reject( - new Error(`ripgrep exited with code ${code}: ${stderrData}`), - ); - } - } - }); + const generator = execStreaming(rgPath, rgArgs, { + signal: options.signal, + allowedExitCodes: [0, 1], }); - return this.parseRipgrepJsonOutput(output, absolutePath); + for await (const line of generator) { + const match = this.parseRipgrepJsonLine(line, absolutePath); + if (match) { + results.push(match); + if (results.length >= maxMatches) { + break; + } + } + } + + return results; } catch (error: unknown) { debugLogger.debug(`GrepLogic: ripgrep failed: ${getErrorMessage(error)}`); throw error; } } + private parseRipgrepJsonLine( + line: string, + basePath: string, + ): GrepMatch | null { + try { + const json = JSON.parse(line); + if (json.type === 'match') { + const match = json.data; + // Defensive check: ensure text properties exist (skips binary/invalid encoding) + if (match.path?.text && match.lines?.text) { + const absoluteFilePath = path.resolve(basePath, match.path.text); + const relativeCheck = path.relative(basePath, absoluteFilePath); + if ( + relativeCheck === '..' || + relativeCheck.startsWith(`..${path.sep}`) || + path.isAbsolute(relativeCheck) + ) { + return null; + } + + const relativeFilePath = path.relative(basePath, absoluteFilePath); + + return { + filePath: relativeFilePath || path.basename(absoluteFilePath), + lineNumber: match.line_number, + line: match.lines.text.trimEnd(), + }; + } + } + } catch (error) { + // Only log if it's not a simple empty line or widely invalid + if (line.trim().length > 0) { + debugLogger.warn( + `Failed to parse ripgrep JSON line: ${line.substring(0, 100)}...`, + error, + ); + } + } + return null; + } + /** * Gets a description of the grep operation * @param params Parameters for the grep operation diff --git a/packages/core/src/utils/shell-utils.integration.test.ts b/packages/core/src/utils/shell-utils.integration.test.ts new file mode 100644 index 0000000000..717e01594b --- /dev/null +++ b/packages/core/src/utils/shell-utils.integration.test.ts @@ -0,0 +1,67 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ +import { describe, it, expect } from 'vitest'; +import { execStreaming } from './shell-utils.js'; + +// Integration tests using real child processes +describe('execStreaming (Integration)', () => { + it('should yield lines from stdout', async () => { + // Use node to echo for cross-platform support + const generator = execStreaming(process.execPath, [ + '-e', + 'console.log("line 1\\nline 2")', + ]); + const lines = []; + for await (const line of generator) { + lines.push(line); + } + expect(lines).toEqual(['line 1', 'line 2']); + }); + + it('should throw error on non-zero exit code', async () => { + // exit 2 via node + const generator = execStreaming(process.execPath, [ + '-e', + 'process.exit(2)', + ]); + + await expect(async () => { + for await (const _ of generator) { + // consume + } + }).rejects.toThrow(); + }); + + it('should abort cleanly when signal is aborted', async () => { + const controller = new AbortController(); + // sleep for 2s via node + const generator = execStreaming( + process.execPath, + ['-e', 'setTimeout(() => {}, 2000)'], + { signal: controller.signal }, + ); + + // Start reading + const readPromise = (async () => { + const lines = []; + try { + for await (const line of generator) { + lines.push(line); + } + } catch (_e) { + // ignore + } + return lines; + })(); + + setTimeout(() => { + controller.abort(); + }, 100); + + const lines = await readPromise; + expect(lines).toEqual([]); + }); +}); diff --git a/packages/core/src/utils/shell-utils.ts b/packages/core/src/utils/shell-utils.ts index 6610a5d45c..3a002f2895 100644 --- a/packages/core/src/utils/shell-utils.ts +++ b/packages/core/src/utils/shell-utils.ts @@ -13,6 +13,7 @@ import { spawnSync, type SpawnOptionsWithoutStdio, } from 'node:child_process'; +import * as readline from 'node:readline'; import type { Node, Tree } from 'web-tree-sitter'; import { Language, Parser, Query } from 'web-tree-sitter'; import { loadWasmBinary } from './fileUtils.js'; @@ -765,3 +766,123 @@ export const spawnAsync = ( reject(err); }); }); + +/** + * Executes a command and yields lines of output as they appear. + * Use for large outputs where buffering is not feasible. + * + * @param command The executable to run + * @param args Arguments for the executable + * @param options Spawn options (cwd, env, etc.) + */ +export async function* execStreaming( + command: string, + args: string[], + options?: SpawnOptionsWithoutStdio & { + signal?: AbortSignal; + allowedExitCodes?: number[]; + }, +): AsyncGenerator { + const child = spawn(command, args, { + ...options, + // ensure we don't open a window on windows if possible/relevant + windowsHide: true, + }); + + const rl = readline.createInterface({ + input: child.stdout, + terminal: false, + }); + + const errorChunks: Buffer[] = []; + let stderrTotalBytes = 0; + const MAX_STDERR_BYTES = 20 * 1024; // 20KB limit + + child.stderr.on('data', (chunk) => { + if (stderrTotalBytes < MAX_STDERR_BYTES) { + errorChunks.push(chunk); + stderrTotalBytes += chunk.length; + } + }); + + let error: Error | null = null; + child.on('error', (err) => { + error = err; + }); + + const onAbort = () => { + // If manually aborted by signal, we kill immediately. + if (!child.killed) child.kill(); + }; + + if (options?.signal?.aborted) { + onAbort(); + } else { + options?.signal?.addEventListener('abort', onAbort); + } + + let finished = false; + try { + for await (const line of rl) { + if (options?.signal?.aborted) break; + yield line; + } + finished = true; + } finally { + rl.close(); + options?.signal?.removeEventListener('abort', onAbort); + + // Ensure process is killed when the generator is closed (consumer breaks loop) + let killedByGenerator = false; + if (!finished && child.exitCode === null && !child.killed) { + try { + child.kill(); + } catch (_e) { + // ignore error if process is already dead + } + killedByGenerator = true; + } + + // Ensure we wait for the process to exit to check codes + await new Promise((resolve, reject) => { + // If an error occurred before we got here (e.g. spawn failure), reject immediately. + if (error) { + reject(error); + return; + } + + function checkExit(code: number | null) { + // If we aborted or killed it manually, we treat it as success (stop waiting) + if (options?.signal?.aborted || killedByGenerator) { + resolve(); + return; + } + + const allowed = options?.allowedExitCodes ?? [0]; + if (code !== null && allowed.includes(code)) { + resolve(); + } else { + // If we have an accumulated error or explicit error event + if (error) reject(error); + else { + const stderr = Buffer.concat(errorChunks).toString('utf8'); + const truncatedMsg = + stderrTotalBytes >= MAX_STDERR_BYTES ? '...[truncated]' : ''; + reject( + new Error( + `Process exited with code ${code}: ${stderr}${truncatedMsg}`, + ), + ); + } + } + } + + if (child.exitCode !== null) { + checkExit(child.exitCode); + } else { + child.on('close', (code) => checkExit(code)); + child.on('error', (err) => reject(err)); + } + }); + } +}