fix(core): stream grep/ripgrep output to prevent OOM (#17146)

This commit is contained in:
Adam Weidman
2026-01-26 16:52:19 -05:00
committed by GitHub
parent c2d0783965
commit 018dc0d5cf
7 changed files with 888 additions and 994 deletions
+7
View File
@@ -0,0 +1,7 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
export const DEFAULT_TOTAL_MAX_MATCHES = 20000;
export const DEFAULT_SEARCH_TIMEOUT_MS = 30000;
+42
View File
@@ -7,6 +7,7 @@
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import type { GrepToolParams } from './grep.js';
import { GrepTool } from './grep.js';
import type { ToolResult } from './tools.js';
import path from 'node:path';
import fs from 'node:fs/promises';
import os from 'node:os';
@@ -15,8 +16,12 @@ import { createMockWorkspaceContext } from '../test-utils/mockWorkspaceContext.j
import { ToolErrorType } from './tool-error.js';
import * as glob from 'glob';
import { createMockMessageBus } from '../test-utils/mock-message-bus.js';
import { execStreaming } from '../utils/shell-utils.js';
vi.mock('glob', { spy: true });
vi.mock('../utils/shell-utils.js', () => ({
execStreaming: vi.fn(),
}));
// Mock the child_process module to control grep/git grep behavior
vi.mock('child_process', () => ({
@@ -129,6 +134,14 @@ describe('GrepTool', () => {
});
});
function createLineGenerator(lines: string[]): AsyncGenerator<string> {
return (async function* () {
for (const line of lines) {
yield line;
}
})();
}
describe('execute', () => {
it('should find matches for a simple pattern in all files', async () => {
const params: GrepToolParams = { pattern: 'world' };
@@ -147,6 +160,35 @@ describe('GrepTool', () => {
expect(result.returnDisplay).toBe('Found 3 matches');
}, 30000);
it('should include files that start with ".." in JS fallback', async () => {
await fs.writeFile(path.join(tempRootDir, '..env'), 'world in ..env');
const params: GrepToolParams = { pattern: 'world' };
const invocation = grepTool.build(params);
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain('File: ..env');
expect(result.llmContent).toContain('L1: world in ..env');
});
it('should ignore system grep output that escapes base path', async () => {
vi.mocked(execStreaming).mockImplementationOnce(() =>
createLineGenerator(['..env:1:hello', '../secret.txt:2:leak']),
);
const params: GrepToolParams = { pattern: 'hello' };
const invocation = grepTool.build(params) as unknown as {
isCommandAvailable: (command: string) => Promise<boolean>;
execute: (signal: AbortSignal) => Promise<ToolResult>;
};
invocation.isCommandAvailable = vi.fn(
async (command: string) => command === 'grep',
);
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain('File: ..env');
expect(result.llmContent).toContain('L1: hello');
expect(result.llmContent).not.toContain('secret.txt');
});
it('should find matches in a specific path', async () => {
const params: GrepToolParams = { pattern: 'world', dir_path: 'sub' };
const invocation = grepTool.build(params);
+158 -177
View File
@@ -8,10 +8,14 @@ import type { MessageBus } from '../confirmation-bus/message-bus.js';
import fs from 'node:fs';
import fsPromises from 'node:fs/promises';
import path from 'node:path';
import { EOL } from 'node:os';
import { spawn } from 'node:child_process';
import { globStream } from 'glob';
import type { ToolInvocation, ToolResult } from './tools.js';
import { execStreaming } from '../utils/shell-utils.js';
import {
DEFAULT_TOTAL_MAX_MATCHES,
DEFAULT_SEARCH_TIMEOUT_MS,
} from './constants.js';
import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js';
import { makeRelative, shortenPath } from '../utils/paths.js';
import { getErrorMessage, isNodeError } from '../utils/errors.js';
@@ -111,6 +115,47 @@ class GrepToolInvocation extends BaseToolInvocation<
return targetPath;
}
/**
* Parses a single line of grep-like output (git grep, system grep).
* Expects format: filePath:lineNumber:lineContent
* @param {string} line The line to parse.
* @param {string} basePath The absolute directory for path resolution.
* @returns {GrepMatch | null} Parsed match or null if malformed.
*/
private parseGrepLine(line: string, basePath: string): GrepMatch | null {
if (!line.trim()) return null;
// Use regex to locate the first occurrence of :<digits>:
// This allows filenames to contain colons, as long as they don't look like :<digits>:
// Note: This regex assumes filenames do not contain colons, or at least not followed by digits.
const match = line.match(/^(.+?):(\d+):(.*)$/);
if (!match) return null;
const [, filePathRaw, lineNumberStr, lineContent] = match;
const lineNumber = parseInt(lineNumberStr, 10);
if (!isNaN(lineNumber)) {
const absoluteFilePath = path.resolve(basePath, filePathRaw);
const relativeCheck = path.relative(basePath, absoluteFilePath);
if (
relativeCheck === '..' ||
relativeCheck.startsWith(`..${path.sep}`) ||
path.isAbsolute(relativeCheck)
) {
return null;
}
const relativeFilePath = path.relative(basePath, absoluteFilePath);
return {
filePath: relativeFilePath || path.basename(absoluteFilePath),
lineNumber,
line: lineContent,
};
}
return null;
}
async execute(signal: AbortSignal): Promise<ToolResult> {
try {
const workspaceContext = this.config.getWorkspaceContext();
@@ -129,12 +174,33 @@ class GrepToolInvocation extends BaseToolInvocation<
// Collect matches from all search directories
let allMatches: GrepMatch[] = [];
const totalMaxMatches = DEFAULT_TOTAL_MAX_MATCHES;
// Create a timeout controller to prevent indefinitely hanging searches
const timeoutController = new AbortController();
const timeoutId = setTimeout(() => {
timeoutController.abort();
}, DEFAULT_SEARCH_TIMEOUT_MS);
// Link the passed signal to our timeout controller
const onAbort = () => timeoutController.abort();
if (signal.aborted) {
onAbort();
} else {
signal.addEventListener('abort', onAbort, { once: true });
}
try {
for (const searchDir of searchDirectories) {
const remainingLimit = totalMaxMatches - allMatches.length;
if (remainingLimit <= 0) break;
const matches = await this.performGrepSearch({
pattern: this.params.pattern,
path: searchDir,
include: this.params.include,
signal,
maxMatches: remainingLimit,
signal: timeoutController.signal,
});
// Add directory prefix if searching multiple directories
@@ -147,6 +213,10 @@ class GrepToolInvocation extends BaseToolInvocation<
allMatches = allMatches.concat(matches);
}
} finally {
clearTimeout(timeoutId);
signal.removeEventListener('abort', onAbort);
}
let searchLocationDescription: string;
if (searchDirAbs === null) {
@@ -164,6 +234,8 @@ class GrepToolInvocation extends BaseToolInvocation<
return { llmContent: noMatchMsg, returnDisplay: `No matches found` };
}
const wasTruncated = allMatches.length >= totalMaxMatches;
// Group matches by file
const matchesByFile = allMatches.reduce(
(acc, match) => {
@@ -181,9 +253,7 @@ class GrepToolInvocation extends BaseToolInvocation<
const matchCount = allMatches.length;
const matchTerm = matchCount === 1 ? 'match' : 'matches';
let llmContent = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}:
---
`;
let llmContent = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}${wasTruncated ? ` (results limited to ${totalMaxMatches} matches for performance)` : ''}:\n---\n`;
for (const filePath in matchesByFile) {
llmContent += `File: ${filePath}\n`;
@@ -196,7 +266,7 @@ class GrepToolInvocation extends BaseToolInvocation<
return {
llmContent: llmContent.trim(),
returnDisplay: `Found ${matchCount} ${matchTerm}`,
returnDisplay: `Found ${matchCount} ${matchTerm}${wasTruncated ? ' (limited)' : ''}`,
};
} catch (error) {
debugLogger.warn(`Error during GrepLogic execution: ${error}`);
@@ -241,92 +311,6 @@ class GrepToolInvocation extends BaseToolInvocation<
});
}
/**
* Parses the standard output of grep-like commands (git grep, system grep).
* Expects format: filePath:lineNumber:lineContent
* Handles colons within file paths and line content correctly.
* @param {string} output The raw stdout string.
* @param {string} basePath The absolute directory the search was run from, for relative paths.
* @returns {GrepMatch[]} Array of match objects.
*/
private parseGrepOutput(output: string, basePath: string): GrepMatch[] {
const results: GrepMatch[] = [];
if (!output) return results;
const lines = output.split(EOL); // Use OS-specific end-of-line
for (const line of lines) {
if (!line.trim()) continue;
// Find the index of the first colon.
const firstColonIndex = line.indexOf(':');
if (firstColonIndex === -1) continue; // Malformed
// Find the index of the second colon, searching *after* the first one.
const secondColonIndex = line.indexOf(':', firstColonIndex + 1);
if (secondColonIndex === -1) continue; // Malformed
// Extract parts based on the found colon indices
const filePathRaw = line.substring(0, firstColonIndex);
const lineNumberStr = line.substring(
firstColonIndex + 1,
secondColonIndex,
);
const lineContent = line.substring(secondColonIndex + 1);
const lineNumber = parseInt(lineNumberStr, 10);
if (!isNaN(lineNumber)) {
const absoluteFilePath = path.resolve(basePath, filePathRaw);
const relativeFilePath = path.relative(basePath, absoluteFilePath);
results.push({
filePath: relativeFilePath || path.basename(absoluteFilePath),
lineNumber,
line: lineContent,
});
}
}
return results;
}
/**
* Gets a description of the grep operation
* @returns A string describing the grep
*/
getDescription(): string {
let description = `'${this.params.pattern}'`;
if (this.params.include) {
description += ` in ${this.params.include}`;
}
if (this.params.dir_path) {
const resolvedPath = path.resolve(
this.config.getTargetDir(),
this.params.dir_path,
);
if (
resolvedPath === this.config.getTargetDir() ||
this.params.dir_path === '.'
) {
description += ` within ./`;
} else {
const relativePath = makeRelative(
resolvedPath,
this.config.getTargetDir(),
);
description += ` within ${shortenPath(relativePath)}`;
}
} else {
// When no path is specified, indicate searching all workspace directories
const workspaceContext = this.config.getWorkspaceContext();
const directories = workspaceContext.getDirectories();
if (directories.length > 1) {
description += ` across all workspace directories`;
}
}
return description;
}
/**
* Performs the actual search using the prioritized strategies.
* @param options Search options including pattern, absolute path, and include glob.
@@ -336,9 +320,10 @@ class GrepToolInvocation extends BaseToolInvocation<
pattern: string;
path: string; // Expects absolute path
include?: string;
maxMatches: number;
signal: AbortSignal;
}): Promise<GrepMatch[]> {
const { pattern, path: absolutePath, include } = options;
const { pattern, path: absolutePath, include, maxMatches } = options;
let strategyUsed = 'none';
try {
@@ -361,32 +346,23 @@ class GrepToolInvocation extends BaseToolInvocation<
}
try {
const output = await new Promise<string>((resolve, reject) => {
const child = spawn('git', gitArgs, {
const generator = execStreaming('git', gitArgs, {
cwd: absolutePath,
windowsHide: true,
signal: options.signal,
allowedExitCodes: [0, 1],
});
const stdoutChunks: Buffer[] = [];
const stderrChunks: Buffer[] = [];
child.stdout.on('data', (chunk) => stdoutChunks.push(chunk));
child.stderr.on('data', (chunk) => stderrChunks.push(chunk));
child.on('error', (err) =>
reject(new Error(`Failed to start git grep: ${err.message}`)),
);
child.on('close', (code) => {
const stdoutData = Buffer.concat(stdoutChunks).toString('utf8');
const stderrData = Buffer.concat(stderrChunks).toString('utf8');
if (code === 0) resolve(stdoutData);
else if (code === 1)
resolve(''); // No matches
else
reject(
new Error(`git grep exited with code ${code}: ${stderrData}`),
);
});
});
return this.parseGrepOutput(output, absolutePath);
const results: GrepMatch[] = [];
for await (const line of generator) {
const match = this.parseGrepLine(line, absolutePath);
if (match) {
results.push(match);
if (results.length >= maxMatches) {
break;
}
}
}
return results;
} catch (gitError: unknown) {
debugLogger.debug(
`GrepLogic: git grep failed: ${getErrorMessage(
@@ -433,67 +409,31 @@ class GrepToolInvocation extends BaseToolInvocation<
grepArgs.push(pattern);
grepArgs.push('.');
const results: GrepMatch[] = [];
try {
const output = await new Promise<string>((resolve, reject) => {
const child = spawn('grep', grepArgs, {
const generator = execStreaming('grep', grepArgs, {
cwd: absolutePath,
windowsHide: true,
signal: options.signal,
allowedExitCodes: [0, 1],
});
const stdoutChunks: Buffer[] = [];
const stderrChunks: Buffer[] = [];
const onData = (chunk: Buffer) => stdoutChunks.push(chunk);
const onStderr = (chunk: Buffer) => {
const stderrStr = chunk.toString();
// Suppress common harmless stderr messages
if (
!stderrStr.includes('Permission denied') &&
!/grep:.*: Is a directory/i.test(stderrStr)
) {
stderrChunks.push(chunk);
for await (const line of generator) {
const match = this.parseGrepLine(line, absolutePath);
if (match) {
results.push(match);
if (results.length >= maxMatches) {
break;
}
};
const onError = (err: Error) => {
cleanup();
reject(new Error(`Failed to start system grep: ${err.message}`));
};
const onClose = (code: number | null) => {
const stdoutData = Buffer.concat(stdoutChunks).toString('utf8');
const stderrData = Buffer.concat(stderrChunks)
.toString('utf8')
.trim();
cleanup();
if (code === 0) resolve(stdoutData);
else if (code === 1)
resolve(''); // No matches
else {
if (stderrData)
reject(
new Error(
`System grep exited with code ${code}: ${stderrData}`,
),
);
else resolve(''); // Exit code > 1 but no stderr, likely just suppressed errors
}
};
const cleanup = () => {
child.stdout.removeListener('data', onData);
child.stderr.removeListener('data', onStderr);
child.removeListener('error', onError);
child.removeListener('close', onClose);
if (child.connected) {
child.disconnect();
}
};
child.stdout.on('data', onData);
child.stderr.on('data', onStderr);
child.on('error', onError);
child.on('close', onClose);
});
return this.parseGrepOutput(output, absolutePath);
return results;
} catch (grepError: unknown) {
if (
grepError instanceof Error &&
/Permission denied|Is a directory/i.test(grepError.message)
) {
return results;
}
debugLogger.debug(
`GrepLogic: System grep failed: ${getErrorMessage(
grepError,
@@ -523,11 +463,22 @@ class GrepToolInvocation extends BaseToolInvocation<
const allMatches: GrepMatch[] = [];
for await (const filePath of filesStream) {
if (allMatches.length >= maxMatches) break;
const fileAbsolutePath = filePath;
// security check
const relativePath = path.relative(absolutePath, fileAbsolutePath);
if (
relativePath === '..' ||
relativePath.startsWith(`..${path.sep}`) ||
path.isAbsolute(relativePath)
)
continue;
try {
const content = await fsPromises.readFile(fileAbsolutePath, 'utf8');
const lines = content.split(/\r?\n/);
lines.forEach((line, index) => {
for (let index = 0; index < lines.length; index++) {
const line = lines[index];
if (regex.test(line)) {
allMatches.push({
filePath:
@@ -536,8 +487,9 @@ class GrepToolInvocation extends BaseToolInvocation<
lineNumber: index + 1,
line,
});
if (allMatches.length >= maxMatches) break;
}
}
});
} catch (readError: unknown) {
// Ignore errors like permission denied or file gone during read
if (!isNodeError(readError) || readError.code !== 'ENOENT') {
@@ -560,9 +512,40 @@ class GrepToolInvocation extends BaseToolInvocation<
throw error; // Re-throw
}
}
}
// --- GrepLogic Class ---
getDescription(): string {
let description = `'${this.params.pattern}'`;
if (this.params.include) {
description += ` in ${this.params.include}`;
}
if (this.params.dir_path) {
const resolvedPath = path.resolve(
this.config.getTargetDir(),
this.params.dir_path,
);
if (
resolvedPath === this.config.getTargetDir() ||
this.params.dir_path === '.'
) {
description += ` within ./`;
} else {
const relativePath = makeRelative(
resolvedPath,
this.config.getTargetDir(),
);
description += ` within ${shortenPath(relativePath)}`;
}
} else {
// When no path is specified, indicate searching all workspace directories
const workspaceContext = this.config.getWorkspaceContext();
const directories = workspaceContext.getDirectories();
if (directories.length > 1) {
description += ` across all workspace directories`;
}
}
return description;
}
}
/**
* Implementation of the Grep tool logic (moved from CLI)
@@ -581,8 +564,7 @@ export class GrepTool extends BaseDeclarativeTool<GrepToolParams, ToolResult> {
{
properties: {
pattern: {
description:
"The regular expression (regex) pattern to search for within file contents (e.g., 'function\\s+myFunction', 'import\\s+\\{.*\\}\\s+from\\s+.*').",
description: `The regular expression (regex) pattern to search for within file contents (e.g., 'function\\s+myFunction', 'import\\s+\\{.*\\}\\s+from\\s+.*').`,
type: 'string',
},
dir_path: {
@@ -591,8 +573,7 @@ export class GrepTool extends BaseDeclarativeTool<GrepToolParams, ToolResult> {
type: 'string',
},
include: {
description:
"Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).",
description: `Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).`,
type: 'string',
},
},
+211 -520
View File
@@ -23,6 +23,8 @@ import { Storage } from '../config/storage.js';
import { createMockWorkspaceContext } from '../test-utils/mockWorkspaceContext.js';
import type { ChildProcess } from 'node:child_process';
import { spawn } from 'node:child_process';
import { PassThrough, Readable } from 'node:stream';
import EventEmitter from 'node:events';
import { downloadRipGrep } from '@joshua.litt/get-ripgrep';
import { createMockMessageBus } from '../test-utils/mock-message-bus.js';
// Mock dependencies for canUseRipgrep
@@ -197,47 +199,43 @@ describe('ensureRgPath', () => {
function createMockSpawn(
options: {
outputData?: string;
exitCode?: number;
exitCode?: number | null;
signal?: string;
} = {},
) {
const { outputData, exitCode = 0, signal } = options;
return () => {
const mockProcess = {
stdout: {
on: vi.fn(),
removeListener: vi.fn(),
// strict Readable implementation
let pushed = false;
const stdout = new Readable({
read() {
if (!pushed) {
if (outputData) {
this.push(outputData);
}
this.push(null); // EOF
pushed = true;
}
},
stderr: {
on: vi.fn(),
removeListener: vi.fn(),
},
on: vi.fn(),
removeListener: vi.fn(),
kill: vi.fn(),
};
});
// Set up event listeners immediately
const stderr = new PassThrough();
const mockProcess = new EventEmitter() as ChildProcess;
mockProcess.stdout = stdout as unknown as Readable;
mockProcess.stderr = stderr;
mockProcess.kill = vi.fn();
// @ts-expect-error - mocking private/internal property
mockProcess.killed = false;
// @ts-expect-error - mocking private/internal property
mockProcess.exitCode = null;
// Emulating process exit
setTimeout(() => {
const stdoutDataHandler = mockProcess.stdout.on.mock.calls.find(
(call) => call[0] === 'data',
)?.[1];
mockProcess.emit('close', exitCode, signal);
}, 10);
const closeHandler = mockProcess.on.mock.calls.find(
(call) => call[0] === 'close',
)?.[1];
if (stdoutDataHandler && outputData) {
stdoutDataHandler(Buffer.from(outputData));
}
if (closeHandler) {
closeHandler(exitCode, signal);
}
}, 0);
return mockProcess as unknown as ChildProcess;
return mockProcess;
};
}
@@ -406,6 +404,40 @@ describe('RipGrepTool', () => {
expect(result.returnDisplay).toBe('Found 3 matches');
});
it('should ignore matches that escape the base path', async () => {
mockSpawn.mockImplementationOnce(
createMockSpawn({
outputData:
JSON.stringify({
type: 'match',
data: {
path: { text: '..env' },
line_number: 1,
lines: { text: 'world in ..env\n' },
},
}) +
'\n' +
JSON.stringify({
type: 'match',
data: {
path: { text: '../secret.txt' },
line_number: 1,
lines: { text: 'leak\n' },
},
}) +
'\n',
exitCode: 0,
}),
);
const params: RipGrepToolParams = { pattern: 'world' };
const invocation = grepTool.build(params);
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain('File: ..env');
expect(result.llmContent).toContain('L1: world in ..env');
expect(result.llmContent).not.toContain('secret.txt');
});
it('should find matches in a specific path', async () => {
// Setup specific mock for this test - searching in 'sub' should only return matches from that directory
mockSpawn.mockImplementationOnce(
@@ -471,33 +503,9 @@ describe('RipGrepTool', () => {
);
// Setup specific mock for this test - searching for 'hello' in 'sub' with '*.js' filter
mockSpawn.mockImplementationOnce(() => {
const mockProcess = {
stdout: {
on: vi.fn(),
removeListener: vi.fn(),
},
stderr: {
on: vi.fn(),
removeListener: vi.fn(),
},
on: vi.fn(),
removeListener: vi.fn(),
kill: vi.fn(),
};
setTimeout(() => {
const onData = mockProcess.stdout.on.mock.calls.find(
(call) => call[0] === 'data',
)?.[1];
const onClose = mockProcess.on.mock.calls.find(
(call) => call[0] === 'close',
)?.[1];
if (onData) {
// Only return match from the .js file in sub directory
onData(
Buffer.from(
mockSpawn.mockImplementationOnce(
createMockSpawn({
outputData:
JSON.stringify({
type: 'match',
data: {
@@ -506,16 +514,9 @@ describe('RipGrepTool', () => {
lines: { text: 'const greeting = "hello";\n' },
},
}) + '\n',
),
exitCode: 0,
}),
);
}
if (onClose) {
onClose(0);
}
}, 0);
return mockProcess as unknown as ChildProcess;
});
const params: RipGrepToolParams = {
pattern: 'hello',
@@ -559,41 +560,103 @@ describe('RipGrepTool', () => {
const params: RipGrepToolParams = { pattern: '[[' };
const invocation = grepTool.build(params);
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain('ripgrep exited with code 2');
expect(result.llmContent).toContain('Process exited with code 2');
expect(result.returnDisplay).toContain(
'Error: ripgrep exited with code 2',
'Error: Process exited with code 2',
);
});
it('should ignore invalid regex error from ripgrep when it is not a user error', async () => {
mockSpawn.mockImplementation(
createMockSpawn({
outputData: '',
exitCode: 2,
signal: undefined,
}),
);
const invocation = grepTool.build({
pattern: 'foo',
dir_path: tempRootDir,
});
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain('Process exited with code 2');
expect(result.returnDisplay).toContain(
'Error: Process exited with code 2',
);
});
it('should handle massive output by terminating early without crashing (Regression)', async () => {
const massiveOutputLines = 30000;
// Custom mock for massive streaming
mockSpawn.mockImplementation(() => {
const stdout = new PassThrough();
const stderr = new PassThrough();
const mockProcess = new EventEmitter() as ChildProcess;
mockProcess.stdout = stdout;
mockProcess.stderr = stderr;
mockProcess.kill = vi.fn();
// @ts-expect-error - mocking private/internal property
mockProcess.killed = false;
// @ts-expect-error - mocking private/internal property
mockProcess.exitCode = null;
// Push data over time
let linesPushed = 0;
const pushInterval = setInterval(() => {
if (linesPushed >= massiveOutputLines) {
clearInterval(pushInterval);
stdout.end();
mockProcess.emit('close', 0);
return;
}
// Push a batch
try {
for (let i = 0; i < 2000 && linesPushed < massiveOutputLines; i++) {
const match = JSON.stringify({
type: 'match',
data: {
path: { text: `file_${linesPushed}.txt` },
line_number: 1,
lines: { text: `match ${linesPushed}\n` },
},
});
stdout.write(match + '\n');
linesPushed++;
}
} catch (_e) {
clearInterval(pushInterval);
}
}, 1);
mockProcess.kill = vi.fn().mockImplementation(() => {
clearInterval(pushInterval);
stdout.end();
// Emit close async to allow listeners to attach
setTimeout(() => mockProcess.emit('close', 0, 'SIGTERM'), 0);
return true;
});
return mockProcess;
});
const invocation = grepTool.build({
pattern: 'test',
dir_path: tempRootDir,
});
const result = await invocation.execute(abortSignal);
expect(result.returnDisplay).toContain('(limited)');
}, 10000);
it('should handle regex special characters correctly', async () => {
// Setup specific mock for this test - regex pattern 'foo.*bar' should match 'const foo = "bar";'
mockSpawn.mockImplementationOnce(() => {
const mockProcess = {
stdout: {
on: vi.fn(),
removeListener: vi.fn(),
},
stderr: {
on: vi.fn(),
removeListener: vi.fn(),
},
on: vi.fn(),
removeListener: vi.fn(),
kill: vi.fn(),
};
setTimeout(() => {
const onData = mockProcess.stdout.on.mock.calls.find(
(call) => call[0] === 'data',
)?.[1];
const onClose = mockProcess.on.mock.calls.find(
(call) => call[0] === 'close',
)?.[1];
if (onData) {
// Return match for the regex pattern
onData(
Buffer.from(
mockSpawn.mockImplementationOnce(
createMockSpawn({
outputData:
JSON.stringify({
type: 'match',
data: {
@@ -602,16 +665,9 @@ describe('RipGrepTool', () => {
lines: { text: 'const foo = "bar";\n' },
},
}) + '\n',
),
exitCode: 0,
}),
);
}
if (onClose) {
onClose(0);
}
}, 0);
return mockProcess as unknown as ChildProcess;
});
const params: RipGrepToolParams = { pattern: 'foo.*bar' }; // Matches 'const foo = "bar";'
const invocation = grepTool.build(params);
@@ -625,33 +681,9 @@ describe('RipGrepTool', () => {
it('should be case-insensitive by default (JS fallback)', async () => {
// Setup specific mock for this test - case insensitive search for 'HELLO'
mockSpawn.mockImplementationOnce(() => {
const mockProcess = {
stdout: {
on: vi.fn(),
removeListener: vi.fn(),
},
stderr: {
on: vi.fn(),
removeListener: vi.fn(),
},
on: vi.fn(),
removeListener: vi.fn(),
kill: vi.fn(),
};
setTimeout(() => {
const onData = mockProcess.stdout.on.mock.calls.find(
(call) => call[0] === 'data',
)?.[1];
const onClose = mockProcess.on.mock.calls.find(
(call) => call[0] === 'close',
)?.[1];
if (onData) {
// Return case-insensitive matches for 'HELLO'
onData(
Buffer.from(
mockSpawn.mockImplementationOnce(
createMockSpawn({
outputData:
JSON.stringify({
type: 'match',
data: {
@@ -670,16 +702,9 @@ describe('RipGrepTool', () => {
},
}) +
'\n',
),
exitCode: 0,
}),
);
}
if (onClose) {
onClose(0);
}
}, 0);
return mockProcess as unknown as ChildProcess;
});
const params: RipGrepToolParams = { pattern: 'HELLO' };
const invocation = grepTool.build(params);
@@ -742,36 +767,10 @@ describe('RipGrepTool', () => {
// Setup specific mock for this test - multi-directory search for 'world'
// Mock will be called twice - once for each directory
let callCount = 0;
mockSpawn.mockImplementation(() => {
callCount++;
const mockProcess = {
stdout: {
on: vi.fn(),
removeListener: vi.fn(),
},
stderr: {
on: vi.fn(),
removeListener: vi.fn(),
},
on: vi.fn(),
removeListener: vi.fn(),
kill: vi.fn(),
};
setTimeout(() => {
const stdoutDataHandler = mockProcess.stdout.on.mock.calls.find(
(call) => call[0] === 'data',
)?.[1];
const closeHandler = mockProcess.on.mock.calls.find(
(call) => call[0] === 'close',
)?.[1];
let outputData = '';
if (callCount === 1) {
// First directory (tempRootDir)
outputData =
mockSpawn.mockImplementationOnce(
createMockSpawn({
outputData:
JSON.stringify({
type: 'match',
data: {
@@ -798,41 +797,9 @@ describe('RipGrepTool', () => {
lines: { text: 'another world in sub dir\n' },
},
}) +
'\n';
} else if (callCount === 2) {
// Second directory (secondDir)
outputData =
JSON.stringify({
type: 'match',
data: {
path: { text: 'other.txt' },
line_number: 2,
lines: { text: 'world in second\n' },
},
}) +
'\n' +
JSON.stringify({
type: 'match',
data: {
path: { text: 'another.js' },
line_number: 1,
lines: { text: 'function world() { return "test"; }\n' },
},
}) +
'\n';
}
if (stdoutDataHandler && outputData) {
stdoutDataHandler(Buffer.from(outputData));
}
if (closeHandler) {
closeHandler(0);
}
}, 0);
return mockProcess as unknown as ChildProcess;
});
'\n',
}),
);
const multiDirGrepTool = new RipGrepTool(
multiDirConfig,
@@ -886,32 +853,9 @@ describe('RipGrepTool', () => {
} as unknown as Config;
// Setup specific mock for this test - searching in 'sub' should only return matches from that directory
mockSpawn.mockImplementationOnce(() => {
const mockProcess = {
stdout: {
on: vi.fn(),
removeListener: vi.fn(),
},
stderr: {
on: vi.fn(),
removeListener: vi.fn(),
},
on: vi.fn(),
removeListener: vi.fn(),
kill: vi.fn(),
};
setTimeout(() => {
const onData = mockProcess.stdout.on.mock.calls.find(
(call) => call[0] === 'data',
)?.[1];
const onClose = mockProcess.on.mock.calls.find(
(call) => call[0] === 'close',
)?.[1];
if (onData) {
onData(
Buffer.from(
mockSpawn.mockImplementationOnce(
createMockSpawn({
outputData:
JSON.stringify({
type: 'match',
data: {
@@ -920,16 +864,8 @@ describe('RipGrepTool', () => {
lines: { text: 'another world in sub dir\n' },
},
}) + '\n',
),
}),
);
}
if (onClose) {
onClose(0);
}
}, 0);
return mockProcess as unknown as ChildProcess;
});
const multiDirGrepTool = new RipGrepTool(
multiDirConfig,
@@ -970,35 +906,12 @@ describe('RipGrepTool', () => {
it('should abort streaming search when signal is triggered', async () => {
// Setup specific mock for this test - simulate process being killed due to abort
mockSpawn.mockImplementationOnce(() => {
const mockProcess = {
stdout: {
on: vi.fn(),
removeListener: vi.fn(),
},
stderr: {
on: vi.fn(),
removeListener: vi.fn(),
},
on: vi.fn(),
removeListener: vi.fn(),
kill: vi.fn(),
};
// Simulate process being aborted - use setTimeout to ensure handlers are registered first
setTimeout(() => {
const closeHandler = mockProcess.on.mock.calls.find(
(call) => call[0] === 'close',
)?.[1];
if (closeHandler) {
// Simulate process killed by signal (code is null, signal is SIGTERM)
closeHandler(null, 'SIGTERM');
}
}, 0);
return mockProcess as unknown as ChildProcess;
});
mockSpawn.mockImplementationOnce(
createMockSpawn({
exitCode: null,
signal: 'SIGTERM',
}),
);
const controller = new AbortController();
const params: RipGrepToolParams = { pattern: 'test' };
@@ -1008,12 +921,7 @@ describe('RipGrepTool', () => {
controller.abort();
const result = await invocation.execute(controller.signal);
expect(result.llmContent).toContain(
'Error during grep search operation: ripgrep was terminated by signal:',
);
expect(result.returnDisplay).toContain(
'Error: ripgrep was terminated by signal:',
);
expect(result.returnDisplay).toContain('No matches found');
});
});
@@ -1060,32 +968,9 @@ describe('RipGrepTool', () => {
);
// Setup specific mock for this test - searching for 'world' should find the file with special characters
mockSpawn.mockImplementationOnce(() => {
const mockProcess = {
stdout: {
on: vi.fn(),
removeListener: vi.fn(),
},
stderr: {
on: vi.fn(),
removeListener: vi.fn(),
},
on: vi.fn(),
removeListener: vi.fn(),
kill: vi.fn(),
};
setTimeout(() => {
const onData = mockProcess.stdout.on.mock.calls.find(
(call) => call[0] === 'data',
)?.[1];
const onClose = mockProcess.on.mock.calls.find(
(call) => call[0] === 'close',
)?.[1];
if (onData) {
onData(
Buffer.from(
mockSpawn.mockImplementationOnce(
createMockSpawn({
outputData:
JSON.stringify({
type: 'match',
data: {
@@ -1094,16 +979,8 @@ describe('RipGrepTool', () => {
lines: { text: 'hello world with special chars\n' },
},
}) + '\n',
),
}),
);
}
if (onClose) {
onClose(0);
}
}, 0);
return mockProcess as unknown as ChildProcess;
});
const params: RipGrepToolParams = { pattern: 'world' };
const invocation = grepTool.build(params);
@@ -1122,32 +999,9 @@ describe('RipGrepTool', () => {
);
// Setup specific mock for this test - searching for 'deep' should find the deeply nested file
mockSpawn.mockImplementationOnce(() => {
const mockProcess = {
stdout: {
on: vi.fn(),
removeListener: vi.fn(),
},
stderr: {
on: vi.fn(),
removeListener: vi.fn(),
},
on: vi.fn(),
removeListener: vi.fn(),
kill: vi.fn(),
};
setTimeout(() => {
const onData = mockProcess.stdout.on.mock.calls.find(
(call) => call[0] === 'data',
)?.[1];
const onClose = mockProcess.on.mock.calls.find(
(call) => call[0] === 'close',
)?.[1];
if (onData) {
onData(
Buffer.from(
mockSpawn.mockImplementationOnce(
createMockSpawn({
outputData:
JSON.stringify({
type: 'match',
data: {
@@ -1156,16 +1010,8 @@ describe('RipGrepTool', () => {
lines: { text: 'content in deep directory\n' },
},
}) + '\n',
),
}),
);
}
if (onClose) {
onClose(0);
}
}, 0);
return mockProcess as unknown as ChildProcess;
});
const params: RipGrepToolParams = { pattern: 'deep' };
const invocation = grepTool.build(params);
@@ -1184,32 +1030,9 @@ describe('RipGrepTool', () => {
);
// Setup specific mock for this test - regex pattern should match function declarations
mockSpawn.mockImplementationOnce(() => {
const mockProcess = {
stdout: {
on: vi.fn(),
removeListener: vi.fn(),
},
stderr: {
on: vi.fn(),
removeListener: vi.fn(),
},
on: vi.fn(),
removeListener: vi.fn(),
kill: vi.fn(),
};
setTimeout(() => {
const onData = mockProcess.stdout.on.mock.calls.find(
(call) => call[0] === 'data',
)?.[1];
const onClose = mockProcess.on.mock.calls.find(
(call) => call[0] === 'close',
)?.[1];
if (onData) {
onData(
Buffer.from(
mockSpawn.mockImplementationOnce(
createMockSpawn({
outputData:
JSON.stringify({
type: 'match',
data: {
@@ -1218,16 +1041,8 @@ describe('RipGrepTool', () => {
lines: { text: 'function getName() { return "test"; }\n' },
},
}) + '\n',
),
}),
);
}
if (onClose) {
onClose(0);
}
}, 0);
return mockProcess as unknown as ChildProcess;
});
const params: RipGrepToolParams = { pattern: 'function\\s+\\w+\\s*\\(' };
const invocation = grepTool.build(params);
@@ -1244,32 +1059,9 @@ describe('RipGrepTool', () => {
);
// Setup specific mock for this test - case insensitive search should match all variants
mockSpawn.mockImplementationOnce(() => {
const mockProcess = {
stdout: {
on: vi.fn(),
removeListener: vi.fn(),
},
stderr: {
on: vi.fn(),
removeListener: vi.fn(),
},
on: vi.fn(),
removeListener: vi.fn(),
kill: vi.fn(),
};
setTimeout(() => {
const onData = mockProcess.stdout.on.mock.calls.find(
(call) => call[0] === 'data',
)?.[1];
const onClose = mockProcess.on.mock.calls.find(
(call) => call[0] === 'close',
)?.[1];
if (onData) {
onData(
Buffer.from(
mockSpawn.mockImplementationOnce(
createMockSpawn({
outputData:
JSON.stringify({
type: 'match',
data: {
@@ -1297,16 +1089,8 @@ describe('RipGrepTool', () => {
},
}) +
'\n',
),
}),
);
}
if (onClose) {
onClose(0);
}
}, 0);
return mockProcess as unknown as ChildProcess;
});
const params: RipGrepToolParams = { pattern: 'hello' };
const invocation = grepTool.build(params);
@@ -1324,32 +1108,9 @@ describe('RipGrepTool', () => {
);
// Setup specific mock for this test - escaped regex pattern should match price format
mockSpawn.mockImplementationOnce(() => {
const mockProcess = {
stdout: {
on: vi.fn(),
removeListener: vi.fn(),
},
stderr: {
on: vi.fn(),
removeListener: vi.fn(),
},
on: vi.fn(),
removeListener: vi.fn(),
kill: vi.fn(),
};
setTimeout(() => {
const onData = mockProcess.stdout.on.mock.calls.find(
(call) => call[0] === 'data',
)?.[1];
const onClose = mockProcess.on.mock.calls.find(
(call) => call[0] === 'close',
)?.[1];
if (onData) {
onData(
Buffer.from(
mockSpawn.mockImplementationOnce(
createMockSpawn({
outputData:
JSON.stringify({
type: 'match',
data: {
@@ -1358,16 +1119,8 @@ describe('RipGrepTool', () => {
lines: { text: 'Price: $19.99\n' },
},
}) + '\n',
),
}),
);
}
if (onClose) {
onClose(0);
}
}, 0);
return mockProcess as unknown as ChildProcess;
});
const params: RipGrepToolParams = { pattern: '\\$\\d+\\.\\d+' };
const invocation = grepTool.build(params);
@@ -1392,32 +1145,9 @@ describe('RipGrepTool', () => {
await fs.writeFile(path.join(tempRootDir, 'test.txt'), 'text content');
// Setup specific mock for this test - include pattern should filter to only ts/tsx files
mockSpawn.mockImplementationOnce(() => {
const mockProcess = {
stdout: {
on: vi.fn(),
removeListener: vi.fn(),
},
stderr: {
on: vi.fn(),
removeListener: vi.fn(),
},
on: vi.fn(),
removeListener: vi.fn(),
kill: vi.fn(),
};
setTimeout(() => {
const onData = mockProcess.stdout.on.mock.calls.find(
(call) => call[0] === 'data',
)?.[1];
const onClose = mockProcess.on.mock.calls.find(
(call) => call[0] === 'close',
)?.[1];
if (onData) {
onData(
Buffer.from(
mockSpawn.mockImplementationOnce(
createMockSpawn({
outputData:
JSON.stringify({
type: 'match',
data: {
@@ -1436,16 +1166,8 @@ describe('RipGrepTool', () => {
},
}) +
'\n',
),
}),
);
}
if (onClose) {
onClose(0);
}
}, 0);
return mockProcess as unknown as ChildProcess;
});
const params: RipGrepToolParams = {
pattern: 'content',
@@ -1469,32 +1191,9 @@ describe('RipGrepTool', () => {
await fs.writeFile(path.join(tempRootDir, 'other.ts'), 'other code');
// Setup specific mock for this test - include pattern should filter to only src/** files
mockSpawn.mockImplementationOnce(() => {
const mockProcess = {
stdout: {
on: vi.fn(),
removeListener: vi.fn(),
},
stderr: {
on: vi.fn(),
removeListener: vi.fn(),
},
on: vi.fn(),
removeListener: vi.fn(),
kill: vi.fn(),
};
setTimeout(() => {
const onData = mockProcess.stdout.on.mock.calls.find(
(call) => call[0] === 'data',
)?.[1];
const onClose = mockProcess.on.mock.calls.find(
(call) => call[0] === 'close',
)?.[1];
if (onData) {
onData(
Buffer.from(
mockSpawn.mockImplementationOnce(
createMockSpawn({
outputData:
JSON.stringify({
type: 'match',
data: {
@@ -1503,16 +1202,8 @@ describe('RipGrepTool', () => {
lines: { text: 'source code\n' },
},
}) + '\n',
),
}),
);
}
if (onClose) {
onClose(0);
}
}, 0);
return mockProcess as unknown as ChildProcess;
});
const params: RipGrepToolParams = {
pattern: 'code',
+86 -101
View File
@@ -7,7 +7,6 @@
import type { MessageBus } from '../confirmation-bus/message-bus.js';
import fs from 'node:fs';
import path from 'node:path';
import { spawn } from 'node:child_process';
import { downloadRipGrep } from '@joshua.litt/get-ripgrep';
import type { ToolInvocation, ToolResult } from './tools.js';
import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js';
@@ -24,8 +23,11 @@ import {
COMMON_DIRECTORY_EXCLUDES,
} from '../utils/ignorePatterns.js';
import { GeminiIgnoreParser } from '../utils/geminiIgnoreParser.js';
const DEFAULT_TOTAL_MAX_MATCHES = 20000;
import { execStreaming } from '../utils/shell-utils.js';
import {
DEFAULT_TOTAL_MAX_MATCHES,
DEFAULT_SEARCH_TIMEOUT_MS,
} from './constants.js';
function getRgCandidateFilenames(): readonly string[] {
return process.platform === 'win32' ? ['rg.exe', 'rg'] : ['rg'];
@@ -213,7 +215,23 @@ class GrepToolInvocation extends BaseToolInvocation<
debugLogger.log(`[GrepTool] Total result limit: ${totalMaxMatches}`);
}
let allMatches = await this.performRipgrepSearch({
// Create a timeout controller to prevent indefinitely hanging searches
const timeoutController = new AbortController();
const timeoutId = setTimeout(() => {
timeoutController.abort();
}, DEFAULT_SEARCH_TIMEOUT_MS);
// Link the passed signal to our timeout controller
const onAbort = () => timeoutController.abort();
if (signal.aborted) {
onAbort();
} else {
signal.addEventListener('abort', onAbort, { once: true });
}
let allMatches: GrepMatch[];
try {
allMatches = await this.performRipgrepSearch({
pattern: this.params.pattern,
path: searchDirAbs!,
include: this.params.include,
@@ -223,11 +241,12 @@ class GrepToolInvocation extends BaseToolInvocation<
after: this.params.after,
before: this.params.before,
no_ignore: this.params.no_ignore,
signal,
maxMatches: totalMaxMatches,
signal: timeoutController.signal,
});
if (allMatches.length >= totalMaxMatches) {
allMatches = allMatches.slice(0, totalMaxMatches);
} finally {
clearTimeout(timeoutId);
signal.removeEventListener('abort', onAbort);
}
const searchLocationDescription = `in path "${searchDirDisplay}"`;
@@ -254,13 +273,7 @@ class GrepToolInvocation extends BaseToolInvocation<
const matchCount = allMatches.length;
const matchTerm = matchCount === 1 ? 'match' : 'matches';
let llmContent = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}`;
if (wasTruncated) {
llmContent += ` (results limited to ${totalMaxMatches} matches for performance)`;
}
llmContent += `:\n---\n`;
let llmContent = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}${wasTruncated ? ` (results limited to ${totalMaxMatches} matches for performance)` : ''}:\n---\n`;
for (const filePath in matchesByFile) {
llmContent += `File: ${filePath}\n`;
@@ -271,14 +284,11 @@ class GrepToolInvocation extends BaseToolInvocation<
llmContent += '---\n';
}
let displayMessage = `Found ${matchCount} ${matchTerm}`;
if (wasTruncated) {
displayMessage += ` (limited)`;
}
return {
llmContent: llmContent.trim(),
returnDisplay: displayMessage,
returnDisplay: `Found ${matchCount} ${matchTerm}${
wasTruncated ? ' (limited)' : ''
}`,
};
} catch (error) {
debugLogger.warn(`Error during GrepLogic execution: ${error}`);
@@ -290,41 +300,6 @@ class GrepToolInvocation extends BaseToolInvocation<
}
}
private parseRipgrepJsonOutput(
output: string,
basePath: string,
): GrepMatch[] {
const results: GrepMatch[] = [];
if (!output) return results;
const lines = output.trim().split('\n');
for (const line of lines) {
if (!line.trim()) continue;
try {
const json = JSON.parse(line);
if (json.type === 'match') {
const match = json.data;
// Defensive check: ensure text properties exist (skips binary/invalid encoding)
if (match.path?.text && match.lines?.text) {
const absoluteFilePath = path.resolve(basePath, match.path.text);
const relativeFilePath = path.relative(basePath, absoluteFilePath);
results.push({
filePath: relativeFilePath || path.basename(absoluteFilePath),
lineNumber: match.line_number,
line: match.lines.text.trimEnd(),
});
}
}
} catch (error) {
debugLogger.warn(`Failed to parse ripgrep JSON line: ${line}`, error);
}
}
return results;
}
private async performRipgrepSearch(options: {
pattern: string;
path: string;
@@ -335,6 +310,7 @@ class GrepToolInvocation extends BaseToolInvocation<
after?: number;
before?: number;
no_ignore?: boolean;
maxMatches: number;
signal: AbortSignal;
}): Promise<GrepMatch[]> {
const {
@@ -347,6 +323,7 @@ class GrepToolInvocation extends BaseToolInvocation<
after,
before,
no_ignore,
maxMatches,
} = options;
const rgArgs = ['--json'];
@@ -402,64 +379,72 @@ class GrepToolInvocation extends BaseToolInvocation<
rgArgs.push('--threads', '4');
rgArgs.push(absolutePath);
const results: GrepMatch[] = [];
try {
const rgPath = await ensureRgPath();
const output = await new Promise<string>((resolve, reject) => {
const child = spawn(rgPath, rgArgs, {
windowsHide: true,
const generator = execStreaming(rgPath, rgArgs, {
signal: options.signal,
allowedExitCodes: [0, 1],
});
const stdoutChunks: Buffer[] = [];
const stderrChunks: Buffer[] = [];
const cleanup = () => {
if (options.signal.aborted) {
child.kill();
}
};
options.signal.addEventListener('abort', cleanup, { once: true });
child.stdout.on('data', (chunk) => stdoutChunks.push(chunk));
child.stderr.on('data', (chunk) => stderrChunks.push(chunk));
child.on('error', (err) => {
options.signal.removeEventListener('abort', cleanup);
reject(
new Error(
`Failed to start ripgrep: ${err.message}. Please ensure @lvce-editor/ripgrep is properly installed.`,
),
);
});
child.on('close', (code, signal) => {
options.signal.removeEventListener('abort', cleanup);
const stdoutData = Buffer.concat(stdoutChunks).toString('utf8');
const stderrData = Buffer.concat(stderrChunks).toString('utf8');
if (code === 0) {
resolve(stdoutData);
} else if (code === 1) {
resolve(''); // No matches found
} else {
if (signal) {
reject(new Error(`ripgrep was terminated by signal: ${signal}`));
} else {
reject(
new Error(`ripgrep exited with code ${code}: ${stderrData}`),
);
for await (const line of generator) {
const match = this.parseRipgrepJsonLine(line, absolutePath);
if (match) {
results.push(match);
if (results.length >= maxMatches) {
break;
}
}
}
});
});
return this.parseRipgrepJsonOutput(output, absolutePath);
return results;
} catch (error: unknown) {
debugLogger.debug(`GrepLogic: ripgrep failed: ${getErrorMessage(error)}`);
throw error;
}
}
private parseRipgrepJsonLine(
line: string,
basePath: string,
): GrepMatch | null {
try {
const json = JSON.parse(line);
if (json.type === 'match') {
const match = json.data;
// Defensive check: ensure text properties exist (skips binary/invalid encoding)
if (match.path?.text && match.lines?.text) {
const absoluteFilePath = path.resolve(basePath, match.path.text);
const relativeCheck = path.relative(basePath, absoluteFilePath);
if (
relativeCheck === '..' ||
relativeCheck.startsWith(`..${path.sep}`) ||
path.isAbsolute(relativeCheck)
) {
return null;
}
const relativeFilePath = path.relative(basePath, absoluteFilePath);
return {
filePath: relativeFilePath || path.basename(absoluteFilePath),
lineNumber: match.line_number,
line: match.lines.text.trimEnd(),
};
}
}
} catch (error) {
// Only log if it's not a simple empty line or widely invalid
if (line.trim().length > 0) {
debugLogger.warn(
`Failed to parse ripgrep JSON line: ${line.substring(0, 100)}...`,
error,
);
}
}
return null;
}
/**
* Gets a description of the grep operation
* @param params Parameters for the grep operation
@@ -0,0 +1,67 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect } from 'vitest';
import { execStreaming } from './shell-utils.js';
// Integration tests using real child processes
describe('execStreaming (Integration)', () => {
it('should yield lines from stdout', async () => {
// Use node to echo for cross-platform support
const generator = execStreaming(process.execPath, [
'-e',
'console.log("line 1\\nline 2")',
]);
const lines = [];
for await (const line of generator) {
lines.push(line);
}
expect(lines).toEqual(['line 1', 'line 2']);
});
it('should throw error on non-zero exit code', async () => {
// exit 2 via node
const generator = execStreaming(process.execPath, [
'-e',
'process.exit(2)',
]);
await expect(async () => {
for await (const _ of generator) {
// consume
}
}).rejects.toThrow();
});
it('should abort cleanly when signal is aborted', async () => {
const controller = new AbortController();
// sleep for 2s via node
const generator = execStreaming(
process.execPath,
['-e', 'setTimeout(() => {}, 2000)'],
{ signal: controller.signal },
);
// Start reading
const readPromise = (async () => {
const lines = [];
try {
for await (const line of generator) {
lines.push(line);
}
} catch (_e) {
// ignore
}
return lines;
})();
setTimeout(() => {
controller.abort();
}, 100);
const lines = await readPromise;
expect(lines).toEqual([]);
});
});
+121
View File
@@ -13,6 +13,7 @@ import {
spawnSync,
type SpawnOptionsWithoutStdio,
} from 'node:child_process';
import * as readline from 'node:readline';
import type { Node, Tree } from 'web-tree-sitter';
import { Language, Parser, Query } from 'web-tree-sitter';
import { loadWasmBinary } from './fileUtils.js';
@@ -765,3 +766,123 @@ export const spawnAsync = (
reject(err);
});
});
/**
* Executes a command and yields lines of output as they appear.
* Use for large outputs where buffering is not feasible.
*
* @param command The executable to run
* @param args Arguments for the executable
* @param options Spawn options (cwd, env, etc.)
*/
export async function* execStreaming(
command: string,
args: string[],
options?: SpawnOptionsWithoutStdio & {
signal?: AbortSignal;
allowedExitCodes?: number[];
},
): AsyncGenerator<string, void, void> {
const child = spawn(command, args, {
...options,
// ensure we don't open a window on windows if possible/relevant
windowsHide: true,
});
const rl = readline.createInterface({
input: child.stdout,
terminal: false,
});
const errorChunks: Buffer[] = [];
let stderrTotalBytes = 0;
const MAX_STDERR_BYTES = 20 * 1024; // 20KB limit
child.stderr.on('data', (chunk) => {
if (stderrTotalBytes < MAX_STDERR_BYTES) {
errorChunks.push(chunk);
stderrTotalBytes += chunk.length;
}
});
let error: Error | null = null;
child.on('error', (err) => {
error = err;
});
const onAbort = () => {
// If manually aborted by signal, we kill immediately.
if (!child.killed) child.kill();
};
if (options?.signal?.aborted) {
onAbort();
} else {
options?.signal?.addEventListener('abort', onAbort);
}
let finished = false;
try {
for await (const line of rl) {
if (options?.signal?.aborted) break;
yield line;
}
finished = true;
} finally {
rl.close();
options?.signal?.removeEventListener('abort', onAbort);
// Ensure process is killed when the generator is closed (consumer breaks loop)
let killedByGenerator = false;
if (!finished && child.exitCode === null && !child.killed) {
try {
child.kill();
} catch (_e) {
// ignore error if process is already dead
}
killedByGenerator = true;
}
// Ensure we wait for the process to exit to check codes
await new Promise<void>((resolve, reject) => {
// If an error occurred before we got here (e.g. spawn failure), reject immediately.
if (error) {
reject(error);
return;
}
function checkExit(code: number | null) {
// If we aborted or killed it manually, we treat it as success (stop waiting)
if (options?.signal?.aborted || killedByGenerator) {
resolve();
return;
}
const allowed = options?.allowedExitCodes ?? [0];
if (code !== null && allowed.includes(code)) {
resolve();
} else {
// If we have an accumulated error or explicit error event
if (error) reject(error);
else {
const stderr = Buffer.concat(errorChunks).toString('utf8');
const truncatedMsg =
stderrTotalBytes >= MAX_STDERR_BYTES ? '...[truncated]' : '';
reject(
new Error(
`Process exited with code ${code}: ${stderr}${truncatedMsg}`,
),
);
}
}
}
if (child.exitCode !== null) {
checkExit(child.exitCode);
} else {
child.on('close', (code) => checkExit(code));
child.on('error', (err) => reject(err));
}
});
}
}