fix(core): stream grep/ripgrep output to prevent OOM (#17146)

This commit is contained in:
Adam Weidman
2026-01-26 16:52:19 -05:00
committed by GitHub
parent c2d0783965
commit 018dc0d5cf
7 changed files with 888 additions and 994 deletions
+98 -113
View File
@@ -7,7 +7,6 @@
import type { MessageBus } from '../confirmation-bus/message-bus.js';
import fs from 'node:fs';
import path from 'node:path';
import { spawn } from 'node:child_process';
import { downloadRipGrep } from '@joshua.litt/get-ripgrep';
import type { ToolInvocation, ToolResult } from './tools.js';
import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js';
@@ -24,8 +23,11 @@ import {
COMMON_DIRECTORY_EXCLUDES,
} from '../utils/ignorePatterns.js';
import { GeminiIgnoreParser } from '../utils/geminiIgnoreParser.js';
const DEFAULT_TOTAL_MAX_MATCHES = 20000;
import { execStreaming } from '../utils/shell-utils.js';
import {
DEFAULT_TOTAL_MAX_MATCHES,
DEFAULT_SEARCH_TIMEOUT_MS,
} from './constants.js';
function getRgCandidateFilenames(): readonly string[] {
return process.platform === 'win32' ? ['rg.exe', 'rg'] : ['rg'];
@@ -213,21 +215,38 @@ class GrepToolInvocation extends BaseToolInvocation<
debugLogger.log(`[GrepTool] Total result limit: ${totalMaxMatches}`);
}
let allMatches = await this.performRipgrepSearch({
pattern: this.params.pattern,
path: searchDirAbs!,
include: this.params.include,
case_sensitive: this.params.case_sensitive,
fixed_strings: this.params.fixed_strings,
context: this.params.context,
after: this.params.after,
before: this.params.before,
no_ignore: this.params.no_ignore,
signal,
});
// Create a timeout controller to prevent indefinitely hanging searches
const timeoutController = new AbortController();
const timeoutId = setTimeout(() => {
timeoutController.abort();
}, DEFAULT_SEARCH_TIMEOUT_MS);
if (allMatches.length >= totalMaxMatches) {
allMatches = allMatches.slice(0, totalMaxMatches);
// Link the passed signal to our timeout controller
const onAbort = () => timeoutController.abort();
if (signal.aborted) {
onAbort();
} else {
signal.addEventListener('abort', onAbort, { once: true });
}
let allMatches: GrepMatch[];
try {
allMatches = await this.performRipgrepSearch({
pattern: this.params.pattern,
path: searchDirAbs!,
include: this.params.include,
case_sensitive: this.params.case_sensitive,
fixed_strings: this.params.fixed_strings,
context: this.params.context,
after: this.params.after,
before: this.params.before,
no_ignore: this.params.no_ignore,
maxMatches: totalMaxMatches,
signal: timeoutController.signal,
});
} finally {
clearTimeout(timeoutId);
signal.removeEventListener('abort', onAbort);
}
const searchLocationDescription = `in path "${searchDirDisplay}"`;
@@ -254,13 +273,7 @@ class GrepToolInvocation extends BaseToolInvocation<
const matchCount = allMatches.length;
const matchTerm = matchCount === 1 ? 'match' : 'matches';
let llmContent = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}`;
if (wasTruncated) {
llmContent += ` (results limited to ${totalMaxMatches} matches for performance)`;
}
llmContent += `:\n---\n`;
let llmContent = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}${wasTruncated ? ` (results limited to ${totalMaxMatches} matches for performance)` : ''}:\n---\n`;
for (const filePath in matchesByFile) {
llmContent += `File: ${filePath}\n`;
@@ -271,14 +284,11 @@ class GrepToolInvocation extends BaseToolInvocation<
llmContent += '---\n';
}
let displayMessage = `Found ${matchCount} ${matchTerm}`;
if (wasTruncated) {
displayMessage += ` (limited)`;
}
return {
llmContent: llmContent.trim(),
returnDisplay: displayMessage,
returnDisplay: `Found ${matchCount} ${matchTerm}${
wasTruncated ? ' (limited)' : ''
}`,
};
} catch (error) {
debugLogger.warn(`Error during GrepLogic execution: ${error}`);
@@ -290,41 +300,6 @@ class GrepToolInvocation extends BaseToolInvocation<
}
}
/**
 * Converts the full newline-delimited JSON output of a ripgrep run into
 * match records.
 *
 * Only events whose `type` is `'match'` and that carry both path text and
 * line text contribute a record. Lines that fail to parse are logged and
 * skipped.
 *
 * @param output Complete ripgrep output text; an empty string yields no matches.
 * @param basePath Directory that reported paths are resolved against and
 *   made relative to.
 * @returns The collected matches, in the order they appear in `output`.
 */
private parseRipgrepJsonOutput(
  output: string,
  basePath: string,
): GrepMatch[] {
  const matches: GrepMatch[] = [];

  if (output) {
    for (const rawLine of output.trim().split('\n')) {
      // Skip whitespace-only lines.
      if (rawLine.trim()) {
        try {
          const event = JSON.parse(rawLine);
          if (event.type !== 'match') {
            continue;
          }

          const data = event.data;
          // Defensive check: ensure text properties exist (skips binary/invalid encoding)
          const hasText = data.path?.text && data.lines?.text;
          if (!hasText) {
            continue;
          }

          const absolute = path.resolve(basePath, data.path.text);
          const relative = path.relative(basePath, absolute);
          matches.push({
            // An empty relative path means the match is basePath itself;
            // fall back to its base name.
            filePath: relative || path.basename(absolute),
            lineNumber: data.line_number,
            line: data.lines.text.trimEnd(),
          });
        } catch (error) {
          debugLogger.warn(`Failed to parse ripgrep JSON line: ${rawLine}`, error);
        }
      }
    }
  }

  return matches;
}
private async performRipgrepSearch(options: {
pattern: string;
path: string;
@@ -335,6 +310,7 @@ class GrepToolInvocation extends BaseToolInvocation<
after?: number;
before?: number;
no_ignore?: boolean;
maxMatches: number;
signal: AbortSignal;
}): Promise<GrepMatch[]> {
const {
@@ -347,6 +323,7 @@ class GrepToolInvocation extends BaseToolInvocation<
after,
before,
no_ignore,
maxMatches,
} = options;
const rgArgs = ['--json'];
@@ -402,64 +379,72 @@ class GrepToolInvocation extends BaseToolInvocation<
rgArgs.push('--threads', '4');
rgArgs.push(absolutePath);
const results: GrepMatch[] = [];
try {
const rgPath = await ensureRgPath();
const output = await new Promise<string>((resolve, reject) => {
const child = spawn(rgPath, rgArgs, {
windowsHide: true,
});
const stdoutChunks: Buffer[] = [];
const stderrChunks: Buffer[] = [];
const cleanup = () => {
if (options.signal.aborted) {
child.kill();
}
};
options.signal.addEventListener('abort', cleanup, { once: true });
child.stdout.on('data', (chunk) => stdoutChunks.push(chunk));
child.stderr.on('data', (chunk) => stderrChunks.push(chunk));
child.on('error', (err) => {
options.signal.removeEventListener('abort', cleanup);
reject(
new Error(
`Failed to start ripgrep: ${err.message}. Please ensure @lvce-editor/ripgrep is properly installed.`,
),
);
});
child.on('close', (code, signal) => {
options.signal.removeEventListener('abort', cleanup);
const stdoutData = Buffer.concat(stdoutChunks).toString('utf8');
const stderrData = Buffer.concat(stderrChunks).toString('utf8');
if (code === 0) {
resolve(stdoutData);
} else if (code === 1) {
resolve(''); // No matches found
} else {
if (signal) {
reject(new Error(`ripgrep was terminated by signal: ${signal}`));
} else {
reject(
new Error(`ripgrep exited with code ${code}: ${stderrData}`),
);
}
}
});
const generator = execStreaming(rgPath, rgArgs, {
signal: options.signal,
allowedExitCodes: [0, 1],
});
return this.parseRipgrepJsonOutput(output, absolutePath);
for await (const line of generator) {
const match = this.parseRipgrepJsonLine(line, absolutePath);
if (match) {
results.push(match);
if (results.length >= maxMatches) {
break;
}
}
}
return results;
} catch (error: unknown) {
debugLogger.debug(`GrepLogic: ripgrep failed: ${getErrorMessage(error)}`);
throw error;
}
}
/**
 * Parses a single line of ripgrep JSON output into a match record.
 *
 * A record is produced only for events whose `type` is `'match'` and that
 * carry both path text and line text. Matches whose resolved path falls
 * outside `basePath` are dropped.
 *
 * @param line One line of ripgrep JSON output.
 * @param basePath Directory that the reported path is resolved against and
 *   made relative to.
 * @returns The parsed match, or `null` when the line is blank, is not a
 *   usable match event, points outside `basePath`, or cannot be parsed.
 */
private parseRipgrepJsonLine(
  line: string,
  basePath: string,
): GrepMatch | null {
  // Blank lines carry no event; return before JSON.parse would throw on them.
  if (line.trim().length === 0) {
    return null;
  }

  try {
    const json = JSON.parse(line);
    if (json.type !== 'match') {
      return null;
    }

    const match = json.data;
    // Defensive check: ensure text properties exist (skips binary/invalid encoding)
    if (!match.path?.text || !match.lines?.text) {
      return null;
    }

    const absoluteFilePath = path.resolve(basePath, match.path.text);
    // Computed once: used both for the containment check and as the
    // reported file path.
    const relativeFilePath = path.relative(basePath, absoluteFilePath);

    // Reject paths that climb out of basePath or that could not be
    // expressed relative to it.
    if (
      relativeFilePath === '..' ||
      relativeFilePath.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relativeFilePath)
    ) {
      return null;
    }

    return {
      // An empty relative path means the match is basePath itself; fall
      // back to its base name.
      filePath: relativeFilePath || path.basename(absoluteFilePath),
      lineNumber: match.line_number,
      line: match.lines.text.trimEnd(),
    };
  } catch (error) {
    // Only non-blank lines reach this point; log a truncated copy so a huge
    // malformed line does not flood the log.
    debugLogger.warn(
      `Failed to parse ripgrep JSON line: ${line.substring(0, 100)}...`,
      error,
    );
    return null;
  }
}
/**
* Gets a description of the grep operation
* @param params Parameters for the grep operation