mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-11 06:31:01 -07:00
fix(core): stream grep/ripgrep output to prevent OOM (#17146)
This commit is contained in:
7
packages/core/src/tools/constants.ts
Normal file
7
packages/core/src/tools/constants.ts
Normal file
@@ -0,0 +1,7 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
export const DEFAULT_TOTAL_MAX_MATCHES = 20000;
|
||||
export const DEFAULT_SEARCH_TIMEOUT_MS = 30000;
|
||||
@@ -7,6 +7,7 @@
|
||||
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||
import type { GrepToolParams } from './grep.js';
|
||||
import { GrepTool } from './grep.js';
|
||||
import type { ToolResult } from './tools.js';
|
||||
import path from 'node:path';
|
||||
import fs from 'node:fs/promises';
|
||||
import os from 'node:os';
|
||||
@@ -15,8 +16,12 @@ import { createMockWorkspaceContext } from '../test-utils/mockWorkspaceContext.j
|
||||
import { ToolErrorType } from './tool-error.js';
|
||||
import * as glob from 'glob';
|
||||
import { createMockMessageBus } from '../test-utils/mock-message-bus.js';
|
||||
import { execStreaming } from '../utils/shell-utils.js';
|
||||
|
||||
vi.mock('glob', { spy: true });
|
||||
vi.mock('../utils/shell-utils.js', () => ({
|
||||
execStreaming: vi.fn(),
|
||||
}));
|
||||
|
||||
// Mock the child_process module to control grep/git grep behavior
|
||||
vi.mock('child_process', () => ({
|
||||
@@ -129,6 +134,14 @@ describe('GrepTool', () => {
|
||||
});
|
||||
});
|
||||
|
||||
function createLineGenerator(lines: string[]): AsyncGenerator<string> {
|
||||
return (async function* () {
|
||||
for (const line of lines) {
|
||||
yield line;
|
||||
}
|
||||
})();
|
||||
}
|
||||
|
||||
describe('execute', () => {
|
||||
it('should find matches for a simple pattern in all files', async () => {
|
||||
const params: GrepToolParams = { pattern: 'world' };
|
||||
@@ -147,6 +160,35 @@ describe('GrepTool', () => {
|
||||
expect(result.returnDisplay).toBe('Found 3 matches');
|
||||
}, 30000);
|
||||
|
||||
it('should include files that start with ".." in JS fallback', async () => {
|
||||
await fs.writeFile(path.join(tempRootDir, '..env'), 'world in ..env');
|
||||
const params: GrepToolParams = { pattern: 'world' };
|
||||
const invocation = grepTool.build(params);
|
||||
const result = await invocation.execute(abortSignal);
|
||||
expect(result.llmContent).toContain('File: ..env');
|
||||
expect(result.llmContent).toContain('L1: world in ..env');
|
||||
});
|
||||
|
||||
it('should ignore system grep output that escapes base path', async () => {
|
||||
vi.mocked(execStreaming).mockImplementationOnce(() =>
|
||||
createLineGenerator(['..env:1:hello', '../secret.txt:2:leak']),
|
||||
);
|
||||
|
||||
const params: GrepToolParams = { pattern: 'hello' };
|
||||
const invocation = grepTool.build(params) as unknown as {
|
||||
isCommandAvailable: (command: string) => Promise<boolean>;
|
||||
execute: (signal: AbortSignal) => Promise<ToolResult>;
|
||||
};
|
||||
invocation.isCommandAvailable = vi.fn(
|
||||
async (command: string) => command === 'grep',
|
||||
);
|
||||
|
||||
const result = await invocation.execute(abortSignal);
|
||||
expect(result.llmContent).toContain('File: ..env');
|
||||
expect(result.llmContent).toContain('L1: hello');
|
||||
expect(result.llmContent).not.toContain('secret.txt');
|
||||
});
|
||||
|
||||
it('should find matches in a specific path', async () => {
|
||||
const params: GrepToolParams = { pattern: 'world', dir_path: 'sub' };
|
||||
const invocation = grepTool.build(params);
|
||||
|
||||
@@ -8,10 +8,14 @@ import type { MessageBus } from '../confirmation-bus/message-bus.js';
|
||||
import fs from 'node:fs';
|
||||
import fsPromises from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import { EOL } from 'node:os';
|
||||
import { spawn } from 'node:child_process';
|
||||
import { globStream } from 'glob';
|
||||
import type { ToolInvocation, ToolResult } from './tools.js';
|
||||
import { execStreaming } from '../utils/shell-utils.js';
|
||||
import {
|
||||
DEFAULT_TOTAL_MAX_MATCHES,
|
||||
DEFAULT_SEARCH_TIMEOUT_MS,
|
||||
} from './constants.js';
|
||||
import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js';
|
||||
import { makeRelative, shortenPath } from '../utils/paths.js';
|
||||
import { getErrorMessage, isNodeError } from '../utils/errors.js';
|
||||
@@ -111,6 +115,47 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
return targetPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a single line of grep-like output (git grep, system grep).
|
||||
* Expects format: filePath:lineNumber:lineContent
|
||||
* @param {string} line The line to parse.
|
||||
* @param {string} basePath The absolute directory for path resolution.
|
||||
* @returns {GrepMatch | null} Parsed match or null if malformed.
|
||||
*/
|
||||
private parseGrepLine(line: string, basePath: string): GrepMatch | null {
|
||||
if (!line.trim()) return null;
|
||||
|
||||
// Use regex to locate the first occurrence of :<digits>:
|
||||
// This allows filenames to contain colons, as long as they don't look like :<digits>:
|
||||
// Note: This regex assumes filenames do not contain colons, or at least not followed by digits.
|
||||
const match = line.match(/^(.+?):(\d+):(.*)$/);
|
||||
if (!match) return null;
|
||||
|
||||
const [, filePathRaw, lineNumberStr, lineContent] = match;
|
||||
const lineNumber = parseInt(lineNumberStr, 10);
|
||||
|
||||
if (!isNaN(lineNumber)) {
|
||||
const absoluteFilePath = path.resolve(basePath, filePathRaw);
|
||||
const relativeCheck = path.relative(basePath, absoluteFilePath);
|
||||
if (
|
||||
relativeCheck === '..' ||
|
||||
relativeCheck.startsWith(`..${path.sep}`) ||
|
||||
path.isAbsolute(relativeCheck)
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const relativeFilePath = path.relative(basePath, absoluteFilePath);
|
||||
|
||||
return {
|
||||
filePath: relativeFilePath || path.basename(absoluteFilePath),
|
||||
lineNumber,
|
||||
line: lineContent,
|
||||
};
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
async execute(signal: AbortSignal): Promise<ToolResult> {
|
||||
try {
|
||||
const workspaceContext = this.config.getWorkspaceContext();
|
||||
@@ -129,23 +174,48 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
|
||||
// Collect matches from all search directories
|
||||
let allMatches: GrepMatch[] = [];
|
||||
for (const searchDir of searchDirectories) {
|
||||
const matches = await this.performGrepSearch({
|
||||
pattern: this.params.pattern,
|
||||
path: searchDir,
|
||||
include: this.params.include,
|
||||
signal,
|
||||
});
|
||||
const totalMaxMatches = DEFAULT_TOTAL_MAX_MATCHES;
|
||||
|
||||
// Add directory prefix if searching multiple directories
|
||||
if (searchDirectories.length > 1) {
|
||||
const dirName = path.basename(searchDir);
|
||||
matches.forEach((match) => {
|
||||
match.filePath = path.join(dirName, match.filePath);
|
||||
// Create a timeout controller to prevent indefinitely hanging searches
|
||||
const timeoutController = new AbortController();
|
||||
const timeoutId = setTimeout(() => {
|
||||
timeoutController.abort();
|
||||
}, DEFAULT_SEARCH_TIMEOUT_MS);
|
||||
|
||||
// Link the passed signal to our timeout controller
|
||||
const onAbort = () => timeoutController.abort();
|
||||
if (signal.aborted) {
|
||||
onAbort();
|
||||
} else {
|
||||
signal.addEventListener('abort', onAbort, { once: true });
|
||||
}
|
||||
|
||||
try {
|
||||
for (const searchDir of searchDirectories) {
|
||||
const remainingLimit = totalMaxMatches - allMatches.length;
|
||||
if (remainingLimit <= 0) break;
|
||||
|
||||
const matches = await this.performGrepSearch({
|
||||
pattern: this.params.pattern,
|
||||
path: searchDir,
|
||||
include: this.params.include,
|
||||
maxMatches: remainingLimit,
|
||||
signal: timeoutController.signal,
|
||||
});
|
||||
}
|
||||
|
||||
allMatches = allMatches.concat(matches);
|
||||
// Add directory prefix if searching multiple directories
|
||||
if (searchDirectories.length > 1) {
|
||||
const dirName = path.basename(searchDir);
|
||||
matches.forEach((match) => {
|
||||
match.filePath = path.join(dirName, match.filePath);
|
||||
});
|
||||
}
|
||||
|
||||
allMatches = allMatches.concat(matches);
|
||||
}
|
||||
} finally {
|
||||
clearTimeout(timeoutId);
|
||||
signal.removeEventListener('abort', onAbort);
|
||||
}
|
||||
|
||||
let searchLocationDescription: string;
|
||||
@@ -164,6 +234,8 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
return { llmContent: noMatchMsg, returnDisplay: `No matches found` };
|
||||
}
|
||||
|
||||
const wasTruncated = allMatches.length >= totalMaxMatches;
|
||||
|
||||
// Group matches by file
|
||||
const matchesByFile = allMatches.reduce(
|
||||
(acc, match) => {
|
||||
@@ -181,9 +253,7 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
const matchCount = allMatches.length;
|
||||
const matchTerm = matchCount === 1 ? 'match' : 'matches';
|
||||
|
||||
let llmContent = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}:
|
||||
---
|
||||
`;
|
||||
let llmContent = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}${wasTruncated ? ` (results limited to ${totalMaxMatches} matches for performance)` : ''}:\n---\n`;
|
||||
|
||||
for (const filePath in matchesByFile) {
|
||||
llmContent += `File: ${filePath}\n`;
|
||||
@@ -196,7 +266,7 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
|
||||
return {
|
||||
llmContent: llmContent.trim(),
|
||||
returnDisplay: `Found ${matchCount} ${matchTerm}`,
|
||||
returnDisplay: `Found ${matchCount} ${matchTerm}${wasTruncated ? ' (limited)' : ''}`,
|
||||
};
|
||||
} catch (error) {
|
||||
debugLogger.warn(`Error during GrepLogic execution: ${error}`);
|
||||
@@ -241,92 +311,6 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the standard output of grep-like commands (git grep, system grep).
|
||||
* Expects format: filePath:lineNumber:lineContent
|
||||
* Handles colons within file paths and line content correctly.
|
||||
* @param {string} output The raw stdout string.
|
||||
* @param {string} basePath The absolute directory the search was run from, for relative paths.
|
||||
* @returns {GrepMatch[]} Array of match objects.
|
||||
*/
|
||||
private parseGrepOutput(output: string, basePath: string): GrepMatch[] {
|
||||
const results: GrepMatch[] = [];
|
||||
if (!output) return results;
|
||||
|
||||
const lines = output.split(EOL); // Use OS-specific end-of-line
|
||||
|
||||
for (const line of lines) {
|
||||
if (!line.trim()) continue;
|
||||
|
||||
// Find the index of the first colon.
|
||||
const firstColonIndex = line.indexOf(':');
|
||||
if (firstColonIndex === -1) continue; // Malformed
|
||||
|
||||
// Find the index of the second colon, searching *after* the first one.
|
||||
const secondColonIndex = line.indexOf(':', firstColonIndex + 1);
|
||||
if (secondColonIndex === -1) continue; // Malformed
|
||||
|
||||
// Extract parts based on the found colon indices
|
||||
const filePathRaw = line.substring(0, firstColonIndex);
|
||||
const lineNumberStr = line.substring(
|
||||
firstColonIndex + 1,
|
||||
secondColonIndex,
|
||||
);
|
||||
const lineContent = line.substring(secondColonIndex + 1);
|
||||
|
||||
const lineNumber = parseInt(lineNumberStr, 10);
|
||||
|
||||
if (!isNaN(lineNumber)) {
|
||||
const absoluteFilePath = path.resolve(basePath, filePathRaw);
|
||||
const relativeFilePath = path.relative(basePath, absoluteFilePath);
|
||||
|
||||
results.push({
|
||||
filePath: relativeFilePath || path.basename(absoluteFilePath),
|
||||
lineNumber,
|
||||
line: lineContent,
|
||||
});
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a description of the grep operation
|
||||
* @returns A string describing the grep
|
||||
*/
|
||||
getDescription(): string {
|
||||
let description = `'${this.params.pattern}'`;
|
||||
if (this.params.include) {
|
||||
description += ` in ${this.params.include}`;
|
||||
}
|
||||
if (this.params.dir_path) {
|
||||
const resolvedPath = path.resolve(
|
||||
this.config.getTargetDir(),
|
||||
this.params.dir_path,
|
||||
);
|
||||
if (
|
||||
resolvedPath === this.config.getTargetDir() ||
|
||||
this.params.dir_path === '.'
|
||||
) {
|
||||
description += ` within ./`;
|
||||
} else {
|
||||
const relativePath = makeRelative(
|
||||
resolvedPath,
|
||||
this.config.getTargetDir(),
|
||||
);
|
||||
description += ` within ${shortenPath(relativePath)}`;
|
||||
}
|
||||
} else {
|
||||
// When no path is specified, indicate searching all workspace directories
|
||||
const workspaceContext = this.config.getWorkspaceContext();
|
||||
const directories = workspaceContext.getDirectories();
|
||||
if (directories.length > 1) {
|
||||
description += ` across all workspace directories`;
|
||||
}
|
||||
}
|
||||
return description;
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs the actual search using the prioritized strategies.
|
||||
* @param options Search options including pattern, absolute path, and include glob.
|
||||
@@ -336,9 +320,10 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
pattern: string;
|
||||
path: string; // Expects absolute path
|
||||
include?: string;
|
||||
maxMatches: number;
|
||||
signal: AbortSignal;
|
||||
}): Promise<GrepMatch[]> {
|
||||
const { pattern, path: absolutePath, include } = options;
|
||||
const { pattern, path: absolutePath, include, maxMatches } = options;
|
||||
let strategyUsed = 'none';
|
||||
|
||||
try {
|
||||
@@ -361,32 +346,23 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
}
|
||||
|
||||
try {
|
||||
const output = await new Promise<string>((resolve, reject) => {
|
||||
const child = spawn('git', gitArgs, {
|
||||
cwd: absolutePath,
|
||||
windowsHide: true,
|
||||
});
|
||||
const stdoutChunks: Buffer[] = [];
|
||||
const stderrChunks: Buffer[] = [];
|
||||
|
||||
child.stdout.on('data', (chunk) => stdoutChunks.push(chunk));
|
||||
child.stderr.on('data', (chunk) => stderrChunks.push(chunk));
|
||||
child.on('error', (err) =>
|
||||
reject(new Error(`Failed to start git grep: ${err.message}`)),
|
||||
);
|
||||
child.on('close', (code) => {
|
||||
const stdoutData = Buffer.concat(stdoutChunks).toString('utf8');
|
||||
const stderrData = Buffer.concat(stderrChunks).toString('utf8');
|
||||
if (code === 0) resolve(stdoutData);
|
||||
else if (code === 1)
|
||||
resolve(''); // No matches
|
||||
else
|
||||
reject(
|
||||
new Error(`git grep exited with code ${code}: ${stderrData}`),
|
||||
);
|
||||
});
|
||||
const generator = execStreaming('git', gitArgs, {
|
||||
cwd: absolutePath,
|
||||
signal: options.signal,
|
||||
allowedExitCodes: [0, 1],
|
||||
});
|
||||
return this.parseGrepOutput(output, absolutePath);
|
||||
|
||||
const results: GrepMatch[] = [];
|
||||
for await (const line of generator) {
|
||||
const match = this.parseGrepLine(line, absolutePath);
|
||||
if (match) {
|
||||
results.push(match);
|
||||
if (results.length >= maxMatches) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return results;
|
||||
} catch (gitError: unknown) {
|
||||
debugLogger.debug(
|
||||
`GrepLogic: git grep failed: ${getErrorMessage(
|
||||
@@ -433,67 +409,31 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
grepArgs.push(pattern);
|
||||
grepArgs.push('.');
|
||||
|
||||
const results: GrepMatch[] = [];
|
||||
try {
|
||||
const output = await new Promise<string>((resolve, reject) => {
|
||||
const child = spawn('grep', grepArgs, {
|
||||
cwd: absolutePath,
|
||||
windowsHide: true,
|
||||
});
|
||||
const stdoutChunks: Buffer[] = [];
|
||||
const stderrChunks: Buffer[] = [];
|
||||
|
||||
const onData = (chunk: Buffer) => stdoutChunks.push(chunk);
|
||||
const onStderr = (chunk: Buffer) => {
|
||||
const stderrStr = chunk.toString();
|
||||
// Suppress common harmless stderr messages
|
||||
if (
|
||||
!stderrStr.includes('Permission denied') &&
|
||||
!/grep:.*: Is a directory/i.test(stderrStr)
|
||||
) {
|
||||
stderrChunks.push(chunk);
|
||||
}
|
||||
};
|
||||
const onError = (err: Error) => {
|
||||
cleanup();
|
||||
reject(new Error(`Failed to start system grep: ${err.message}`));
|
||||
};
|
||||
const onClose = (code: number | null) => {
|
||||
const stdoutData = Buffer.concat(stdoutChunks).toString('utf8');
|
||||
const stderrData = Buffer.concat(stderrChunks)
|
||||
.toString('utf8')
|
||||
.trim();
|
||||
cleanup();
|
||||
if (code === 0) resolve(stdoutData);
|
||||
else if (code === 1)
|
||||
resolve(''); // No matches
|
||||
else {
|
||||
if (stderrData)
|
||||
reject(
|
||||
new Error(
|
||||
`System grep exited with code ${code}: ${stderrData}`,
|
||||
),
|
||||
);
|
||||
else resolve(''); // Exit code > 1 but no stderr, likely just suppressed errors
|
||||
}
|
||||
};
|
||||
|
||||
const cleanup = () => {
|
||||
child.stdout.removeListener('data', onData);
|
||||
child.stderr.removeListener('data', onStderr);
|
||||
child.removeListener('error', onError);
|
||||
child.removeListener('close', onClose);
|
||||
if (child.connected) {
|
||||
child.disconnect();
|
||||
}
|
||||
};
|
||||
|
||||
child.stdout.on('data', onData);
|
||||
child.stderr.on('data', onStderr);
|
||||
child.on('error', onError);
|
||||
child.on('close', onClose);
|
||||
const generator = execStreaming('grep', grepArgs, {
|
||||
cwd: absolutePath,
|
||||
signal: options.signal,
|
||||
allowedExitCodes: [0, 1],
|
||||
});
|
||||
return this.parseGrepOutput(output, absolutePath);
|
||||
|
||||
for await (const line of generator) {
|
||||
const match = this.parseGrepLine(line, absolutePath);
|
||||
if (match) {
|
||||
results.push(match);
|
||||
if (results.length >= maxMatches) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return results;
|
||||
} catch (grepError: unknown) {
|
||||
if (
|
||||
grepError instanceof Error &&
|
||||
/Permission denied|Is a directory/i.test(grepError.message)
|
||||
) {
|
||||
return results;
|
||||
}
|
||||
debugLogger.debug(
|
||||
`GrepLogic: System grep failed: ${getErrorMessage(
|
||||
grepError,
|
||||
@@ -523,11 +463,22 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
const allMatches: GrepMatch[] = [];
|
||||
|
||||
for await (const filePath of filesStream) {
|
||||
if (allMatches.length >= maxMatches) break;
|
||||
const fileAbsolutePath = filePath;
|
||||
// security check
|
||||
const relativePath = path.relative(absolutePath, fileAbsolutePath);
|
||||
if (
|
||||
relativePath === '..' ||
|
||||
relativePath.startsWith(`..${path.sep}`) ||
|
||||
path.isAbsolute(relativePath)
|
||||
)
|
||||
continue;
|
||||
|
||||
try {
|
||||
const content = await fsPromises.readFile(fileAbsolutePath, 'utf8');
|
||||
const lines = content.split(/\r?\n/);
|
||||
lines.forEach((line, index) => {
|
||||
for (let index = 0; index < lines.length; index++) {
|
||||
const line = lines[index];
|
||||
if (regex.test(line)) {
|
||||
allMatches.push({
|
||||
filePath:
|
||||
@@ -536,8 +487,9 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
lineNumber: index + 1,
|
||||
line,
|
||||
});
|
||||
if (allMatches.length >= maxMatches) break;
|
||||
}
|
||||
});
|
||||
}
|
||||
} catch (readError: unknown) {
|
||||
// Ignore errors like permission denied or file gone during read
|
||||
if (!isNodeError(readError) || readError.code !== 'ENOENT') {
|
||||
@@ -560,9 +512,40 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
throw error; // Re-throw
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --- GrepLogic Class ---
|
||||
getDescription(): string {
|
||||
let description = `'${this.params.pattern}'`;
|
||||
if (this.params.include) {
|
||||
description += ` in ${this.params.include}`;
|
||||
}
|
||||
if (this.params.dir_path) {
|
||||
const resolvedPath = path.resolve(
|
||||
this.config.getTargetDir(),
|
||||
this.params.dir_path,
|
||||
);
|
||||
if (
|
||||
resolvedPath === this.config.getTargetDir() ||
|
||||
this.params.dir_path === '.'
|
||||
) {
|
||||
description += ` within ./`;
|
||||
} else {
|
||||
const relativePath = makeRelative(
|
||||
resolvedPath,
|
||||
this.config.getTargetDir(),
|
||||
);
|
||||
description += ` within ${shortenPath(relativePath)}`;
|
||||
}
|
||||
} else {
|
||||
// When no path is specified, indicate searching all workspace directories
|
||||
const workspaceContext = this.config.getWorkspaceContext();
|
||||
const directories = workspaceContext.getDirectories();
|
||||
if (directories.length > 1) {
|
||||
description += ` across all workspace directories`;
|
||||
}
|
||||
}
|
||||
return description;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Implementation of the Grep tool logic (moved from CLI)
|
||||
@@ -581,8 +564,7 @@ export class GrepTool extends BaseDeclarativeTool<GrepToolParams, ToolResult> {
|
||||
{
|
||||
properties: {
|
||||
pattern: {
|
||||
description:
|
||||
"The regular expression (regex) pattern to search for within file contents (e.g., 'function\\s+myFunction', 'import\\s+\\{.*\\}\\s+from\\s+.*').",
|
||||
description: `The regular expression (regex) pattern to search for within file contents (e.g., 'function\\s+myFunction', 'import\\s+\\{.*\\}\\s+from\\s+.*').`,
|
||||
type: 'string',
|
||||
},
|
||||
dir_path: {
|
||||
@@ -591,8 +573,7 @@ export class GrepTool extends BaseDeclarativeTool<GrepToolParams, ToolResult> {
|
||||
type: 'string',
|
||||
},
|
||||
include: {
|
||||
description:
|
||||
"Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).",
|
||||
description: `Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).`,
|
||||
type: 'string',
|
||||
},
|
||||
},
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -7,7 +7,6 @@
|
||||
import type { MessageBus } from '../confirmation-bus/message-bus.js';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { spawn } from 'node:child_process';
|
||||
import { downloadRipGrep } from '@joshua.litt/get-ripgrep';
|
||||
import type { ToolInvocation, ToolResult } from './tools.js';
|
||||
import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js';
|
||||
@@ -24,8 +23,11 @@ import {
|
||||
COMMON_DIRECTORY_EXCLUDES,
|
||||
} from '../utils/ignorePatterns.js';
|
||||
import { GeminiIgnoreParser } from '../utils/geminiIgnoreParser.js';
|
||||
|
||||
const DEFAULT_TOTAL_MAX_MATCHES = 20000;
|
||||
import { execStreaming } from '../utils/shell-utils.js';
|
||||
import {
|
||||
DEFAULT_TOTAL_MAX_MATCHES,
|
||||
DEFAULT_SEARCH_TIMEOUT_MS,
|
||||
} from './constants.js';
|
||||
|
||||
function getRgCandidateFilenames(): readonly string[] {
|
||||
return process.platform === 'win32' ? ['rg.exe', 'rg'] : ['rg'];
|
||||
@@ -213,21 +215,38 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
debugLogger.log(`[GrepTool] Total result limit: ${totalMaxMatches}`);
|
||||
}
|
||||
|
||||
let allMatches = await this.performRipgrepSearch({
|
||||
pattern: this.params.pattern,
|
||||
path: searchDirAbs!,
|
||||
include: this.params.include,
|
||||
case_sensitive: this.params.case_sensitive,
|
||||
fixed_strings: this.params.fixed_strings,
|
||||
context: this.params.context,
|
||||
after: this.params.after,
|
||||
before: this.params.before,
|
||||
no_ignore: this.params.no_ignore,
|
||||
signal,
|
||||
});
|
||||
// Create a timeout controller to prevent indefinitely hanging searches
|
||||
const timeoutController = new AbortController();
|
||||
const timeoutId = setTimeout(() => {
|
||||
timeoutController.abort();
|
||||
}, DEFAULT_SEARCH_TIMEOUT_MS);
|
||||
|
||||
if (allMatches.length >= totalMaxMatches) {
|
||||
allMatches = allMatches.slice(0, totalMaxMatches);
|
||||
// Link the passed signal to our timeout controller
|
||||
const onAbort = () => timeoutController.abort();
|
||||
if (signal.aborted) {
|
||||
onAbort();
|
||||
} else {
|
||||
signal.addEventListener('abort', onAbort, { once: true });
|
||||
}
|
||||
|
||||
let allMatches: GrepMatch[];
|
||||
try {
|
||||
allMatches = await this.performRipgrepSearch({
|
||||
pattern: this.params.pattern,
|
||||
path: searchDirAbs!,
|
||||
include: this.params.include,
|
||||
case_sensitive: this.params.case_sensitive,
|
||||
fixed_strings: this.params.fixed_strings,
|
||||
context: this.params.context,
|
||||
after: this.params.after,
|
||||
before: this.params.before,
|
||||
no_ignore: this.params.no_ignore,
|
||||
maxMatches: totalMaxMatches,
|
||||
signal: timeoutController.signal,
|
||||
});
|
||||
} finally {
|
||||
clearTimeout(timeoutId);
|
||||
signal.removeEventListener('abort', onAbort);
|
||||
}
|
||||
|
||||
const searchLocationDescription = `in path "${searchDirDisplay}"`;
|
||||
@@ -254,13 +273,7 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
const matchCount = allMatches.length;
|
||||
const matchTerm = matchCount === 1 ? 'match' : 'matches';
|
||||
|
||||
let llmContent = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}`;
|
||||
|
||||
if (wasTruncated) {
|
||||
llmContent += ` (results limited to ${totalMaxMatches} matches for performance)`;
|
||||
}
|
||||
|
||||
llmContent += `:\n---\n`;
|
||||
let llmContent = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}${wasTruncated ? ` (results limited to ${totalMaxMatches} matches for performance)` : ''}:\n---\n`;
|
||||
|
||||
for (const filePath in matchesByFile) {
|
||||
llmContent += `File: ${filePath}\n`;
|
||||
@@ -271,14 +284,11 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
llmContent += '---\n';
|
||||
}
|
||||
|
||||
let displayMessage = `Found ${matchCount} ${matchTerm}`;
|
||||
if (wasTruncated) {
|
||||
displayMessage += ` (limited)`;
|
||||
}
|
||||
|
||||
return {
|
||||
llmContent: llmContent.trim(),
|
||||
returnDisplay: displayMessage,
|
||||
returnDisplay: `Found ${matchCount} ${matchTerm}${
|
||||
wasTruncated ? ' (limited)' : ''
|
||||
}`,
|
||||
};
|
||||
} catch (error) {
|
||||
debugLogger.warn(`Error during GrepLogic execution: ${error}`);
|
||||
@@ -290,41 +300,6 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
}
|
||||
}
|
||||
|
||||
private parseRipgrepJsonOutput(
|
||||
output: string,
|
||||
basePath: string,
|
||||
): GrepMatch[] {
|
||||
const results: GrepMatch[] = [];
|
||||
if (!output) return results;
|
||||
|
||||
const lines = output.trim().split('\n');
|
||||
|
||||
for (const line of lines) {
|
||||
if (!line.trim()) continue;
|
||||
|
||||
try {
|
||||
const json = JSON.parse(line);
|
||||
if (json.type === 'match') {
|
||||
const match = json.data;
|
||||
// Defensive check: ensure text properties exist (skips binary/invalid encoding)
|
||||
if (match.path?.text && match.lines?.text) {
|
||||
const absoluteFilePath = path.resolve(basePath, match.path.text);
|
||||
const relativeFilePath = path.relative(basePath, absoluteFilePath);
|
||||
|
||||
results.push({
|
||||
filePath: relativeFilePath || path.basename(absoluteFilePath),
|
||||
lineNumber: match.line_number,
|
||||
line: match.lines.text.trimEnd(),
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
debugLogger.warn(`Failed to parse ripgrep JSON line: ${line}`, error);
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
private async performRipgrepSearch(options: {
|
||||
pattern: string;
|
||||
path: string;
|
||||
@@ -335,6 +310,7 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
after?: number;
|
||||
before?: number;
|
||||
no_ignore?: boolean;
|
||||
maxMatches: number;
|
||||
signal: AbortSignal;
|
||||
}): Promise<GrepMatch[]> {
|
||||
const {
|
||||
@@ -347,6 +323,7 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
after,
|
||||
before,
|
||||
no_ignore,
|
||||
maxMatches,
|
||||
} = options;
|
||||
|
||||
const rgArgs = ['--json'];
|
||||
@@ -402,64 +379,72 @@ class GrepToolInvocation extends BaseToolInvocation<
|
||||
rgArgs.push('--threads', '4');
|
||||
rgArgs.push(absolutePath);
|
||||
|
||||
const results: GrepMatch[] = [];
|
||||
try {
|
||||
const rgPath = await ensureRgPath();
|
||||
const output = await new Promise<string>((resolve, reject) => {
|
||||
const child = spawn(rgPath, rgArgs, {
|
||||
windowsHide: true,
|
||||
});
|
||||
|
||||
const stdoutChunks: Buffer[] = [];
|
||||
const stderrChunks: Buffer[] = [];
|
||||
|
||||
const cleanup = () => {
|
||||
if (options.signal.aborted) {
|
||||
child.kill();
|
||||
}
|
||||
};
|
||||
|
||||
options.signal.addEventListener('abort', cleanup, { once: true });
|
||||
|
||||
child.stdout.on('data', (chunk) => stdoutChunks.push(chunk));
|
||||
child.stderr.on('data', (chunk) => stderrChunks.push(chunk));
|
||||
|
||||
child.on('error', (err) => {
|
||||
options.signal.removeEventListener('abort', cleanup);
|
||||
reject(
|
||||
new Error(
|
||||
`Failed to start ripgrep: ${err.message}. Please ensure @lvce-editor/ripgrep is properly installed.`,
|
||||
),
|
||||
);
|
||||
});
|
||||
|
||||
child.on('close', (code, signal) => {
|
||||
options.signal.removeEventListener('abort', cleanup);
|
||||
const stdoutData = Buffer.concat(stdoutChunks).toString('utf8');
|
||||
const stderrData = Buffer.concat(stderrChunks).toString('utf8');
|
||||
|
||||
if (code === 0) {
|
||||
resolve(stdoutData);
|
||||
} else if (code === 1) {
|
||||
resolve(''); // No matches found
|
||||
} else {
|
||||
if (signal) {
|
||||
reject(new Error(`ripgrep was terminated by signal: ${signal}`));
|
||||
} else {
|
||||
reject(
|
||||
new Error(`ripgrep exited with code ${code}: ${stderrData}`),
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
const generator = execStreaming(rgPath, rgArgs, {
|
||||
signal: options.signal,
|
||||
allowedExitCodes: [0, 1],
|
||||
});
|
||||
|
||||
return this.parseRipgrepJsonOutput(output, absolutePath);
|
||||
for await (const line of generator) {
|
||||
const match = this.parseRipgrepJsonLine(line, absolutePath);
|
||||
if (match) {
|
||||
results.push(match);
|
||||
if (results.length >= maxMatches) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
} catch (error: unknown) {
|
||||
debugLogger.debug(`GrepLogic: ripgrep failed: ${getErrorMessage(error)}`);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
private parseRipgrepJsonLine(
|
||||
line: string,
|
||||
basePath: string,
|
||||
): GrepMatch | null {
|
||||
try {
|
||||
const json = JSON.parse(line);
|
||||
if (json.type === 'match') {
|
||||
const match = json.data;
|
||||
// Defensive check: ensure text properties exist (skips binary/invalid encoding)
|
||||
if (match.path?.text && match.lines?.text) {
|
||||
const absoluteFilePath = path.resolve(basePath, match.path.text);
|
||||
const relativeCheck = path.relative(basePath, absoluteFilePath);
|
||||
if (
|
||||
relativeCheck === '..' ||
|
||||
relativeCheck.startsWith(`..${path.sep}`) ||
|
||||
path.isAbsolute(relativeCheck)
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const relativeFilePath = path.relative(basePath, absoluteFilePath);
|
||||
|
||||
return {
|
||||
filePath: relativeFilePath || path.basename(absoluteFilePath),
|
||||
lineNumber: match.line_number,
|
||||
line: match.lines.text.trimEnd(),
|
||||
};
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
// Only log if it's not a simple empty line or widely invalid
|
||||
if (line.trim().length > 0) {
|
||||
debugLogger.warn(
|
||||
`Failed to parse ripgrep JSON line: ${line.substring(0, 100)}...`,
|
||||
error,
|
||||
);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a description of the grep operation
|
||||
* @param params Parameters for the grep operation
|
||||
|
||||
67
packages/core/src/utils/shell-utils.integration.test.ts
Normal file
67
packages/core/src/utils/shell-utils.integration.test.ts
Normal file
@@ -0,0 +1,67 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { execStreaming } from './shell-utils.js';
|
||||
|
||||
// Integration tests using real child processes
|
||||
describe('execStreaming (Integration)', () => {
|
||||
it('should yield lines from stdout', async () => {
|
||||
// Use node to echo for cross-platform support
|
||||
const generator = execStreaming(process.execPath, [
|
||||
'-e',
|
||||
'console.log("line 1\\nline 2")',
|
||||
]);
|
||||
const lines = [];
|
||||
for await (const line of generator) {
|
||||
lines.push(line);
|
||||
}
|
||||
expect(lines).toEqual(['line 1', 'line 2']);
|
||||
});
|
||||
|
||||
it('should throw error on non-zero exit code', async () => {
|
||||
// exit 2 via node
|
||||
const generator = execStreaming(process.execPath, [
|
||||
'-e',
|
||||
'process.exit(2)',
|
||||
]);
|
||||
|
||||
await expect(async () => {
|
||||
for await (const _ of generator) {
|
||||
// consume
|
||||
}
|
||||
}).rejects.toThrow();
|
||||
});
|
||||
|
||||
it('should abort cleanly when signal is aborted', async () => {
|
||||
const controller = new AbortController();
|
||||
// sleep for 2s via node
|
||||
const generator = execStreaming(
|
||||
process.execPath,
|
||||
['-e', 'setTimeout(() => {}, 2000)'],
|
||||
{ signal: controller.signal },
|
||||
);
|
||||
|
||||
// Start reading
|
||||
const readPromise = (async () => {
|
||||
const lines = [];
|
||||
try {
|
||||
for await (const line of generator) {
|
||||
lines.push(line);
|
||||
}
|
||||
} catch (_e) {
|
||||
// ignore
|
||||
}
|
||||
return lines;
|
||||
})();
|
||||
|
||||
setTimeout(() => {
|
||||
controller.abort();
|
||||
}, 100);
|
||||
|
||||
const lines = await readPromise;
|
||||
expect(lines).toEqual([]);
|
||||
});
|
||||
});
|
||||
@@ -13,6 +13,7 @@ import {
|
||||
spawnSync,
|
||||
type SpawnOptionsWithoutStdio,
|
||||
} from 'node:child_process';
|
||||
import * as readline from 'node:readline';
|
||||
import type { Node, Tree } from 'web-tree-sitter';
|
||||
import { Language, Parser, Query } from 'web-tree-sitter';
|
||||
import { loadWasmBinary } from './fileUtils.js';
|
||||
@@ -765,3 +766,123 @@ export const spawnAsync = (
|
||||
reject(err);
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
* Executes a command and yields lines of output as they appear.
|
||||
* Use for large outputs where buffering is not feasible.
|
||||
*
|
||||
* @param command The executable to run
|
||||
* @param args Arguments for the executable
|
||||
* @param options Spawn options (cwd, env, etc.)
|
||||
*/
|
||||
export async function* execStreaming(
|
||||
command: string,
|
||||
args: string[],
|
||||
options?: SpawnOptionsWithoutStdio & {
|
||||
signal?: AbortSignal;
|
||||
allowedExitCodes?: number[];
|
||||
},
|
||||
): AsyncGenerator<string, void, void> {
|
||||
const child = spawn(command, args, {
|
||||
...options,
|
||||
// ensure we don't open a window on windows if possible/relevant
|
||||
windowsHide: true,
|
||||
});
|
||||
|
||||
const rl = readline.createInterface({
|
||||
input: child.stdout,
|
||||
terminal: false,
|
||||
});
|
||||
|
||||
const errorChunks: Buffer[] = [];
|
||||
let stderrTotalBytes = 0;
|
||||
const MAX_STDERR_BYTES = 20 * 1024; // 20KB limit
|
||||
|
||||
child.stderr.on('data', (chunk) => {
|
||||
if (stderrTotalBytes < MAX_STDERR_BYTES) {
|
||||
errorChunks.push(chunk);
|
||||
stderrTotalBytes += chunk.length;
|
||||
}
|
||||
});
|
||||
|
||||
let error: Error | null = null;
|
||||
child.on('error', (err) => {
|
||||
error = err;
|
||||
});
|
||||
|
||||
const onAbort = () => {
|
||||
// If manually aborted by signal, we kill immediately.
|
||||
if (!child.killed) child.kill();
|
||||
};
|
||||
|
||||
if (options?.signal?.aborted) {
|
||||
onAbort();
|
||||
} else {
|
||||
options?.signal?.addEventListener('abort', onAbort);
|
||||
}
|
||||
|
||||
let finished = false;
|
||||
try {
|
||||
for await (const line of rl) {
|
||||
if (options?.signal?.aborted) break;
|
||||
yield line;
|
||||
}
|
||||
finished = true;
|
||||
} finally {
|
||||
rl.close();
|
||||
options?.signal?.removeEventListener('abort', onAbort);
|
||||
|
||||
// Ensure process is killed when the generator is closed (consumer breaks loop)
|
||||
let killedByGenerator = false;
|
||||
if (!finished && child.exitCode === null && !child.killed) {
|
||||
try {
|
||||
child.kill();
|
||||
} catch (_e) {
|
||||
// ignore error if process is already dead
|
||||
}
|
||||
killedByGenerator = true;
|
||||
}
|
||||
|
||||
// Ensure we wait for the process to exit to check codes
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
// If an error occurred before we got here (e.g. spawn failure), reject immediately.
|
||||
if (error) {
|
||||
reject(error);
|
||||
return;
|
||||
}
|
||||
|
||||
function checkExit(code: number | null) {
|
||||
// If we aborted or killed it manually, we treat it as success (stop waiting)
|
||||
if (options?.signal?.aborted || killedByGenerator) {
|
||||
resolve();
|
||||
return;
|
||||
}
|
||||
|
||||
const allowed = options?.allowedExitCodes ?? [0];
|
||||
if (code !== null && allowed.includes(code)) {
|
||||
resolve();
|
||||
} else {
|
||||
// If we have an accumulated error or explicit error event
|
||||
if (error) reject(error);
|
||||
else {
|
||||
const stderr = Buffer.concat(errorChunks).toString('utf8');
|
||||
const truncatedMsg =
|
||||
stderrTotalBytes >= MAX_STDERR_BYTES ? '...[truncated]' : '';
|
||||
reject(
|
||||
new Error(
|
||||
`Process exited with code ${code}: ${stderr}${truncatedMsg}`,
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (child.exitCode !== null) {
|
||||
checkExit(child.exitCode);
|
||||
} else {
|
||||
child.on('close', (code) => checkExit(code));
|
||||
child.on('error', (err) => reject(err));
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user