Files
gemini-cli/packages/core/src/tools/grep.ts

690 lines
23 KiB
TypeScript
Raw Normal View History

/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { MessageBus } from '../confirmation-bus/message-bus.js';
import fs from 'node:fs';
import fsPromises from 'node:fs/promises';
import path from 'node:path';
import { EOL } from 'node:os';
import { spawn } from 'node:child_process';
import { globStream } from 'glob';
import type { ToolInvocation, ToolResult } from './tools.js';
import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js';
import { makeRelative, shortenPath } from '../utils/paths.js';
import { getErrorMessage, isNodeError } from '../utils/errors.js';
import { isGitRepository } from '../utils/gitUtils.js';
import type { Config } from '../config/config.js';
import type { FileExclusions } from '../utils/ignorePatterns.js';
import { ToolErrorType } from './tool-error.js';
import { GREP_TOOL_NAME } from './tool-names.js';
import { debugLogger } from '../utils/debugLogger.js';
// --- Interfaces ---
/**
* Parameters for the GrepTool
*/
export interface GrepToolParams {
/**
* The regular expression pattern to search for in file contents
*/
pattern: string;
/**
* The directory to search in (optional, defaults to current directory relative to root)
*/
2025-11-06 15:03:52 -08:00
dir_path?: string;
/**
* File pattern to include in the search (e.g. "*.js", "*.{ts,tsx}")
*/
include?: string;
}
/**
* Result object for a single grep match
*/
interface GrepMatch {
filePath: string;
lineNumber: number;
line: string;
}
class GrepToolInvocation extends BaseToolInvocation<
GrepToolParams,
ToolResult
> {
private readonly fileExclusions: FileExclusions;
constructor(
private readonly config: Config,
params: GrepToolParams,
messageBus?: MessageBus,
_toolName?: string,
_toolDisplayName?: string,
) {
super(params, messageBus, _toolName, _toolDisplayName);
this.fileExclusions = config.getFileExclusions();
}
/**
* Checks if a path is within the root directory and resolves it.
* @param relativePath Path relative to the root directory (or undefined for root).
* @returns The absolute path if valid and exists, or null if no path specified (to search all directories).
* @throws {Error} If path is outside root, doesn't exist, or isn't a directory.
*/
private resolveAndValidatePath(relativePath?: string): string | null {
// If no path specified, return null to indicate searching all workspace directories
if (!relativePath) {
return null;
}
const targetPath = path.resolve(this.config.getTargetDir(), relativePath);
// Security Check: Ensure the resolved path is within workspace boundaries
const workspaceContext = this.config.getWorkspaceContext();
if (!workspaceContext.isPathWithinWorkspace(targetPath)) {
const directories = workspaceContext.getDirectories();
throw new Error(
`Path validation failed: Attempted path "${relativePath}" resolves outside the allowed workspace directories: ${directories.join(', ')}`,
);
}
// Check existence and type after resolving
try {
const stats = fs.statSync(targetPath);
if (!stats.isDirectory()) {
throw new Error(`Path is not a directory: ${targetPath}`);
}
} catch (error: unknown) {
if (isNodeError(error) && error.code !== 'ENOENT') {
throw new Error(`Path does not exist: ${targetPath}`);
}
throw new Error(
`Failed to access path stats for ${targetPath}: ${error}`,
);
}
return targetPath;
}
async execute(signal: AbortSignal): Promise<ToolResult> {
try {
const workspaceContext = this.config.getWorkspaceContext();
2025-11-06 15:03:52 -08:00
const searchDirAbs = this.resolveAndValidatePath(this.params.dir_path);
const searchDirDisplay = this.params.dir_path || '.';
// Determine which directories to search
let searchDirectories: readonly string[];
if (searchDirAbs === null) {
// No path specified - search all workspace directories
searchDirectories = workspaceContext.getDirectories();
} else {
// Specific path provided - search only that directory
searchDirectories = [searchDirAbs];
}
// Collect matches from all search directories
let allMatches: GrepMatch[] = [];
for (const searchDir of searchDirectories) {
const matches = await this.performGrepSearch({
pattern: this.params.pattern,
path: searchDir,
include: this.params.include,
signal,
});
// Add directory prefix if searching multiple directories
if (searchDirectories.length > 1) {
const dirName = path.basename(searchDir);
matches.forEach((match) => {
match.filePath = path.join(dirName, match.filePath);
});
}
allMatches = allMatches.concat(matches);
}
let searchLocationDescription: string;
if (searchDirAbs === null) {
const numDirs = workspaceContext.getDirectories().length;
searchLocationDescription =
numDirs > 1
? `across ${numDirs} workspace directories`
: `in the workspace directory`;
} else {
searchLocationDescription = `in path "${searchDirDisplay}"`;
}
if (allMatches.length === 0) {
const noMatchMsg = `No matches found for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}.`;
return { llmContent: noMatchMsg, returnDisplay: `No matches found` };
}
// Group matches by file
const matchesByFile = allMatches.reduce(
(acc, match) => {
const fileKey = match.filePath;
if (!acc[fileKey]) {
acc[fileKey] = [];
}
acc[fileKey].push(match);
acc[fileKey].sort((a, b) => a.lineNumber - b.lineNumber);
return acc;
},
{} as Record<string, GrepMatch[]>,
);
const matchCount = allMatches.length;
const matchTerm = matchCount === 1 ? 'match' : 'matches';
let llmContent = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}:
---
`;
for (const filePath in matchesByFile) {
llmContent += `File: ${filePath}\n`;
matchesByFile[filePath].forEach((match) => {
const trimmedLine = match.line.trim();
llmContent += `L${match.lineNumber}: ${trimmedLine}\n`;
});
llmContent += '---\n';
}
return {
llmContent: llmContent.trim(),
returnDisplay: `Found ${matchCount} ${matchTerm}`,
};
} catch (error) {
debugLogger.warn(`Error during GrepLogic execution: ${error}`);
const errorMessage = getErrorMessage(error);
return {
llmContent: `Error during grep search operation: ${errorMessage}`,
returnDisplay: `Error: ${errorMessage}`,
error: {
message: errorMessage,
type: ToolErrorType.GREP_EXECUTION_ERROR,
},
};
}
}
/**
* Checks if a command is available in the system's PATH.
* @param {string} command The command name (e.g., 'git', 'grep').
* @returns {Promise<boolean>} True if the command is available, false otherwise.
*/
private isCommandAvailable(command: string): Promise<boolean> {
return new Promise((resolve) => {
const checkCommand = process.platform === 'win32' ? 'where' : 'command';
const checkArgs =
process.platform === 'win32' ? [command] : ['-v', command];
try {
const child = spawn(checkCommand, checkArgs, {
stdio: 'ignore',
shell: true,
});
child.on('close', (code) => resolve(code === 0));
child.on('error', (err) => {
debugLogger.debug(
`[GrepTool] Failed to start process for '${command}':`,
err.message,
);
resolve(false);
});
} catch {
resolve(false);
}
});
}
/**
* Parses the standard output of grep-like commands (git grep, system grep).
* Expects format: filePath:lineNumber:lineContent
* Handles colons within file paths and line content correctly.
* @param {string} output The raw stdout string.
* @param {string} basePath The absolute directory the search was run from, for relative paths.
* @returns {GrepMatch[]} Array of match objects.
*/
private parseGrepOutput(output: string, basePath: string): GrepMatch[] {
const results: GrepMatch[] = [];
if (!output) return results;
const lines = output.split(EOL); // Use OS-specific end-of-line
for (const line of lines) {
if (!line.trim()) continue;
// Find the index of the first colon.
const firstColonIndex = line.indexOf(':');
if (firstColonIndex === -1) continue; // Malformed
// Find the index of the second colon, searching *after* the first one.
const secondColonIndex = line.indexOf(':', firstColonIndex + 1);
if (secondColonIndex === -1) continue; // Malformed
// Extract parts based on the found colon indices
const filePathRaw = line.substring(0, firstColonIndex);
const lineNumberStr = line.substring(
firstColonIndex + 1,
secondColonIndex,
);
const lineContent = line.substring(secondColonIndex + 1);
const lineNumber = parseInt(lineNumberStr, 10);
if (!isNaN(lineNumber)) {
const absoluteFilePath = path.resolve(basePath, filePathRaw);
const relativeFilePath = path.relative(basePath, absoluteFilePath);
results.push({
filePath: relativeFilePath || path.basename(absoluteFilePath),
lineNumber,
line: lineContent,
});
}
}
return results;
}
/**
* Gets a description of the grep operation
* @returns A string describing the grep
*/
getDescription(): string {
let description = `'${this.params.pattern}'`;
if (this.params.include) {
description += ` in ${this.params.include}`;
}
2025-11-06 15:03:52 -08:00
if (this.params.dir_path) {
const resolvedPath = path.resolve(
this.config.getTargetDir(),
2025-11-06 15:03:52 -08:00
this.params.dir_path,
);
if (
resolvedPath === this.config.getTargetDir() ||
2025-11-06 15:03:52 -08:00
this.params.dir_path === '.'
) {
description += ` within ./`;
} else {
const relativePath = makeRelative(
resolvedPath,
this.config.getTargetDir(),
);
description += ` within ${shortenPath(relativePath)}`;
}
} else {
// When no path is specified, indicate searching all workspace directories
const workspaceContext = this.config.getWorkspaceContext();
const directories = workspaceContext.getDirectories();
if (directories.length > 1) {
description += ` across all workspace directories`;
}
}
return description;
}
/**
* Performs the actual search using the prioritized strategies.
* @param options Search options including pattern, absolute path, and include glob.
* @returns A promise resolving to an array of match objects.
*/
private async performGrepSearch(options: {
pattern: string;
path: string; // Expects absolute path
include?: string;
signal: AbortSignal;
}): Promise<GrepMatch[]> {
const { pattern, path: absolutePath, include } = options;
let strategyUsed = 'none';
try {
// --- Strategy 1: git grep ---
const isGit = isGitRepository(absolutePath);
const gitAvailable = isGit && (await this.isCommandAvailable('git'));
if (gitAvailable) {
strategyUsed = 'git grep';
const gitArgs = [
'grep',
'--untracked',
'-n',
'-E',
'--ignore-case',
pattern,
];
if (include) {
gitArgs.push('--', include);
}
try {
const output = await new Promise<string>((resolve, reject) => {
const child = spawn('git', gitArgs, {
cwd: absolutePath,
windowsHide: true,
});
const stdoutChunks: Buffer[] = [];
const stderrChunks: Buffer[] = [];
child.stdout.on('data', (chunk) => stdoutChunks.push(chunk));
child.stderr.on('data', (chunk) => stderrChunks.push(chunk));
child.on('error', (err) =>
reject(new Error(`Failed to start git grep: ${err.message}`)),
);
child.on('close', (code) => {
const stdoutData = Buffer.concat(stdoutChunks).toString('utf8');
const stderrData = Buffer.concat(stderrChunks).toString('utf8');
if (code === 0) resolve(stdoutData);
else if (code === 1)
resolve(''); // No matches
else
reject(
new Error(`git grep exited with code ${code}: ${stderrData}`),
);
});
});
return this.parseGrepOutput(output, absolutePath);
} catch (gitError: unknown) {
debugLogger.debug(
`GrepLogic: git grep failed: ${getErrorMessage(
gitError,
)}. Falling back...`,
);
}
}
// --- Strategy 2: System grep ---
debugLogger.debug(
'GrepLogic: System grep is being considered as fallback strategy.',
);
const grepAvailable = await this.isCommandAvailable('grep');
if (grepAvailable) {
strategyUsed = 'system grep';
const grepArgs = ['-r', '-n', '-H', '-E', '-I'];
// Extract directory names from exclusion patterns for grep --exclude-dir
const globExcludes = this.fileExclusions.getGlobExcludes();
const commonExcludes = globExcludes
.map((pattern) => {
let dir = pattern;
if (dir.startsWith('**/')) {
dir = dir.substring(3);
}
if (dir.endsWith('/**')) {
dir = dir.slice(0, -3);
} else if (dir.endsWith('/')) {
dir = dir.slice(0, -1);
}
// Only consider patterns that are likely directories. This filters out file patterns.
if (dir && !dir.includes('/') && !dir.includes('*')) {
return dir;
}
return null;
})
.filter((dir): dir is string => !!dir);
commonExcludes.forEach((dir) => grepArgs.push(`--exclude-dir=${dir}`));
if (include) {
grepArgs.push(`--include=${include}`);
}
grepArgs.push(pattern);
grepArgs.push('.');
try {
const output = await new Promise<string>((resolve, reject) => {
const child = spawn('grep', grepArgs, {
cwd: absolutePath,
windowsHide: true,
});
const stdoutChunks: Buffer[] = [];
const stderrChunks: Buffer[] = [];
Jacob314/memory fixes (#754) Address multiple possible memory leaks found bystatic analysis of the codebase. The primary source of the leaks was event listeners on child processes and global objects that were not being properly removed, potentially causing their closures to be retained in memory indefinitely particularly for processes that did not exit. There are two commits. A larger one made by gemini CLI and a smaller one by me to make sure we always disconnect child processes as part of the cleanup methods. These changes may not actually fix any leaks but do look like reasonable defensive coding to avoid leaking event listeners or child processes. The following files were fixed: This is Gemini's somewhat overconfident description of what it did. packages/core/src/tools/shell.ts: Fixed a leak where an abortSignal listener was not being reliably removed. packages/cli/src/utils/readStdin.ts: Fixed a significant leak where listeners on process.stdin were never removed. packages/cli/src/utils/sandbox.ts: Fixed leaks in the imageExists and pullImage helper functions where listeners on spawned child processes were not being removed. packages/core/src/tools/grep.ts: Fixed three separate leaks in the isCommandAvailable check and the git grep and system grep strategies due to un-removed listeners on child processes. packages/core/src/tools/tool-registry.ts: Corrected a leak in the execute method of the DiscoveredTool class where listeners on the spawned tool process were not being removed.
2025-06-05 06:40:33 -07:00
const onData = (chunk: Buffer) => stdoutChunks.push(chunk);
const onStderr = (chunk: Buffer) => {
const stderrStr = chunk.toString();
// Suppress common harmless stderr messages
if (
!stderrStr.includes('Permission denied') &&
!/grep:.*: Is a directory/i.test(stderrStr)
) {
stderrChunks.push(chunk);
}
Jacob314/memory fixes (#754) Address multiple possible memory leaks found bystatic analysis of the codebase. The primary source of the leaks was event listeners on child processes and global objects that were not being properly removed, potentially causing their closures to be retained in memory indefinitely particularly for processes that did not exit. There are two commits. A larger one made by gemini CLI and a smaller one by me to make sure we always disconnect child processes as part of the cleanup methods. These changes may not actually fix any leaks but do look like reasonable defensive coding to avoid leaking event listeners or child processes. The following files were fixed: This is Gemini's somewhat overconfident description of what it did. packages/core/src/tools/shell.ts: Fixed a leak where an abortSignal listener was not being reliably removed. packages/cli/src/utils/readStdin.ts: Fixed a significant leak where listeners on process.stdin were never removed. packages/cli/src/utils/sandbox.ts: Fixed leaks in the imageExists and pullImage helper functions where listeners on spawned child processes were not being removed. packages/core/src/tools/grep.ts: Fixed three separate leaks in the isCommandAvailable check and the git grep and system grep strategies due to un-removed listeners on child processes. packages/core/src/tools/tool-registry.ts: Corrected a leak in the execute method of the DiscoveredTool class where listeners on the spawned tool process were not being removed.
2025-06-05 06:40:33 -07:00
};
const onError = (err: Error) => {
cleanup();
reject(new Error(`Failed to start system grep: ${err.message}`));
};
const onClose = (code: number | null) => {
const stdoutData = Buffer.concat(stdoutChunks).toString('utf8');
const stderrData = Buffer.concat(stderrChunks)
.toString('utf8')
.trim();
Jacob314/memory fixes (#754) Address multiple possible memory leaks found bystatic analysis of the codebase. The primary source of the leaks was event listeners on child processes and global objects that were not being properly removed, potentially causing their closures to be retained in memory indefinitely particularly for processes that did not exit. There are two commits. A larger one made by gemini CLI and a smaller one by me to make sure we always disconnect child processes as part of the cleanup methods. These changes may not actually fix any leaks but do look like reasonable defensive coding to avoid leaking event listeners or child processes. The following files were fixed: This is Gemini's somewhat overconfident description of what it did. packages/core/src/tools/shell.ts: Fixed a leak where an abortSignal listener was not being reliably removed. packages/cli/src/utils/readStdin.ts: Fixed a significant leak where listeners on process.stdin were never removed. packages/cli/src/utils/sandbox.ts: Fixed leaks in the imageExists and pullImage helper functions where listeners on spawned child processes were not being removed. packages/core/src/tools/grep.ts: Fixed three separate leaks in the isCommandAvailable check and the git grep and system grep strategies due to un-removed listeners on child processes. packages/core/src/tools/tool-registry.ts: Corrected a leak in the execute method of the DiscoveredTool class where listeners on the spawned tool process were not being removed.
2025-06-05 06:40:33 -07:00
cleanup();
if (code === 0) resolve(stdoutData);
else if (code === 1)
resolve(''); // No matches
else {
if (stderrData)
reject(
new Error(
`System grep exited with code ${code}: ${stderrData}`,
),
);
else resolve(''); // Exit code > 1 but no stderr, likely just suppressed errors
}
Jacob314/memory fixes (#754) Address multiple possible memory leaks found bystatic analysis of the codebase. The primary source of the leaks was event listeners on child processes and global objects that were not being properly removed, potentially causing their closures to be retained in memory indefinitely particularly for processes that did not exit. There are two commits. A larger one made by gemini CLI and a smaller one by me to make sure we always disconnect child processes as part of the cleanup methods. These changes may not actually fix any leaks but do look like reasonable defensive coding to avoid leaking event listeners or child processes. The following files were fixed: This is Gemini's somewhat overconfident description of what it did. packages/core/src/tools/shell.ts: Fixed a leak where an abortSignal listener was not being reliably removed. packages/cli/src/utils/readStdin.ts: Fixed a significant leak where listeners on process.stdin were never removed. packages/cli/src/utils/sandbox.ts: Fixed leaks in the imageExists and pullImage helper functions where listeners on spawned child processes were not being removed. packages/core/src/tools/grep.ts: Fixed three separate leaks in the isCommandAvailable check and the git grep and system grep strategies due to un-removed listeners on child processes. packages/core/src/tools/tool-registry.ts: Corrected a leak in the execute method of the DiscoveredTool class where listeners on the spawned tool process were not being removed.
2025-06-05 06:40:33 -07:00
};
const cleanup = () => {
child.stdout.removeListener('data', onData);
child.stderr.removeListener('data', onStderr);
child.removeListener('error', onError);
child.removeListener('close', onClose);
if (child.connected) {
child.disconnect();
}
};
child.stdout.on('data', onData);
child.stderr.on('data', onStderr);
child.on('error', onError);
child.on('close', onClose);
});
return this.parseGrepOutput(output, absolutePath);
} catch (grepError: unknown) {
debugLogger.debug(
`GrepLogic: System grep failed: ${getErrorMessage(
grepError,
)}. Falling back...`,
);
}
}
// --- Strategy 3: Pure JavaScript Fallback ---
debugLogger.debug(
'GrepLogic: Falling back to JavaScript grep implementation.',
);
strategyUsed = 'javascript fallback';
const globPattern = include ? include : '**/*';
const ignorePatterns = this.fileExclusions.getGlobExcludes();
const filesStream = globStream(globPattern, {
cwd: absolutePath,
dot: true,
ignore: ignorePatterns,
absolute: true,
nodir: true,
signal: options.signal,
});
const regex = new RegExp(pattern, 'i');
const allMatches: GrepMatch[] = [];
for await (const filePath of filesStream) {
const fileAbsolutePath = filePath as string;
try {
const content = await fsPromises.readFile(fileAbsolutePath, 'utf8');
const lines = content.split(/\r?\n/);
lines.forEach((line, index) => {
if (regex.test(line)) {
allMatches.push({
filePath:
path.relative(absolutePath, fileAbsolutePath) ||
path.basename(fileAbsolutePath),
lineNumber: index + 1,
line,
});
}
});
} catch (readError: unknown) {
// Ignore errors like permission denied or file gone during read
if (!isNodeError(readError) || readError.code !== 'ENOENT') {
debugLogger.debug(
`GrepLogic: Could not read/process ${fileAbsolutePath}: ${getErrorMessage(
readError,
)}`,
);
}
}
}
return allMatches;
} catch (error: unknown) {
debugLogger.warn(
`GrepLogic: Error in performGrepSearch (Strategy: ${strategyUsed}): ${getErrorMessage(
error,
)}`,
);
throw error; // Re-throw
}
}
}
// --- GrepLogic Class ---
/**
* Implementation of the Grep tool logic (moved from CLI)
*/
export class GrepTool extends BaseDeclarativeTool<GrepToolParams, ToolResult> {
static readonly Name = GREP_TOOL_NAME;
constructor(
private readonly config: Config,
messageBus?: MessageBus,
) {
super(
GrepTool.Name,
'SearchText',
'Searches for a regular expression pattern within the content of files in a specified directory (or current working directory). Can filter files by a glob pattern. Returns the lines containing matches, along with their file paths and line numbers.',
Kind.Search,
{
properties: {
pattern: {
description:
"The regular expression (regex) pattern to search for within file contents (e.g., 'function\\s+myFunction', 'import\\s+\\{.*\\}\\s+from\\s+.*').",
type: 'string',
},
2025-11-06 15:03:52 -08:00
dir_path: {
description:
'Optional: The absolute path to the directory to search within. If omitted, searches the current working directory.',
type: 'string',
},
include: {
description:
"Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).",
type: 'string',
},
},
required: ['pattern'],
type: 'object',
},
true,
false,
messageBus,
);
}
/**
* Checks if a path is within the root directory and resolves it.
* @param relativePath Path relative to the root directory (or undefined for root).
* @returns The absolute path if valid and exists, or null if no path specified (to search all directories).
* @throws {Error} If path is outside root, doesn't exist, or isn't a directory.
*/
private resolveAndValidatePath(relativePath?: string): string | null {
// If no path specified, return null to indicate searching all workspace directories
if (!relativePath) {
return null;
}
const targetPath = path.resolve(this.config.getTargetDir(), relativePath);
// Security Check: Ensure the resolved path is within workspace boundaries
const workspaceContext = this.config.getWorkspaceContext();
if (!workspaceContext.isPathWithinWorkspace(targetPath)) {
const directories = workspaceContext.getDirectories();
throw new Error(
`Path validation failed: Attempted path "${relativePath}" resolves outside the allowed workspace directories: ${directories.join(', ')}`,
);
}
// Check existence and type after resolving
try {
const stats = fs.statSync(targetPath);
if (!stats.isDirectory()) {
throw new Error(`Path is not a directory: ${targetPath}`);
}
} catch (error: unknown) {
if (isNodeError(error) && error.code !== 'ENOENT') {
throw new Error(`Path does not exist: ${targetPath}`);
}
throw new Error(
`Failed to access path stats for ${targetPath}: ${error}`,
);
}
return targetPath;
}
/**
* Validates the parameters for the tool
* @param params Parameters to validate
* @returns An error message string if invalid, null otherwise
*/
protected override validateToolParamValues(
params: GrepToolParams,
): string | null {
try {
new RegExp(params.pattern);
} catch (error) {
return `Invalid regular expression pattern provided: ${params.pattern}. Error: ${getErrorMessage(error)}`;
}
2025-11-06 15:03:52 -08:00
// Only validate dir_path if one is provided
if (params.dir_path) {
try {
2025-11-06 15:03:52 -08:00
this.resolveAndValidatePath(params.dir_path);
} catch (error) {
return getErrorMessage(error);
}
}
return null; // Parameters are valid
}
protected createInvocation(
params: GrepToolParams,
messageBus?: MessageBus,
_toolName?: string,
_toolDisplayName?: string,
): ToolInvocation<GrepToolParams, ToolResult> {
return new GrepToolInvocation(
this.config,
params,
messageBus,
_toolName,
_toolDisplayName,
);
}
}