Files
gemini-cli/packages/core/src/tools/ripGrep.ts

603 lines
18 KiB
TypeScript

/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { MessageBus } from '../confirmation-bus/message-bus.js';
import fs from 'node:fs';
import path from 'node:path';
import { spawn } from 'node:child_process';
import { downloadRipGrep } from '@joshua.litt/get-ripgrep';
import type { ToolInvocation, ToolResult } from './tools.js';
import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js';
import { SchemaValidator } from '../utils/schemaValidator.js';
import { makeRelative, shortenPath } from '../utils/paths.js';
import { getErrorMessage, isNodeError } from '../utils/errors.js';
import type { Config } from '../config/config.js';
import { fileExists } from '../utils/fileUtils.js';
import { Storage } from '../config/storage.js';
import { GREP_TOOL_NAME } from './tool-names.js';
import { debugLogger } from '../utils/debugLogger.js';
import {
FileExclusions,
COMMON_DIRECTORY_EXCLUDES,
} from '../utils/ignorePatterns.js';
import { GeminiIgnoreParser } from '../utils/geminiIgnoreParser.js';
const DEFAULT_TOTAL_MAX_MATCHES = 20000;
function getRgCandidateFilenames(): readonly string[] {
return process.platform === 'win32' ? ['rg.exe', 'rg'] : ['rg'];
}
async function resolveExistingRgPath(): Promise<string | null> {
const binDir = Storage.getGlobalBinDir();
for (const fileName of getRgCandidateFilenames()) {
const candidatePath = path.join(binDir, fileName);
if (await fileExists(candidatePath)) {
return candidatePath;
}
}
return null;
}
let ripgrepAcquisitionPromise: Promise<string | null> | null = null;
async function ensureRipgrepAvailable(): Promise<string | null> {
const existingPath = await resolveExistingRgPath();
if (existingPath) {
return existingPath;
}
if (!ripgrepAcquisitionPromise) {
ripgrepAcquisitionPromise = (async () => {
try {
await downloadRipGrep(Storage.getGlobalBinDir());
return await resolveExistingRgPath();
} finally {
ripgrepAcquisitionPromise = null;
}
})();
}
return ripgrepAcquisitionPromise;
}
/**
* Checks if `rg` exists, if not then attempt to download it.
*/
export async function canUseRipgrep(): Promise<boolean> {
return (await ensureRipgrepAvailable()) !== null;
}
/**
* Ensures `rg` is downloaded, or throws.
*/
export async function ensureRgPath(): Promise<string> {
const downloadedPath = await ensureRipgrepAvailable();
if (downloadedPath) {
return downloadedPath;
}
throw new Error('Cannot use ripgrep.');
}
/**
* Checks if a path is within the root directory and resolves it.
* @param config The configuration object.
* @param relativePath Path relative to the root directory (or undefined for root).
* @returns The absolute path if valid and exists, or null if no path specified.
* @throws {Error} If path is outside root, doesn't exist, or isn't a directory/file.
*/
function resolveAndValidatePath(
config: Config,
relativePath?: string,
): string | null {
if (!relativePath) {
return null;
}
const targetDir = config.getTargetDir();
const targetPath = path.resolve(targetDir, relativePath);
// Ensure the resolved path is within workspace boundaries
const workspaceContext = config.getWorkspaceContext();
if (!workspaceContext.isPathWithinWorkspace(targetPath)) {
const directories = workspaceContext.getDirectories();
throw new Error(
`Path validation failed: Attempted path "${relativePath}" resolves outside the allowed workspace directories: ${directories.join(', ')}`,
);
}
// Check existence and type after resolving
try {
const stats = fs.statSync(targetPath);
if (!stats.isDirectory() && !stats.isFile()) {
throw new Error(
`Path is not a valid directory or file: ${targetPath} (CWD: ${targetDir})`,
);
}
} catch (error: unknown) {
if (isNodeError(error) && error.code === 'ENOENT') {
throw new Error(`Path does not exist: ${targetPath} (CWD: ${targetDir})`);
}
throw new Error(`Failed to access path stats for ${targetPath}: ${error}`);
}
return targetPath;
}
/**
* Parameters for the GrepTool
*/
export interface RipGrepToolParams {
/**
* The regular expression pattern to search for in file contents
*/
pattern: string;
/**
* The directory to search in (optional, defaults to current directory relative to root)
*/
dir_path?: string;
/**
* File pattern to include in the search (e.g. "*.js", "*.{ts,tsx}")
*/
include?: string;
/**
* If true, searches case-sensitively. Defaults to false.
*/
case_sensitive?: boolean;
/**
* If true, treats pattern as a literal string. Defaults to false.
*/
fixed_strings?: boolean;
/**
* Show num lines of context around each match.
*/
context?: number;
/**
* Show num lines after each match.
*/
after?: number;
/**
* Show num lines before each match.
*/
before?: number;
/**
* If true, does not respect .gitignore or default ignores (like build/dist).
*/
no_ignore?: boolean;
}
/**
* Result object for a single grep match
*/
interface GrepMatch {
filePath: string;
lineNumber: number;
line: string;
}
class GrepToolInvocation extends BaseToolInvocation<
RipGrepToolParams,
ToolResult
> {
constructor(
private readonly config: Config,
private readonly geminiIgnoreParser: GeminiIgnoreParser,
params: RipGrepToolParams,
messageBus: MessageBus,
_toolName?: string,
_toolDisplayName?: string,
) {
super(params, messageBus, _toolName, _toolDisplayName);
}
async execute(signal: AbortSignal): Promise<ToolResult> {
try {
// Default to '.' if path is explicitly undefined/null.
// This forces CWD search instead of 'all workspaces' search by default.
const pathParam = this.params.dir_path || '.';
const searchDirAbs = resolveAndValidatePath(this.config, pathParam);
const searchDirDisplay = pathParam;
const totalMaxMatches = DEFAULT_TOTAL_MAX_MATCHES;
if (this.config.getDebugMode()) {
debugLogger.log(`[GrepTool] Total result limit: ${totalMaxMatches}`);
}
let allMatches = await this.performRipgrepSearch({
pattern: this.params.pattern,
path: searchDirAbs!,
include: this.params.include,
case_sensitive: this.params.case_sensitive,
fixed_strings: this.params.fixed_strings,
context: this.params.context,
after: this.params.after,
before: this.params.before,
no_ignore: this.params.no_ignore,
signal,
});
if (allMatches.length >= totalMaxMatches) {
allMatches = allMatches.slice(0, totalMaxMatches);
}
const searchLocationDescription = `in path "${searchDirDisplay}"`;
if (allMatches.length === 0) {
const noMatchMsg = `No matches found for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}.`;
return { llmContent: noMatchMsg, returnDisplay: `No matches found` };
}
const wasTruncated = allMatches.length >= totalMaxMatches;
const matchesByFile = allMatches.reduce(
(acc, match) => {
const fileKey = match.filePath;
if (!acc[fileKey]) {
acc[fileKey] = [];
}
acc[fileKey].push(match);
acc[fileKey].sort((a, b) => a.lineNumber - b.lineNumber);
return acc;
},
{} as Record<string, GrepMatch[]>,
);
const matchCount = allMatches.length;
const matchTerm = matchCount === 1 ? 'match' : 'matches';
let llmContent = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}`;
if (wasTruncated) {
llmContent += ` (results limited to ${totalMaxMatches} matches for performance)`;
}
llmContent += `:\n---\n`;
for (const filePath in matchesByFile) {
llmContent += `File: ${filePath}\n`;
matchesByFile[filePath].forEach((match) => {
const trimmedLine = match.line.trim();
llmContent += `L${match.lineNumber}: ${trimmedLine}\n`;
});
llmContent += '---\n';
}
let displayMessage = `Found ${matchCount} ${matchTerm}`;
if (wasTruncated) {
displayMessage += ` (limited)`;
}
return {
llmContent: llmContent.trim(),
returnDisplay: displayMessage,
};
} catch (error) {
debugLogger.warn(`Error during GrepLogic execution: ${error}`);
const errorMessage = getErrorMessage(error);
return {
llmContent: `Error during grep search operation: ${errorMessage}`,
returnDisplay: `Error: ${errorMessage}`,
};
}
}
private parseRipgrepJsonOutput(
output: string,
basePath: string,
): GrepMatch[] {
const results: GrepMatch[] = [];
if (!output) return results;
const lines = output.trim().split('\n');
for (const line of lines) {
if (!line.trim()) continue;
try {
const json = JSON.parse(line);
if (json.type === 'match') {
const match = json.data;
// Defensive check: ensure text properties exist (skips binary/invalid encoding)
if (match.path?.text && match.lines?.text) {
const absoluteFilePath = path.resolve(basePath, match.path.text);
const relativeFilePath = path.relative(basePath, absoluteFilePath);
results.push({
filePath: relativeFilePath || path.basename(absoluteFilePath),
lineNumber: match.line_number,
line: match.lines.text.trimEnd(),
});
}
}
} catch (error) {
debugLogger.warn(`Failed to parse ripgrep JSON line: ${line}`, error);
}
}
return results;
}
private async performRipgrepSearch(options: {
pattern: string;
path: string;
include?: string;
case_sensitive?: boolean;
fixed_strings?: boolean;
context?: number;
after?: number;
before?: number;
no_ignore?: boolean;
signal: AbortSignal;
}): Promise<GrepMatch[]> {
const {
pattern,
path: absolutePath,
include,
case_sensitive,
fixed_strings,
context,
after,
before,
no_ignore,
} = options;
const rgArgs = ['--json'];
if (!case_sensitive) {
rgArgs.push('--ignore-case');
}
if (fixed_strings) {
rgArgs.push('--fixed-strings');
rgArgs.push(pattern);
} else {
rgArgs.push('--regexp', pattern);
}
if (context) {
rgArgs.push('--context', context.toString());
}
if (after) {
rgArgs.push('--after-context', after.toString());
}
if (before) {
rgArgs.push('--before-context', before.toString());
}
if (no_ignore) {
rgArgs.push('--no-ignore');
}
if (include) {
rgArgs.push('--glob', include);
}
if (!no_ignore) {
const fileExclusions = new FileExclusions(this.config);
const excludes = fileExclusions.getGlobExcludes([
...COMMON_DIRECTORY_EXCLUDES,
'*.log',
'*.tmp',
]);
excludes.forEach((exclude) => {
rgArgs.push('--glob', `!${exclude}`);
});
if (this.config.getFileFilteringRespectGeminiIgnore()) {
// Add .geminiignore support (ripgrep natively handles .gitignore)
const geminiIgnorePath = this.geminiIgnoreParser.getIgnoreFilePath();
if (geminiIgnorePath) {
rgArgs.push('--ignore-file', geminiIgnorePath);
}
}
}
rgArgs.push('--threads', '4');
rgArgs.push(absolutePath);
try {
const rgPath = await ensureRgPath();
const output = await new Promise<string>((resolve, reject) => {
const child = spawn(rgPath, rgArgs, {
windowsHide: true,
});
const stdoutChunks: Buffer[] = [];
const stderrChunks: Buffer[] = [];
const cleanup = () => {
if (options.signal.aborted) {
child.kill();
}
};
options.signal.addEventListener('abort', cleanup, { once: true });
child.stdout.on('data', (chunk) => stdoutChunks.push(chunk));
child.stderr.on('data', (chunk) => stderrChunks.push(chunk));
child.on('error', (err) => {
options.signal.removeEventListener('abort', cleanup);
reject(
new Error(
`Failed to start ripgrep: ${err.message}. Please ensure @lvce-editor/ripgrep is properly installed.`,
),
);
});
child.on('close', (code) => {
options.signal.removeEventListener('abort', cleanup);
const stdoutData = Buffer.concat(stdoutChunks).toString('utf8');
const stderrData = Buffer.concat(stderrChunks).toString('utf8');
if (code === 0) {
resolve(stdoutData);
} else if (code === 1) {
resolve(''); // No matches found
} else {
reject(
new Error(`ripgrep exited with code ${code}: ${stderrData}`),
);
}
});
});
return this.parseRipgrepJsonOutput(output, absolutePath);
} catch (error: unknown) {
debugLogger.debug(`GrepLogic: ripgrep failed: ${getErrorMessage(error)}`);
throw error;
}
}
/**
* Gets a description of the grep operation
* @param params Parameters for the grep operation
* @returns A string describing the grep
*/
getDescription(): string {
let description = `'${this.params.pattern}'`;
if (this.params.include) {
description += ` in ${this.params.include}`;
}
const pathParam = this.params.dir_path || '.';
const resolvedPath = path.resolve(this.config.getTargetDir(), pathParam);
if (resolvedPath === this.config.getTargetDir() || pathParam === '.') {
description += ` within ./`;
} else {
const relativePath = makeRelative(
resolvedPath,
this.config.getTargetDir(),
);
description += ` within ${shortenPath(relativePath)}`;
}
return description;
}
}
/**
* Implementation of the Grep tool logic (moved from CLI)
*/
export class RipGrepTool extends BaseDeclarativeTool<
RipGrepToolParams,
ToolResult
> {
static readonly Name = GREP_TOOL_NAME;
private readonly geminiIgnoreParser: GeminiIgnoreParser;
constructor(
private readonly config: Config,
messageBus: MessageBus,
) {
super(
RipGrepTool.Name,
'SearchText',
'FAST, optimized search powered by `ripgrep`. PREFERRED over standard `run_shell_command("grep ...")` due to better performance and automatic output limiting (max 20k matches).',
Kind.Search,
{
properties: {
pattern: {
description:
"The pattern to search for. By default, treated as a Rust-flavored regular expression. Use '\\b' for precise symbol matching (e.g., '\\bMatchMe\\b').",
type: 'string',
},
dir_path: {
description:
"Directory or file to search. Directories are searched recursively. Relative paths are resolved against current working directory. Defaults to current working directory ('.') if omitted.",
type: 'string',
},
include: {
description:
"Glob pattern to filter files (e.g., '*.ts', 'src/**'). Recommended for large repositories to reduce noise. Defaults to all files if omitted.",
type: 'string',
},
case_sensitive: {
description:
'If true, search is case-sensitive. Defaults to false (ignore case) if omitted.',
type: 'boolean',
},
fixed_strings: {
description:
'If true, treats the `pattern` as a literal string instead of a regular expression. Defaults to false (basic regex) if omitted.',
type: 'boolean',
},
context: {
description:
'Show this many lines of context around each match (equivalent to grep -C). Defaults to 0 if omitted.',
type: 'integer',
},
after: {
description:
'Show this many lines after each match (equivalent to grep -A). Defaults to 0 if omitted.',
type: 'integer',
},
before: {
description:
'Show this many lines before each match (equivalent to grep -B). Defaults to 0 if omitted.',
type: 'integer',
},
no_ignore: {
description:
'If true, searches all files including those usually ignored (like in .gitignore, build/, dist/, etc). Defaults to false if omitted.',
type: 'boolean',
},
},
required: ['pattern'],
type: 'object',
},
messageBus,
true, // isOutputMarkdown
false, // canUpdateOutput
);
this.geminiIgnoreParser = new GeminiIgnoreParser(config.getTargetDir());
}
/**
* Validates the parameters for the tool
* @param params Parameters to validate
* @returns An error message string if invalid, null otherwise
*/
override validateToolParams(params: RipGrepToolParams): string | null {
const errors = SchemaValidator.validate(
this.schema.parametersJsonSchema,
params,
);
if (errors) {
return errors;
}
// Only validate path if one is provided
if (params.dir_path) {
try {
resolveAndValidatePath(this.config, params.dir_path);
} catch (error) {
return getErrorMessage(error);
}
}
return null; // Parameters are valid
}
protected createInvocation(
params: RipGrepToolParams,
messageBus: MessageBus,
_toolName?: string,
_toolDisplayName?: string,
): ToolInvocation<RipGrepToolParams, ToolResult> {
return new GrepToolInvocation(
this.config,
this.geminiIgnoreParser,
params,
messageBus ?? this.messageBus,
_toolName,
_toolDisplayName,
);
}
}