/** * @license * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ import fsPromises from 'node:fs/promises'; import { debugLogger } from '../utils/debugLogger.js'; import { MAX_LINE_LENGTH_TEXT_FILE } from '../utils/constants.js'; /** * Result object for a single grep match */ export interface GrepMatch { filePath: string; absolutePath: string; lineNumber: number; line: string; isContext?: boolean; } /** * Groups matches by their file path and ensures they are sorted by line number. */ export function groupMatchesByFile( allMatches: GrepMatch[], ): Record { const groups: Record = {}; for (const match of allMatches) { if (!groups[match.filePath]) { groups[match.filePath] = []; } groups[match.filePath].push(match); } for (const filePath in groups) { groups[filePath].sort((a, b) => a.lineNumber - b.lineNumber); } return groups; } /** * Reads the content of a file and splits it into lines. * Returns null if the file cannot be read. */ export async function readFileLines( absolutePath: string, ): Promise { try { const content = await fsPromises.readFile(absolutePath, 'utf8'); return content.split(/\r?\n/); } catch (err) { debugLogger.warn(`Failed to read file for context: ${absolutePath}`, err); return null; } } /** * Automatically enriches grep results with surrounding context if the match count is low * and no specific context was requested. This optimization can enable the agent * to skip turns that would be spent reading files after grep calls. */ export async function enrichWithAutoContext( matchesByFile: Record, matchCount: number, params: { names_only?: boolean; context?: number; before?: number; after?: number; }, ): Promise { const { names_only, context, before, after } = params; if ( matchCount >= 1 && matchCount <= 3 && !names_only && context === undefined && before === undefined && after === undefined ) { const contextLines = matchCount === 1 ? 50 : 15; for (const filePath in matchesByFile) { const fileMatches = matchesByFile[filePath]; if (fileMatches.length === 0) continue; const fileLines = await readFileLines(fileMatches[0].absolutePath); if (fileLines) { const newFileMatches: GrepMatch[] = []; const seenLines = new Set(); // Sort matches to process them in order fileMatches.sort((a, b) => a.lineNumber - b.lineNumber); for (const match of fileMatches) { const startLine = Math.max(0, match.lineNumber - 1 - contextLines); const endLine = Math.min( fileLines.length, match.lineNumber - 1 + contextLines + 1, ); for (let i = startLine; i < endLine; i++) { const lineNum = i + 1; if (!seenLines.has(lineNum)) { newFileMatches.push({ absolutePath: match.absolutePath, filePath: match.filePath, lineNumber: lineNum, line: fileLines[i], isContext: lineNum !== match.lineNumber, }); seenLines.add(lineNum); } else if (lineNum === match.lineNumber) { const existing = newFileMatches.find( (m) => m.lineNumber === lineNum, ); if (existing) { existing.isContext = false; } } } } matchesByFile[filePath] = newFileMatches.sort( (a, b) => a.lineNumber - b.lineNumber, ); } } } } /** * Formats the grep results for the LLM, including optional context. */ export async function formatGrepResults( allMatches: GrepMatch[], params: { pattern: string; names_only?: boolean; include_pattern?: string; // Context params to determine if auto-context should be skipped context?: number; before?: number; after?: number; }, searchLocationDescription: string, totalMaxMatches: number, ): Promise<{ llmContent: string; returnDisplay: string }> { const { pattern, names_only, include_pattern } = params; if (allMatches.length === 0) { const noMatchMsg = `No matches found for pattern "${pattern}" ${searchLocationDescription}${include_pattern ? ` (filter: "${include_pattern}")` : ''}.`; return { llmContent: noMatchMsg, returnDisplay: `No matches found` }; } const matchesByFile = groupMatchesByFile(allMatches); const matchesOnly = allMatches.filter((m) => !m.isContext); const matchCount = matchesOnly.length; // Count actual matches, not context lines const matchTerm = matchCount === 1 ? 'match' : 'matches'; // If the result count is low and Gemini didn't request before/after lines of context // add a small amount anyways to enable the agent to avoid one or more extra turns // reading the matched files. This optimization reduces turns count by ~10% in SWEBench. await enrichWithAutoContext(matchesByFile, matchCount, params); const wasTruncated = matchCount >= totalMaxMatches; if (names_only) { const filePaths = Object.keys(matchesByFile).sort(); let llmContent = `Found ${filePaths.length} files with matches for pattern "${pattern}" ${searchLocationDescription}${ include_pattern ? ` (filter: "${include_pattern}")` : '' }${ wasTruncated ? ` (results limited to ${totalMaxMatches} matches for performance)` : '' }:\n`; llmContent += filePaths.join('\n'); return { llmContent: llmContent.trim(), returnDisplay: `Found ${filePaths.length} files${wasTruncated ? ' (limited)' : ''}`, }; } let llmContent = `Found ${matchCount} ${matchTerm} for pattern "${pattern}" ${searchLocationDescription}${include_pattern ? ` (filter: "${include_pattern}")` : ''}`; if (wasTruncated) { llmContent += ` (results limited to ${totalMaxMatches} matches for performance)`; } llmContent += `:\n---\n`; for (const filePath in matchesByFile) { llmContent += `File: ${filePath}\n`; matchesByFile[filePath].forEach((match) => { // If isContext is undefined, assume it's a match (false) const separator = match.isContext ? '-' : ':'; // trimEnd to avoid double newlines if line has them, but we want to preserve indentation let lineContent = match.line.trimEnd(); const graphemes = Array.from(lineContent); if (graphemes.length > MAX_LINE_LENGTH_TEXT_FILE) { lineContent = graphemes.slice(0, MAX_LINE_LENGTH_TEXT_FILE).join('') + '... [truncated]'; } llmContent += `L${match.lineNumber}${separator} ${lineContent}\n`; }); llmContent += '---\n'; } return { llmContent: llmContent.trim(), returnDisplay: `Found ${matchCount} ${matchTerm}${ wasTruncated ? ' (limited)' : '' }`, }; }