mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-27 05:24:34 -07:00
Use ranged reads and limited searches and fuzzy editing improvements (#19240)
This commit is contained in:
committed by
GitHub
parent
55c628e967
commit
ce84b3cb5f
@@ -373,6 +373,182 @@ describe('EditTool', () => {
|
||||
expect(result.occurrences).toBe(1);
|
||||
});
|
||||
|
||||
it('should perform a fuzzy replacement when exact match fails but similarity is high', async () => {
|
||||
const content =
|
||||
'const myConfig = {\n enableFeature: true,\n retries: 3\n};';
|
||||
// Typo: missing comma after true
|
||||
const oldString =
|
||||
'const myConfig = {\n enableFeature: true\n retries: 3\n};';
|
||||
const newString =
|
||||
'const myConfig = {\n enableFeature: false,\n retries: 5\n};';
|
||||
|
||||
const result = await calculateReplacement(mockConfig, {
|
||||
params: {
|
||||
file_path: 'config.ts',
|
||||
instruction: 'update config',
|
||||
old_string: oldString,
|
||||
new_string: newString,
|
||||
},
|
||||
currentContent: content,
|
||||
abortSignal,
|
||||
});
|
||||
|
||||
expect(result.occurrences).toBe(1);
|
||||
expect(result.newContent).toBe(newString);
|
||||
});
|
||||
|
||||
it('should NOT perform a fuzzy replacement when similarity is below threshold', async () => {
|
||||
const content =
|
||||
'const myConfig = {\n enableFeature: true,\n retries: 3\n};';
|
||||
// Completely different string
|
||||
const oldString = 'function somethingElse() {\n return false;\n}';
|
||||
const newString =
|
||||
'const myConfig = {\n enableFeature: false,\n retries: 5\n};';
|
||||
|
||||
const result = await calculateReplacement(mockConfig, {
|
||||
params: {
|
||||
file_path: 'config.ts',
|
||||
instruction: 'update config',
|
||||
old_string: oldString,
|
||||
new_string: newString,
|
||||
},
|
||||
currentContent: content,
|
||||
abortSignal,
|
||||
});
|
||||
|
||||
expect(result.occurrences).toBe(0);
|
||||
expect(result.newContent).toBe(content);
|
||||
});
|
||||
|
||||
it('should NOT perform a fuzzy replacement when the complexity (length * size) is too high', async () => {
|
||||
// 2000 chars
|
||||
const longString = 'a'.repeat(2000);
|
||||
|
||||
// Create a file with enough lines to trigger the complexity limit
|
||||
// Complexity = Lines * Length^2
|
||||
// Threshold = 500,000,000
|
||||
// 2000^2 = 4,000,000.
|
||||
// Need > 125 lines. Let's use 200 lines.
|
||||
const lines = Array(200).fill(longString);
|
||||
const content = lines.join('\n');
|
||||
|
||||
// Mismatch at the end (making it a fuzzy match candidate)
|
||||
const oldString = longString + 'c';
|
||||
const newString = 'replacement';
|
||||
|
||||
const result = await calculateReplacement(mockConfig, {
|
||||
params: {
|
||||
file_path: 'test.ts',
|
||||
instruction: 'update',
|
||||
old_string: oldString,
|
||||
new_string: newString,
|
||||
},
|
||||
currentContent: content,
|
||||
abortSignal,
|
||||
});
|
||||
|
||||
// Should return 0 occurrences because fuzzy match is skipped
|
||||
expect(result.occurrences).toBe(0);
|
||||
expect(result.newContent).toBe(content);
|
||||
});
|
||||
|
||||
it('should perform multiple fuzzy replacements if multiple valid matches are found', async () => {
|
||||
const content = `
|
||||
function doIt() {
|
||||
console.log("hello");
|
||||
}
|
||||
|
||||
function doIt() {
|
||||
console.log("hello");
|
||||
}
|
||||
`;
|
||||
// old_string uses single quotes, file uses double.
|
||||
// This is a fuzzy match (quote difference).
|
||||
const oldString = `
|
||||
function doIt() {
|
||||
console.log('hello');
|
||||
}
|
||||
`.trim();
|
||||
|
||||
const newString = `
|
||||
function doIt() {
|
||||
console.log("bye");
|
||||
}
|
||||
`.trim();
|
||||
|
||||
const result = await calculateReplacement(mockConfig, {
|
||||
params: {
|
||||
file_path: 'test.ts',
|
||||
instruction: 'update',
|
||||
old_string: oldString,
|
||||
new_string: newString,
|
||||
},
|
||||
currentContent: content,
|
||||
abortSignal,
|
||||
});
|
||||
|
||||
expect(result.occurrences).toBe(2);
|
||||
const expectedContent = `
|
||||
function doIt() {
|
||||
console.log("bye");
|
||||
}
|
||||
|
||||
function doIt() {
|
||||
console.log("bye");
|
||||
}
|
||||
`;
|
||||
expect(result.newContent).toBe(expectedContent);
|
||||
});
|
||||
|
||||
it('should correctly rebase indentation in flexible replacement without double-indenting', async () => {
|
||||
const content = ' if (a) {\n foo();\n }\n';
|
||||
// old_string and new_string are unindented. They should be rebased to 4-space.
|
||||
const oldString = 'if (a) {\n foo();\n}';
|
||||
const newString = 'if (a) {\n bar();\n}';
|
||||
|
||||
const result = await calculateReplacement(mockConfig, {
|
||||
params: {
|
||||
file_path: 'test.ts',
|
||||
old_string: oldString,
|
||||
new_string: newString,
|
||||
},
|
||||
currentContent: content,
|
||||
abortSignal,
|
||||
});
|
||||
|
||||
expect(result.occurrences).toBe(1);
|
||||
// foo() was at 8 spaces (4 base + 4 indent).
|
||||
// newString has bar() at 4 spaces (0 base + 4 indent).
|
||||
// Rebased to 4 base, it should be 4 + 4 = 8 spaces.
|
||||
const expectedContent = ' if (a) {\n bar();\n }\n';
|
||||
expect(result.newContent).toBe(expectedContent);
|
||||
});
|
||||
|
||||
it('should correctly rebase indentation in fuzzy replacement without double-indenting', async () => {
|
||||
const content =
|
||||
' const myConfig = {\n enableFeature: true,\n retries: 3\n };';
|
||||
// Typo: missing comma. old_string/new_string are unindented.
|
||||
const fuzzyOld =
|
||||
'const myConfig = {\n enableFeature: true\n retries: 3\n};';
|
||||
const fuzzyNew =
|
||||
'const myConfig = {\n enableFeature: false,\n retries: 5\n};';
|
||||
|
||||
const result = await calculateReplacement(mockConfig, {
|
||||
params: {
|
||||
file_path: 'test.ts',
|
||||
old_string: fuzzyOld,
|
||||
new_string: fuzzyNew,
|
||||
},
|
||||
currentContent: content,
|
||||
abortSignal,
|
||||
});
|
||||
|
||||
expect(result.strategy).toBe('fuzzy');
|
||||
const expectedContent =
|
||||
' const myConfig = {\n enableFeature: false,\n retries: 5\n };';
|
||||
expect(result.newContent).toBe(expectedContent);
|
||||
});
|
||||
|
||||
it('should NOT insert extra newlines when replacing a block preceded by a blank line (regression)', async () => {
|
||||
const content = '\n function oldFunc() {\n // some code\n }';
|
||||
const result = await calculateReplacement(mockConfig, {
|
||||
|
||||
@@ -49,8 +49,13 @@ import {
|
||||
EDIT_DISPLAY_NAME,
|
||||
} from './tool-names.js';
|
||||
import { debugLogger } from '../utils/debugLogger.js';
|
||||
import levenshtein from 'fast-levenshtein';
|
||||
import { EDIT_DEFINITION } from './definitions/coreTools.js';
|
||||
import { resolveToolDeclaration } from './definitions/resolver.js';
|
||||
|
||||
const ENABLE_FUZZY_MATCH_RECOVERY = true;
|
||||
const FUZZY_MATCH_THRESHOLD = 0.1; // Allow up to 10% weighted difference
|
||||
const WHITESPACE_PENALTY_FACTOR = 0.1; // Whitespace differences cost 10% of a character difference
|
||||
interface ReplacementContext {
|
||||
params: EditToolParams;
|
||||
currentContent: string;
|
||||
@@ -62,6 +67,8 @@ interface ReplacementResult {
|
||||
occurrences: number;
|
||||
finalOldString: string;
|
||||
finalNewString: string;
|
||||
strategy?: 'exact' | 'flexible' | 'regex' | 'fuzzy';
|
||||
matchRanges?: Array<{ start: number; end: number }>;
|
||||
}
|
||||
|
||||
export function applyReplacement(
|
||||
@@ -176,9 +183,7 @@ async function calculateFlexibleReplacement(
|
||||
const firstLineInMatch = window[0];
|
||||
const indentationMatch = firstLineInMatch.match(/^([ \t]*)/);
|
||||
const indentation = indentationMatch ? indentationMatch[1] : '';
|
||||
const newBlockWithIndent = replaceLines.map(
|
||||
(line: string) => `${indentation}${line}`,
|
||||
);
|
||||
const newBlockWithIndent = applyIndentation(replaceLines, indentation);
|
||||
sourceLines.splice(
|
||||
i,
|
||||
searchLinesStripped.length,
|
||||
@@ -247,9 +252,7 @@ async function calculateRegexReplacement(
|
||||
|
||||
const indentation = match[1] || '';
|
||||
const newLines = normalizedReplace.split('\n');
|
||||
const newBlockWithIndent = newLines
|
||||
.map((line) => `${indentation}${line}`)
|
||||
.join('\n');
|
||||
const newBlockWithIndent = applyIndentation(newLines, indentation).join('\n');
|
||||
|
||||
// Use replace with the regex to substitute the matched content.
|
||||
// Since the regex doesn't have the 'g' flag, it will only replace the first occurrence.
|
||||
@@ -305,6 +308,14 @@ export async function calculateReplacement(
|
||||
return regexResult;
|
||||
}
|
||||
|
||||
let fuzzyResult;
|
||||
if (
|
||||
ENABLE_FUZZY_MATCH_RECOVERY &&
|
||||
(fuzzyResult = await calculateFuzzyReplacement(config, context))
|
||||
) {
|
||||
return fuzzyResult;
|
||||
}
|
||||
|
||||
return {
|
||||
newContent: currentContent,
|
||||
occurrences: 0,
|
||||
@@ -395,6 +406,8 @@ interface CalculatedEdit {
|
||||
error?: { display: string; raw: string; type: ToolErrorType };
|
||||
isNewFile: boolean;
|
||||
originalLineEnding: '\r\n' | '\n';
|
||||
strategy?: 'exact' | 'flexible' | 'regex' | 'fuzzy';
|
||||
matchRanges?: Array<{ start: number; end: number }>;
|
||||
}
|
||||
|
||||
class EditToolInvocation
|
||||
@@ -520,6 +533,8 @@ class EditToolInvocation
|
||||
isNewFile: false,
|
||||
error: undefined,
|
||||
originalLineEnding,
|
||||
strategy: secondAttemptResult.strategy,
|
||||
matchRanges: secondAttemptResult.matchRanges,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -633,6 +648,8 @@ class EditToolInvocation
|
||||
isNewFile: false,
|
||||
error: undefined,
|
||||
originalLineEnding,
|
||||
strategy: replacementResult.strategy,
|
||||
matchRanges: replacementResult.matchRanges,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -859,6 +876,10 @@ class EditToolInvocation
|
||||
? `Created new file: ${this.params.file_path} with provided content.`
|
||||
: `Successfully modified file: ${this.params.file_path} (${editData.occurrences} replacements).`,
|
||||
];
|
||||
const fuzzyFeedback = getFuzzyMatchFeedback(editData);
|
||||
if (fuzzyFeedback) {
|
||||
llmSuccessMessageParts.push(fuzzyFeedback);
|
||||
}
|
||||
if (this.params.modified_by_user) {
|
||||
llmSuccessMessageParts.push(
|
||||
`User modified the \`new_string\` content to be: ${this.params.new_string}.`,
|
||||
@@ -1011,3 +1032,188 @@ export class EditTool
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
function stripWhitespace(str: string): string {
|
||||
return str.replace(/\s/g, '');
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies the target indentation to the lines, while preserving relative indentation.
|
||||
* It identifies the common indentation of the provided lines and replaces it with the target indentation.
|
||||
*/
|
||||
function applyIndentation(
|
||||
lines: string[],
|
||||
targetIndentation: string,
|
||||
): string[] {
|
||||
if (lines.length === 0) return [];
|
||||
|
||||
// Use the first line as the reference for indentation, even if it's empty/whitespace.
|
||||
// This is because flexible/fuzzy matching identifies the indentation of the START of the match.
|
||||
const referenceLine = lines[0];
|
||||
const refIndentMatch = referenceLine.match(/^([ \t]*)/);
|
||||
const refIndent = refIndentMatch ? refIndentMatch[1] : '';
|
||||
|
||||
return lines.map((line) => {
|
||||
if (line.trim() === '') {
|
||||
return '';
|
||||
}
|
||||
if (line.startsWith(refIndent)) {
|
||||
return targetIndentation + line.slice(refIndent.length);
|
||||
}
|
||||
return targetIndentation + line.trimStart();
|
||||
});
|
||||
}
|
||||
|
||||
function getFuzzyMatchFeedback(editData: CalculatedEdit): string | null {
|
||||
if (
|
||||
editData.strategy === 'fuzzy' &&
|
||||
editData.matchRanges &&
|
||||
editData.matchRanges.length > 0
|
||||
) {
|
||||
const ranges = editData.matchRanges
|
||||
.map((r) => (r.start === r.end ? `${r.start}` : `${r.start}-${r.end}`))
|
||||
.join(', ');
|
||||
return `Applied fuzzy match at line${editData.matchRanges.length > 1 ? 's' : ''} ${ranges}.`;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
async function calculateFuzzyReplacement(
|
||||
config: Config,
|
||||
context: ReplacementContext,
|
||||
): Promise<ReplacementResult | null> {
|
||||
const { currentContent, params } = context;
|
||||
const { old_string, new_string } = params;
|
||||
|
||||
// Pre-check: Don't fuzzy match very short strings to avoid false positives
|
||||
if (old_string.length < 10) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const normalizedCode = currentContent.replace(/\r\n/g, '\n');
|
||||
const normalizedSearch = old_string.replace(/\r\n/g, '\n');
|
||||
const normalizedReplace = new_string.replace(/\r\n/g, '\n');
|
||||
|
||||
const sourceLines = normalizedCode.match(/.*(?:\n|$)/g)?.slice(0, -1) ?? [];
|
||||
const searchLines = normalizedSearch
|
||||
.match(/.*(?:\n|$)/g)
|
||||
?.slice(0, -1)
|
||||
.map((l) => l.trimEnd()); // Trim end of search lines to be more robust
|
||||
|
||||
// Limit the scope of the fuzzy match to reduce impact on responsivesness.
|
||||
// Each comparison takes roughly O(L^2) time.
|
||||
// We perform sourceLines.length comparisons (sliding window).
|
||||
// Total complexity proxy: sourceLines.length * old_string.length^2
|
||||
// Limit to 4e8 for < 1 second.
|
||||
if (sourceLines.length * Math.pow(old_string.length, 2) > 400_000_000) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!searchLines || searchLines.length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const N = searchLines.length;
|
||||
const candidates: Array<{ index: number; score: number }> = [];
|
||||
const searchBlock = searchLines.join('\n');
|
||||
|
||||
// Sliding window
|
||||
for (let i = 0; i <= sourceLines.length - N; i++) {
|
||||
const windowLines = sourceLines.slice(i, i + N);
|
||||
const windowText = windowLines.map((l) => l.trimEnd()).join('\n'); // Normalized join for comparison
|
||||
|
||||
// Length Heuristic Optimization
|
||||
const lengthDiff = Math.abs(windowText.length - searchBlock.length);
|
||||
if (
|
||||
lengthDiff / searchBlock.length >
|
||||
FUZZY_MATCH_THRESHOLD / WHITESPACE_PENALTY_FACTOR
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Tiered Scoring
|
||||
const d_raw = levenshtein.get(windowText, searchBlock);
|
||||
const d_norm = levenshtein.get(
|
||||
stripWhitespace(windowText),
|
||||
stripWhitespace(searchBlock),
|
||||
);
|
||||
|
||||
const weightedDist = d_norm + (d_raw - d_norm) * WHITESPACE_PENALTY_FACTOR;
|
||||
const score = weightedDist / searchBlock.length;
|
||||
|
||||
if (score <= FUZZY_MATCH_THRESHOLD) {
|
||||
candidates.push({ index: i, score });
|
||||
}
|
||||
}
|
||||
|
||||
if (candidates.length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Select best non-overlapping matches
|
||||
// Sort by score ascending. If scores equal, prefer earlier index (stable sort).
|
||||
candidates.sort((a, b) => a.score - b.score || a.index - b.index);
|
||||
|
||||
const selectedMatches: Array<{ index: number; score: number }> = [];
|
||||
for (const candidate of candidates) {
|
||||
// Check for overlap with already selected matches
|
||||
// Two windows overlap if their start indices are within N lines of each other
|
||||
// (Assuming window size N. Actually overlap is |i - j| < N)
|
||||
const overlaps = selectedMatches.some(
|
||||
(m) => Math.abs(m.index - candidate.index) < N,
|
||||
);
|
||||
if (!overlaps) {
|
||||
selectedMatches.push(candidate);
|
||||
}
|
||||
}
|
||||
|
||||
// If we found matches, apply them
|
||||
if (selectedMatches.length > 0) {
|
||||
const event = new EditStrategyEvent('fuzzy');
|
||||
logEditStrategy(config, event);
|
||||
|
||||
// Calculate match ranges before sorting for replacement
|
||||
// Indices in selectedMatches are 0-based line indices
|
||||
const matchRanges = selectedMatches
|
||||
.map((m) => ({ start: m.index + 1, end: m.index + N }))
|
||||
.sort((a, b) => a.start - b.start);
|
||||
|
||||
// Sort matches by index descending to apply replacements from bottom to top
|
||||
// so that indices remain valid
|
||||
selectedMatches.sort((a, b) => b.index - a.index);
|
||||
|
||||
const newLines = normalizedReplace.split('\n');
|
||||
|
||||
for (const match of selectedMatches) {
|
||||
// If we want to preserve the indentation of the first line of the match:
|
||||
const firstLineMatch = sourceLines[match.index];
|
||||
const indentationMatch = firstLineMatch.match(/^([ \t]*)/);
|
||||
const indentation = indentationMatch ? indentationMatch[1] : '';
|
||||
|
||||
const indentedReplaceLines = applyIndentation(newLines, indentation);
|
||||
|
||||
let replacementText = indentedReplaceLines.join('\n');
|
||||
// If the last line of the match had a newline, preserve it in the replacement
|
||||
// to avoid merging with the next line or losing a blank line separator.
|
||||
if (sourceLines[match.index + N - 1].endsWith('\n')) {
|
||||
replacementText += '\n';
|
||||
}
|
||||
|
||||
sourceLines.splice(match.index, N, replacementText);
|
||||
}
|
||||
|
||||
let modifiedCode = sourceLines.join('');
|
||||
modifiedCode = restoreTrailingNewline(currentContent, modifiedCode);
|
||||
|
||||
return {
|
||||
newContent: modifiedCode,
|
||||
occurrences: selectedMatches.length,
|
||||
finalOldString: normalizedSearch,
|
||||
finalNewString: normalizedReplace,
|
||||
strategy: 'fuzzy',
|
||||
matchRanges,
|
||||
};
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -235,8 +235,8 @@ describe('LSTool', () => {
|
||||
|
||||
expect(entries[0]).toBe('[DIR] x-dir');
|
||||
expect(entries[1]).toBe('[DIR] y-dir');
|
||||
expect(entries[2]).toBe('a-file.txt');
|
||||
expect(entries[3]).toBe('b-file.txt');
|
||||
expect(entries[2]).toBe('a-file.txt (8 bytes)');
|
||||
expect(entries[3]).toBe('b-file.txt (8 bytes)');
|
||||
});
|
||||
|
||||
it('should handle permission errors gracefully', async () => {
|
||||
|
||||
@@ -241,7 +241,12 @@ class LSToolInvocation extends BaseToolInvocation<LSToolParams, ToolResult> {
|
||||
|
||||
// Create formatted content for LLM
|
||||
const directoryContent = entries
|
||||
.map((entry) => `${entry.isDirectory ? '[DIR] ' : ''}${entry.name}`)
|
||||
.map((entry) => {
|
||||
if (entry.isDirectory) {
|
||||
return `[DIR] ${entry.name}`;
|
||||
}
|
||||
return `${entry.name} (${entry.size} bytes)`;
|
||||
})
|
||||
.join('\n');
|
||||
|
||||
let resultMessage = `Directory listing for ${resolvedDirPath}:\n${directoryContent}`;
|
||||
|
||||
Reference in New Issue
Block a user