Avoid overaggressive unescaping (#20520)

This commit is contained in:
Tommaso Sciortino
2026-02-26 17:50:21 -08:00
committed by GitHub
parent ecfa4e0437
commit 4b7ce1fe67
6 changed files with 110 additions and 1369 deletions

View File

@@ -67,6 +67,7 @@ describe('Tool Confirmation Policy Updates', () => {
getBaseLlmClient: () => ({}),
getDisableLLMCorrection: () => true,
getIdeMode: () => false,
getActiveModel: () => 'test-model',
getWorkspaceContext: () => ({
isPathWithinWorkspace: () => true,
getDirectories: () => [rootDir],

View File

@@ -25,10 +25,7 @@ import fs from 'node:fs';
import os from 'node:os';
import { GeminiClient } from '../core/client.js';
import type { BaseLlmClient } from '../core/baseLlmClient.js';
import {
ensureCorrectEdit,
ensureCorrectFileContent,
} from '../utils/editCorrector.js';
import { ensureCorrectFileContent } from '../utils/editCorrector.js';
import { StandardFileSystemService } from '../services/fileSystemService.js';
import { WorkspaceContext } from '../utils/workspaceContext.js';
import {
@@ -52,7 +49,6 @@ vi.mock('../ide/ide-client.js', () => ({
let mockGeminiClientInstance: Mocked<GeminiClient>;
let mockBaseLlmClientInstance: Mocked<BaseLlmClient>;
const mockEnsureCorrectEdit = vi.fn<typeof ensureCorrectEdit>();
const mockEnsureCorrectFileContent = vi.fn<typeof ensureCorrectFileContent>();
// Mock Config
@@ -81,6 +77,7 @@ const mockConfigInternal = {
getGeminiMdFileCount: () => 0,
setGeminiMdFileCount: vi.fn(),
getDisableLLMCorrection: vi.fn(() => false),
getActiveModel: () => 'test-model',
validatePathAccess: vi.fn().mockReturnValue(null),
getToolRegistry: () =>
({
@@ -120,7 +117,6 @@ describe('Line Ending Preservation', () => {
generateJson: vi.fn(),
} as unknown as Mocked<BaseLlmClient>;
vi.mocked(ensureCorrectEdit).mockImplementation(mockEnsureCorrectEdit);
vi.mocked(ensureCorrectFileContent).mockImplementation(
mockEnsureCorrectFileContent,
);
@@ -177,14 +173,7 @@ describe('Line Ending Preservation', () => {
const proposedContent = 'line1\nline2\nline3\n';
// Mock corrections to return proposed content as-is (but usually normalized)
mockEnsureCorrectEdit.mockResolvedValue({
params: {
file_path: filePath,
old_string: originalContent,
new_string: proposedContent,
},
occurrences: 1,
});
mockEnsureCorrectFileContent.mockResolvedValue(proposedContent);
const params = { file_path: filePath, content: proposedContent };
const invocation = tool.build(params);

View File

@@ -23,7 +23,6 @@ import type {
ToolResult,
} from './tools.js';
import { ToolConfirmationOutcome } from './tools.js';
import { type EditToolParams } from './edit.js';
import type { Config } from '../config/config.js';
import { ApprovalMode } from '../policy/types.js';
import type { ToolRegistry } from './tool-registry.js';
@@ -33,11 +32,7 @@ import fs from 'node:fs';
import os from 'node:os';
import { GeminiClient } from '../core/client.js';
import type { BaseLlmClient } from '../core/baseLlmClient.js';
import type { CorrectedEditResult } from '../utils/editCorrector.js';
import {
ensureCorrectEdit,
ensureCorrectFileContent,
} from '../utils/editCorrector.js';
import { ensureCorrectFileContent } from '../utils/editCorrector.js';
import { StandardFileSystemService } from '../services/fileSystemService.js';
import type { DiffUpdateResult } from '../ide/ide-client.js';
import { IdeClient } from '../ide/ide-client.js';
@@ -61,7 +56,6 @@ vi.mock('../ide/ide-client.js', () => ({
let mockGeminiClientInstance: Mocked<GeminiClient>;
let mockBaseLlmClientInstance: Mocked<BaseLlmClient>;
let mockConfig: Config;
const mockEnsureCorrectEdit = vi.fn<typeof ensureCorrectEdit>();
const mockEnsureCorrectFileContent = vi.fn<typeof ensureCorrectFileContent>();
const mockIdeClient = {
openDiff: vi.fn(),
@@ -69,7 +63,6 @@ const mockIdeClient = {
};
// Wire up the mocked functions to be used by the actual module imports
vi.mocked(ensureCorrectEdit).mockImplementation(mockEnsureCorrectEdit);
vi.mocked(ensureCorrectFileContent).mockImplementation(
mockEnsureCorrectFileContent,
);
@@ -110,6 +103,7 @@ const mockConfigInternal = {
}) as unknown as ToolRegistry,
isInteractive: () => false,
getDisableLLMCorrection: vi.fn(() => true),
getActiveModel: () => 'test-model',
storage: {
getProjectTempDir: vi.fn().mockReturnValue('/tmp/project'),
},
@@ -179,7 +173,6 @@ describe('WriteFileTool', () => {
generateJson: vi.fn(),
} as unknown as Mocked<BaseLlmClient>;
vi.mocked(ensureCorrectEdit).mockImplementation(mockEnsureCorrectEdit);
vi.mocked(ensureCorrectFileContent).mockImplementation(
mockEnsureCorrectFileContent,
);
@@ -199,28 +192,9 @@ describe('WriteFileTool', () => {
// Reset mocks before each test
mockConfigInternal.getApprovalMode.mockReturnValue(ApprovalMode.DEFAULT);
mockConfigInternal.setApprovalMode.mockClear();
mockEnsureCorrectEdit.mockReset();
mockEnsureCorrectFileContent.mockReset();
// Default mock implementations that return valid structures
mockEnsureCorrectEdit.mockImplementation(
async (
filePath: string,
_currentContent: string,
params: EditToolParams,
_client: GeminiClient,
_baseClient: BaseLlmClient,
signal?: AbortSignal,
): Promise<CorrectedEditResult> => {
if (signal?.aborted) {
return Promise.reject(new Error('Aborted'));
}
return Promise.resolve({
params: { ...params, new_string: params.new_string ?? '' },
occurrences: 1,
});
},
);
mockEnsureCorrectFileContent.mockImplementation(
async (
content: string,
@@ -369,15 +343,43 @@ describe('WriteFileTool', () => {
mockBaseLlmClientInstance,
abortSignal,
true,
true, // aggressiveUnescape
);
expect(mockEnsureCorrectEdit).not.toHaveBeenCalled();
expect(result.correctedContent).toBe(correctedContent);
expect(result.originalContent).toBe('');
expect(result.fileExists).toBe(false);
expect(result.error).toBeUndefined();
});
it('should call ensureCorrectEdit for an existing file', async () => {
it('should set aggressiveUnescape to false for gemini-3 models', async () => {
const filePath = path.join(rootDir, 'gemini3_file.txt');
const proposedContent = 'Proposed new content.';
const abortSignal = new AbortController().signal;
const mockGemini3Config = {
...mockConfig,
getActiveModel: () => 'gemini-3.0-pro',
} as unknown as Config;
mockEnsureCorrectFileContent.mockResolvedValue('Corrected new content.');
await getCorrectedFileContent(
mockGemini3Config,
filePath,
proposedContent,
abortSignal,
);
expect(mockEnsureCorrectFileContent).toHaveBeenCalledWith(
proposedContent,
mockBaseLlmClientInstance,
abortSignal,
true,
false, // aggressiveUnescape
);
});
it('should call ensureCorrectFileContent for an existing file', async () => {
const filePath = path.join(rootDir, 'existing_corrected_file.txt');
const originalContent = 'Original existing content.';
const proposedContent = 'Proposed replacement content.';
@@ -386,14 +388,7 @@ describe('WriteFileTool', () => {
fs.writeFileSync(filePath, originalContent, 'utf8');
// Ensure this mock is active and returns the correct structure
mockEnsureCorrectEdit.mockResolvedValue({
params: {
file_path: filePath,
old_string: originalContent,
new_string: correctedProposedContent,
},
occurrences: 1,
} as CorrectedEditResult);
mockEnsureCorrectFileContent.mockResolvedValue(correctedProposedContent);
const result = await getCorrectedFileContent(
mockConfig,
@@ -402,20 +397,13 @@ describe('WriteFileTool', () => {
abortSignal,
);
expect(mockEnsureCorrectEdit).toHaveBeenCalledWith(
filePath,
originalContent,
{
old_string: originalContent,
new_string: proposedContent,
file_path: filePath,
},
mockGeminiClientInstance,
expect(mockEnsureCorrectFileContent).toHaveBeenCalledWith(
proposedContent,
mockBaseLlmClientInstance,
abortSignal,
true,
true, // aggressiveUnescape
);
expect(mockEnsureCorrectFileContent).not.toHaveBeenCalled();
expect(result.correctedContent).toBe(correctedProposedContent);
expect(result.originalContent).toBe(originalContent);
expect(result.fileExists).toBe(true);
@@ -441,7 +429,6 @@ describe('WriteFileTool', () => {
);
expect(fsService.readTextFile).toHaveBeenCalledWith(filePath);
expect(mockEnsureCorrectEdit).not.toHaveBeenCalled();
expect(mockEnsureCorrectFileContent).not.toHaveBeenCalled();
expect(result.correctedContent).toBe(proposedContent);
expect(result.originalContent).toBe('');
@@ -492,6 +479,7 @@ describe('WriteFileTool', () => {
mockBaseLlmClientInstance,
abortSignal,
true,
true, // aggressiveUnescape
);
expect(confirmation).toEqual(
expect.objectContaining({
@@ -516,14 +504,7 @@ describe('WriteFileTool', () => {
'Corrected replacement for confirmation.';
fs.writeFileSync(filePath, originalContent, 'utf8');
mockEnsureCorrectEdit.mockResolvedValue({
params: {
file_path: filePath,
old_string: originalContent,
new_string: correctedProposedContent,
},
occurrences: 1,
});
mockEnsureCorrectFileContent.mockResolvedValue(correctedProposedContent);
const params = { file_path: filePath, content: proposedContent };
const invocation = tool.build(params);
@@ -531,18 +512,12 @@ describe('WriteFileTool', () => {
abortSignal,
)) as ToolEditConfirmationDetails;
expect(mockEnsureCorrectEdit).toHaveBeenCalledWith(
filePath,
originalContent,
{
old_string: originalContent,
new_string: proposedContent,
file_path: filePath,
},
mockGeminiClientInstance,
expect(mockEnsureCorrectFileContent).toHaveBeenCalledWith(
proposedContent,
mockBaseLlmClientInstance,
abortSignal,
true,
true, // aggressiveUnescape
);
expect(confirmation).toEqual(
expect.objectContaining({
@@ -738,6 +713,7 @@ describe('WriteFileTool', () => {
mockBaseLlmClientInstance,
abortSignal,
true,
true, // aggressiveUnescape
);
expect(result.llmContent).toMatch(
/Successfully created and wrote to new file/,
@@ -768,14 +744,7 @@ describe('WriteFileTool', () => {
const correctedProposedContent = 'Corrected overwrite for execute.';
fs.writeFileSync(filePath, initialContent, 'utf8');
mockEnsureCorrectEdit.mockResolvedValue({
params: {
file_path: filePath,
old_string: initialContent,
new_string: correctedProposedContent,
},
occurrences: 1,
});
mockEnsureCorrectFileContent.mockResolvedValue(correctedProposedContent);
const params = { file_path: filePath, content: proposedContent };
const invocation = tool.build(params);
@@ -784,18 +753,12 @@ describe('WriteFileTool', () => {
const result = await invocation.execute(abortSignal);
expect(mockEnsureCorrectEdit).toHaveBeenCalledWith(
filePath,
initialContent,
{
old_string: initialContent,
new_string: proposedContent,
file_path: filePath,
},
mockGeminiClientInstance,
expect(mockEnsureCorrectFileContent).toHaveBeenCalledWith(
proposedContent,
mockBaseLlmClientInstance,
abortSignal,
true,
true, // aggressiveUnescape
);
expect(result.llmContent).toMatch(/Successfully overwrote file/);
const writtenContent = await fsService.readTextFile(filePath);
@@ -892,14 +855,7 @@ describe('WriteFileTool', () => {
newLines[50] = 'Line 51 Modified'; // Modify one line in the middle
const newContent = newLines.join('\n');
mockEnsureCorrectEdit.mockResolvedValue({
params: {
file_path: filePath,
old_string: originalContent,
new_string: newContent,
},
occurrences: 1,
});
mockEnsureCorrectFileContent.mockResolvedValue(newContent);
const params = { file_path: filePath, content: newContent };
const invocation = tool.build(params);
@@ -1072,13 +1028,13 @@ describe('WriteFileTool', () => {
mockBaseLlmClientInstance,
abortSignal,
true,
true, // aggressiveUnescape
);
expect(mockEnsureCorrectEdit).not.toHaveBeenCalled();
expect(result.correctedContent).toBe(proposedContent);
expect(result.fileExists).toBe(false);
});
it('should call ensureCorrectEdit with disableLLMCorrection=true for an existing file when disabled', async () => {
it('should call ensureCorrectFileContent with disableLLMCorrection=true for an existing file when disabled', async () => {
const filePath = path.join(rootDir, 'existing_file_no_correction.txt');
const originalContent = 'Original content.';
const proposedContent = 'Proposed content.';
@@ -1086,14 +1042,7 @@ describe('WriteFileTool', () => {
mockConfigInternal.getDisableLLMCorrection.mockReturnValue(true);
// Ensure the mock returns the content passed to it
mockEnsureCorrectEdit.mockResolvedValue({
params: {
file_path: filePath,
old_string: originalContent,
new_string: proposedContent,
},
occurrences: 1,
});
mockEnsureCorrectFileContent.mockResolvedValue(proposedContent);
const result = await getCorrectedFileContent(
mockConfig,
@@ -1102,16 +1051,13 @@ describe('WriteFileTool', () => {
abortSignal,
);
expect(mockEnsureCorrectEdit).toHaveBeenCalledWith(
filePath,
originalContent,
expect.anything(), // params object
mockGeminiClientInstance,
expect(mockEnsureCorrectFileContent).toHaveBeenCalledWith(
proposedContent,
mockBaseLlmClientInstance,
abortSignal,
true,
true, // aggressiveUnescape
);
expect(mockEnsureCorrectFileContent).not.toHaveBeenCalled();
expect(result.correctedContent).toBe(proposedContent);
expect(result.originalContent).toBe(originalContent);
expect(result.fileExists).toBe(true);

View File

@@ -26,10 +26,7 @@ import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js';
import { ToolErrorType } from './tool-error.js';
import { makeRelative, shortenPath } from '../utils/paths.js';
import { getErrorMessage, isNodeError } from '../utils/errors.js';
import {
ensureCorrectEdit,
ensureCorrectFileContent,
} from '../utils/editCorrector.js';
import { ensureCorrectFileContent } from '../utils/editCorrector.js';
import { detectLineEnding } from '../utils/textUtils.js';
import { DEFAULT_DIFF_OPTIONS, getDiffStat } from './diffOptions.js';
import { getDiffContextSnippet } from './diff-utils.js';
@@ -48,6 +45,7 @@ import { debugLogger } from '../utils/debugLogger.js';
import { WRITE_FILE_DEFINITION } from './definitions/coreTools.js';
import { resolveToolDeclaration } from './definitions/resolver.js';
import { detectOmissionPlaceholders } from './omissionPlaceholderDetector.js';
import { isGemini3Model } from '../config/models.js';
/**
* Parameters for the WriteFile tool
@@ -113,35 +111,16 @@ export async function getCorrectedFileContent(
}
}
// If readError is set, we have returned.
// So, file was either read successfully (fileExists=true, originalContent set)
// or it was ENOENT (fileExists=false, originalContent='').
const aggressiveUnescape = !isGemini3Model(config.getActiveModel());
correctedContent = await ensureCorrectFileContent(
proposedContent,
config.getBaseLlmClient(),
abortSignal,
config.getDisableLLMCorrection(),
aggressiveUnescape,
);
if (fileExists) {
// This implies originalContent is available
const { params: correctedParams } = await ensureCorrectEdit(
filePath,
originalContent,
{
old_string: originalContent, // Treat entire current content as old_string
new_string: proposedContent,
file_path: filePath,
},
config.getGeminiClient(),
config.getBaseLlmClient(),
abortSignal,
config.getDisableLLMCorrection(),
);
correctedContent = correctedParams.new_string;
} else {
// This implies new file (ENOENT)
correctedContent = await ensureCorrectFileContent(
proposedContent,
config.getBaseLlmClient(),
abortSignal,
config.getDisableLLMCorrection(),
);
}
return { originalContent, correctedContent, fileExists };
}

View File

@@ -5,10 +5,8 @@
*/
/* eslint-disable @typescript-eslint/no-explicit-any */
import type { Mock, Mocked } from 'vitest';
import type { Mocked } from 'vitest';
import { vi, describe, it, expect, beforeEach } from 'vitest';
import * as fs from 'node:fs';
import { EDIT_TOOL_NAME } from '../tools/tool-names.js';
import type { BaseLlmClient } from '../core/baseLlmClient.js';
// MOCKS
@@ -16,75 +14,16 @@ let callCount = 0;
const mockResponses: any[] = [];
let mockGenerateJson: any;
let mockStartChat: any;
let mockSendMessageStream: any;
vi.mock('fs', () => ({
statSync: vi.fn(),
mkdirSync: vi.fn(),
createWriteStream: vi.fn(() => ({
write: vi.fn(),
on: vi.fn(),
})),
}));
vi.mock('../core/client.js', () => ({
GeminiClient: vi.fn().mockImplementation(function (
this: any,
_config: Config,
) {
this.startChat = (...params: any[]) => mockStartChat(...params);
this.sendMessageStream = (...params: any[]) =>
mockSendMessageStream(...params);
return this;
}),
}));
// END MOCKS
import {
countOccurrences,
ensureCorrectEdit,
ensureCorrectFileContent,
unescapeStringForGeminiBug,
resetEditCorrectorCaches_TEST_ONLY,
} from './editCorrector.js';
import { GeminiClient } from '../core/client.js';
import type { Config } from '../config/config.js';
import { ToolRegistry } from '../tools/tool-registry.js';
vi.mock('../tools/tool-registry.js');
describe('editCorrector', () => {
describe('countOccurrences', () => {
it('should return 0 for empty string', () => {
expect(countOccurrences('', 'a')).toBe(0);
});
it('should return 0 for empty substring', () => {
expect(countOccurrences('abc', '')).toBe(0);
});
it('should return 0 if substring is not found', () => {
expect(countOccurrences('abc', 'd')).toBe(0);
});
it('should return 1 if substring is found once', () => {
expect(countOccurrences('abc', 'b')).toBe(1);
});
it('should return correct count for multiple occurrences', () => {
expect(countOccurrences('ababa', 'a')).toBe(3);
expect(countOccurrences('ababab', 'ab')).toBe(3);
});
it('should count non-overlapping occurrences', () => {
expect(countOccurrences('aaaaa', 'aa')).toBe(2);
expect(countOccurrences('ababab', 'aba')).toBe(1);
});
it('should correctly count occurrences when substring is longer', () => {
expect(countOccurrences('abc', 'abcdef')).toBe(0);
});
it('should be case-sensitive', () => {
expect(countOccurrences('abcABC', 'a')).toBe(1);
expect(countOccurrences('abcABC', 'A')).toBe(1);
});
});
describe('unescapeStringForGeminiBug', () => {
it('should unescape common sequences', () => {
expect(unescapeStringForGeminiBug('\\n')).toBe('\n');
@@ -156,542 +95,6 @@ describe('editCorrector', () => {
});
});
describe('ensureCorrectEdit', () => {
let mockGeminiClientInstance: Mocked<GeminiClient>;
let mockBaseLlmClientInstance: Mocked<BaseLlmClient>;
let mockToolRegistry: Mocked<ToolRegistry>;
let mockConfigInstance: Config;
const abortSignal = new AbortController().signal;
beforeEach(() => {
mockToolRegistry = new ToolRegistry(
{} as Config,
{} as any,
) as Mocked<ToolRegistry>;
const configParams = {
apiKey: 'test-api-key',
model: 'test-model',
sandbox: false as boolean | string,
targetDir: '/test',
debugMode: false,
question: undefined as string | undefined,
coreTools: undefined as string[] | undefined,
toolDiscoveryCommand: undefined as string | undefined,
toolCallCommand: undefined as string | undefined,
mcpServerCommand: undefined as string | undefined,
mcpServers: undefined as Record<string, any> | undefined,
userAgent: 'test-agent',
userMemory: '',
geminiMdFileCount: 0,
alwaysSkipModificationConfirmation: false,
};
mockConfigInstance = {
...configParams,
getApiKey: vi.fn(() => configParams.apiKey),
getModel: vi.fn(() => configParams.model),
getSandbox: vi.fn(() => configParams.sandbox),
getTargetDir: vi.fn(() => configParams.targetDir),
getToolRegistry: vi.fn(() => mockToolRegistry),
getDebugMode: vi.fn(() => configParams.debugMode),
getQuestion: vi.fn(() => configParams.question),
getCoreTools: vi.fn(() => configParams.coreTools),
getToolDiscoveryCommand: vi.fn(() => configParams.toolDiscoveryCommand),
getToolCallCommand: vi.fn(() => configParams.toolCallCommand),
getMcpServerCommand: vi.fn(() => configParams.mcpServerCommand),
getMcpServers: vi.fn(() => configParams.mcpServers),
getUserAgent: vi.fn(() => configParams.userAgent),
getUserMemory: vi.fn(() => configParams.userMemory),
setUserMemory: vi.fn((mem: string) => {
configParams.userMemory = mem;
}),
getGeminiMdFileCount: vi.fn(() => configParams.geminiMdFileCount),
setGeminiMdFileCount: vi.fn((count: number) => {
configParams.geminiMdFileCount = count;
}),
getAlwaysSkipModificationConfirmation: vi.fn(
() => configParams.alwaysSkipModificationConfirmation,
),
setAlwaysSkipModificationConfirmation: vi.fn((skip: boolean) => {
configParams.alwaysSkipModificationConfirmation = skip;
}),
getQuotaErrorOccurred: vi.fn().mockReturnValue(false),
setQuotaErrorOccurred: vi.fn(),
} as unknown as Config;
callCount = 0;
mockResponses.length = 0;
mockGenerateJson = vi
.fn()
.mockImplementation((_contents, _schema, signal) => {
// Check if the signal is aborted. If so, throw an error or return a specific response.
if (signal && signal.aborted) {
return Promise.reject(new Error('Aborted')); // Or some other specific error/response
}
const response = mockResponses[callCount];
callCount++;
if (response === undefined) return Promise.resolve({});
return Promise.resolve(response);
});
mockStartChat = vi.fn();
mockSendMessageStream = vi.fn();
mockGeminiClientInstance = new GeminiClient(
mockConfigInstance,
) as Mocked<GeminiClient>;
mockGeminiClientInstance.getHistory = vi.fn().mockReturnValue([]);
mockBaseLlmClientInstance = {
generateJson: mockGenerateJson,
config: {
generationConfigService: {
getResolvedConfig: vi.fn().mockReturnValue({
model: 'edit-corrector',
generateContentConfig: {},
}),
},
},
} as unknown as Mocked<BaseLlmClient>;
resetEditCorrectorCaches_TEST_ONLY();
});
describe('Scenario Group 1: originalParams.old_string matches currentContent directly', () => {
it('Test 1.1: old_string (no literal \\), new_string (escaped by Gemini) -> new_string unescaped', async () => {
const currentContent = 'This is a test string to find me.';
const originalParams = {
file_path: '/test/file.txt',
old_string: 'find me',
new_string: 'replace with \\"this\\"',
};
mockResponses.push({
corrected_new_string_escaping: 'replace with "this"',
});
const result = await ensureCorrectEdit(
'/test/file.txt',
currentContent,
originalParams,
mockGeminiClientInstance,
mockBaseLlmClientInstance,
abortSignal,
false,
);
expect(mockGenerateJson).toHaveBeenCalledTimes(1);
expect(result.params.new_string).toBe('replace with "this"');
expect(result.params.old_string).toBe('find me');
expect(result.occurrences).toBe(1);
});
it('Test 1.2: old_string (no literal \\), new_string (correctly formatted) -> new_string unchanged', async () => {
const currentContent = 'This is a test string to find me.';
const originalParams = {
file_path: '/test/file.txt',
old_string: 'find me',
new_string: 'replace with this',
};
const result = await ensureCorrectEdit(
'/test/file.txt',
currentContent,
originalParams,
mockGeminiClientInstance,
mockBaseLlmClientInstance,
abortSignal,
false,
);
expect(mockGenerateJson).toHaveBeenCalledTimes(0);
expect(result.params.new_string).toBe('replace with this');
expect(result.params.old_string).toBe('find me');
expect(result.occurrences).toBe(1);
});
it('Test 1.3: old_string (with literal \\), new_string (escaped by Gemini) -> new_string unchanged (still escaped)', async () => {
const currentContent = 'This is a test string to find\\me.';
const originalParams = {
file_path: '/test/file.txt',
old_string: 'find\\me',
new_string: 'replace with \\"this\\"',
};
mockResponses.push({
corrected_new_string_escaping: 'replace with "this"',
});
const result = await ensureCorrectEdit(
'/test/file.txt',
currentContent,
originalParams,
mockGeminiClientInstance,
mockBaseLlmClientInstance,
abortSignal,
false,
);
expect(mockGenerateJson).toHaveBeenCalledTimes(1);
expect(result.params.new_string).toBe('replace with "this"');
expect(result.params.old_string).toBe('find\\me');
expect(result.occurrences).toBe(1);
});
it('Test 1.4: old_string (with literal \\), new_string (correctly formatted) -> new_string unchanged', async () => {
const currentContent = 'This is a test string to find\\me.';
const originalParams = {
file_path: '/test/file.txt',
old_string: 'find\\me',
new_string: 'replace with this',
};
const result = await ensureCorrectEdit(
'/test/file.txt',
currentContent,
originalParams,
mockGeminiClientInstance,
mockBaseLlmClientInstance,
abortSignal,
false,
);
expect(mockGenerateJson).toHaveBeenCalledTimes(0);
expect(result.params.new_string).toBe('replace with this');
expect(result.params.old_string).toBe('find\\me');
expect(result.occurrences).toBe(1);
});
});
describe('Scenario Group 2: originalParams.old_string does NOT match, but unescapeStringForGeminiBug(originalParams.old_string) DOES match', () => {
it('Test 2.1: old_string (over-escaped, no intended literal \\), new_string (escaped by Gemini) -> new_string unescaped', async () => {
const currentContent = 'This is a test string to find "me".';
const originalParams = {
file_path: '/test/file.txt',
old_string: 'find \\"me\\"',
new_string: 'replace with \\"this\\"',
};
mockResponses.push({ corrected_new_string: 'replace with "this"' });
const result = await ensureCorrectEdit(
'/test/file.txt',
currentContent,
originalParams,
mockGeminiClientInstance,
mockBaseLlmClientInstance,
abortSignal,
false,
);
expect(mockGenerateJson).toHaveBeenCalledTimes(1);
expect(result.params.new_string).toBe('replace with "this"');
expect(result.params.old_string).toBe('find "me"');
expect(result.occurrences).toBe(1);
});
it('Test 2.2: old_string (over-escaped, no intended literal \\), new_string (correctly formatted) -> new_string unescaped (harmlessly)', async () => {
const currentContent = 'This is a test string to find "me".';
const originalParams = {
file_path: '/test/file.txt',
old_string: 'find \\"me\\"',
new_string: 'replace with this',
};
const result = await ensureCorrectEdit(
'/test/file.txt',
currentContent,
originalParams,
mockGeminiClientInstance,
mockBaseLlmClientInstance,
abortSignal,
false,
);
expect(mockGenerateJson).toHaveBeenCalledTimes(0);
expect(result.params.new_string).toBe('replace with this');
expect(result.params.old_string).toBe('find "me"');
expect(result.occurrences).toBe(1);
});
it('Test 2.3: old_string (over-escaped, with intended literal \\), new_string (simple) -> new_string corrected', async () => {
const currentContent = 'This is a test string to find \\me.';
const originalParams = {
file_path: '/test/file.txt',
old_string: 'find \\\\me',
new_string: 'replace with foobar',
};
const result = await ensureCorrectEdit(
'/test/file.txt',
currentContent,
originalParams,
mockGeminiClientInstance,
mockBaseLlmClientInstance,
abortSignal,
false,
);
expect(mockGenerateJson).toHaveBeenCalledTimes(0);
expect(result.params.new_string).toBe('replace with foobar');
expect(result.params.old_string).toBe('find \\me');
expect(result.occurrences).toBe(1);
});
});
describe('Scenario Group 3: LLM Correction Path', () => {
it('Test 3.1: old_string (no literal \\), new_string (escaped by Gemini), LLM re-escapes new_string -> final new_string is double unescaped', async () => {
const currentContent = 'This is a test string to corrected find me.';
const originalParams = {
file_path: '/test/file.txt',
old_string: 'find me',
new_string: 'replace with \\\\"this\\\\"',
};
const llmNewString = 'LLM says replace with "that"';
mockResponses.push({ corrected_new_string_escaping: llmNewString });
const result = await ensureCorrectEdit(
'/test/file.txt',
currentContent,
originalParams,
mockGeminiClientInstance,
mockBaseLlmClientInstance,
abortSignal,
false,
);
expect(mockGenerateJson).toHaveBeenCalledTimes(1);
expect(result.params.new_string).toBe(llmNewString);
expect(result.params.old_string).toBe('find me');
expect(result.occurrences).toBe(1);
});
it('Test 3.2: old_string (with literal \\), new_string (escaped by Gemini), LLM re-escapes new_string -> final new_string is unescaped once', async () => {
const currentContent = 'This is a test string to corrected find me.';
const originalParams = {
file_path: '/test/file.txt',
old_string: 'find\\me',
new_string: 'replace with \\\\"this\\\\"',
};
const llmCorrectedOldString = 'corrected find me';
const llmNewString = 'LLM says replace with "that"';
mockResponses.push({ corrected_target_snippet: llmCorrectedOldString });
mockResponses.push({ corrected_new_string: llmNewString });
const result = await ensureCorrectEdit(
'/test/file.txt',
currentContent,
originalParams,
mockGeminiClientInstance,
mockBaseLlmClientInstance,
abortSignal,
false,
);
expect(mockGenerateJson).toHaveBeenCalledTimes(2);
expect(result.params.new_string).toBe(llmNewString);
expect(result.params.old_string).toBe(llmCorrectedOldString);
expect(result.occurrences).toBe(1);
});
it('Test 3.3: old_string needs LLM, new_string is fine -> old_string corrected, new_string original', async () => {
const currentContent = 'This is a test string to be corrected.';
const originalParams = {
file_path: '/test/file.txt',
old_string: 'fiiind me',
new_string: 'replace with "this"',
};
const llmCorrectedOldString = 'to be corrected';
mockResponses.push({ corrected_target_snippet: llmCorrectedOldString });
const result = await ensureCorrectEdit(
'/test/file.txt',
currentContent,
originalParams,
mockGeminiClientInstance,
mockBaseLlmClientInstance,
abortSignal,
false,
);
expect(mockGenerateJson).toHaveBeenCalledTimes(1);
expect(result.params.new_string).toBe('replace with "this"');
expect(result.params.old_string).toBe(llmCorrectedOldString);
expect(result.occurrences).toBe(1);
});
it('Test 3.4: LLM correction path, correctNewString returns the originalNewString it was passed (which was unescaped) -> final new_string is unescaped', async () => {
const currentContent = 'This is a test string to corrected find me.';
const originalParams = {
file_path: '/test/file.txt',
old_string: 'find me',
new_string: 'replace with \\\\"this\\\\"',
};
const newStringForLLMAndReturnedByLLM = 'replace with "this"';
mockResponses.push({
corrected_new_string_escaping: newStringForLLMAndReturnedByLLM,
});
const result = await ensureCorrectEdit(
'/test/file.txt',
currentContent,
originalParams,
mockGeminiClientInstance,
mockBaseLlmClientInstance,
abortSignal,
false,
);
expect(mockGenerateJson).toHaveBeenCalledTimes(1);
expect(result.params.new_string).toBe(newStringForLLMAndReturnedByLLM);
expect(result.occurrences).toBe(1);
});
});
describe('Scenario Group 4: No Match Found / Multiple Matches', () => {
it('Test 4.1: No version of old_string (original, unescaped, LLM-corrected) matches -> returns original params, 0 occurrences', async () => {
const currentContent = 'This content has nothing to find.';
const originalParams = {
file_path: '/test/file.txt',
old_string: 'nonexistent string',
new_string: 'some new string',
};
mockResponses.push({ corrected_target_snippet: 'still nonexistent' });
const result = await ensureCorrectEdit(
'/test/file.txt',
currentContent,
originalParams,
mockGeminiClientInstance,
mockBaseLlmClientInstance,
abortSignal,
false,
);
expect(mockGenerateJson).toHaveBeenCalledTimes(1);
expect(result.params).toEqual(originalParams);
expect(result.occurrences).toBe(0);
});
it('Test 4.2: unescapedOldStringAttempt results in >1 occurrences -> returns original params, count occurrences', async () => {
const currentContent =
'This content has find "me" and also find "me" again.';
const originalParams = {
file_path: '/test/file.txt',
old_string: 'find "me"',
new_string: 'some new string',
};
const result = await ensureCorrectEdit(
'/test/file.txt',
currentContent,
originalParams,
mockGeminiClientInstance,
mockBaseLlmClientInstance,
abortSignal,
false,
);
expect(mockGenerateJson).toHaveBeenCalledTimes(0);
expect(result.params).toEqual(originalParams);
expect(result.occurrences).toBe(2);
});
});
describe('Scenario Group 5: Specific unescapeStringForGeminiBug checks (integrated into ensureCorrectEdit)', () => {
it('Test 5.1: old_string needs LLM to become currentContent, new_string also needs correction', async () => {
const currentContent = 'const x = "a\nbc\\"def\\"';
const originalParams = {
file_path: '/test/file.txt',
old_string: 'const x = \\"a\\nbc\\\\"def\\\\"',
new_string: 'const y = \\"new\\nval\\\\"content\\\\"',
};
const expectedFinalNewString = 'const y = "new\nval\\"content\\"';
mockResponses.push({ corrected_target_snippet: currentContent });
mockResponses.push({ corrected_new_string: expectedFinalNewString });
const result = await ensureCorrectEdit(
'/test/file.txt',
currentContent,
originalParams,
mockGeminiClientInstance,
mockBaseLlmClientInstance,
abortSignal,
false,
);
expect(mockGenerateJson).toHaveBeenCalledTimes(2);
expect(result.params.old_string).toBe(currentContent);
expect(result.params.new_string).toBe(expectedFinalNewString);
expect(result.occurrences).toBe(1);
});
});
describe('Scenario Group 6: Concurrent Edits', () => {
it('Test 6.1: should return early if file was modified by another process', async () => {
const filePath = '/test/file.txt';
const currentContent =
'This content has been modified by someone else.';
const originalParams = {
file_path: filePath,
old_string: 'nonexistent string',
new_string: 'some new string',
};
const now = Date.now();
const lastEditTime = now - 5000; // 5 seconds ago
// Mock the file's modification time to be recent: mtime is 5s newer than
// our last recorded edit, well past the 2s threshold in ensureCorrectEdit.
vi.spyOn(fs, 'statSync').mockReturnValue({
mtimeMs: now,
} as fs.Stats);
// Mock the last edit timestamp from our history to be in the past.
// The id encodes the timestamp as <tool>-<timestamp>-<uuid>.
const history = [
{
role: 'model',
parts: [
{
functionResponse: {
name: EDIT_TOOL_NAME,
id: `${EDIT_TOOL_NAME}-${lastEditTime}-123`,
response: {
output: {
llmContent: `Successfully modified file: ${filePath}`,
},
},
},
},
],
},
];
(mockGeminiClientInstance.getHistory as Mock).mockReturnValue(history);
const result = await ensureCorrectEdit(
filePath,
currentContent,
originalParams,
mockGeminiClientInstance,
mockBaseLlmClientInstance,
abortSignal,
false,
);
// External modification detected: bail out with the original params and
// zero occurrences instead of attempting any correction.
expect(result.occurrences).toBe(0);
expect(result.params).toEqual(originalParams);
});
});
describe('Scenario Group 7: Trimming with Newline Preservation', () => {
  it('Test 7.1: should preserve trailing newlines in new_string when trimming is applied', async () => {
    const fileText = ' find me'; // old_string matches as-is at first
    const params = {
      file_path: '/test/file.txt',
      old_string: ' find me', // matches, but carries trimmable whitespace
      new_string: ' replaced\n\n', // spaces should trim; newlines should stay
    };
    const outcome = await ensureCorrectEdit(
      '/test/file.txt',
      fileText,
      params,
      mockGeminiClientInstance,
      mockBaseLlmClientInstance,
      abortSignal,
      false,
    );
    // 'find me' (trimmed) is still a unique match inside ' find me', so the
    // trimmed pair is adopted.
    expect(outcome.params.old_string).toBe('find me');
    // Surrounding spaces are stripped from new_string while every trailing
    // newline survives.
    expect(outcome.params.new_string).toBe('replaced\n\n');
    expect(outcome.occurrences).toBe(1);
  });
  it('Test 7.2: should handle trailing newlines separated by spaces (regression fix)', async () => {
    const fileText = 'find me '; // old_string matches as-is at first
    const params = {
      file_path: '/test/file.txt',
      old_string: 'find me ', // trailing space
      new_string: 'replaced \n \n', // newlines interleaved with spaces
    };
    const outcome = await ensureCorrectEdit(
      '/test/file.txt',
      fileText,
      params,
      mockGeminiClientInstance,
      mockBaseLlmClientInstance,
      abortSignal,
      false,
    );
    expect(outcome.params.old_string).toBe('find me');
    // Both newlines are kept and joined; the space between them is dropped.
    expect(outcome.params.new_string).toBe('replaced\n\n');
    expect(outcome.occurrences).toBe(1);
  });
});
});
describe('ensureCorrectFileContent', () => {
let mockBaseLlmClientInstance: Mocked<BaseLlmClient>;
const abortSignal = new AbortController().signal;
@@ -811,5 +214,37 @@ describe('editCorrector', () => {
expect(result).toBe(correctedContent);
});
it('should return unescaped content when LLM is disabled and aggressiveUnescape is true', async () => {
const content = 'LaTeX command \\\\title{Example}';
// unescapeStringForGeminiBug would change \\\\title to \title (literal tab and "itle")
const expected = 'LaTeX command \title{Example}';
const result = await ensureCorrectFileContent(
content,
mockBaseLlmClientInstance,
abortSignal,
true, // disableLLMCorrection
true, // aggressiveUnescape
);
expect(result).toBe(expected);
expect(mockGenerateJson).not.toHaveBeenCalled();
});
it('should return original content when LLM is disabled and aggressiveUnescape is false', async () => {
  // Without aggressive unescaping the content passes through untouched,
  // even though the heuristic would flag it as potentially escaped.
  const rawContent = 'LaTeX command \\\\title{Example}';
  const output = await ensureCorrectFileContent(
    rawContent,
    mockBaseLlmClientInstance,
    abortSignal,
    true, // disableLLMCorrection
    false, // aggressiveUnescape
  );
  expect(output).toBe(rawContent);
  expect(mockGenerateJson).toHaveBeenCalledTimes(0);
});
});
});

View File

@@ -5,21 +5,7 @@
*/
import type { Content } from '@google/genai';
import type { GeminiClient } from '../core/client.js';
import type { BaseLlmClient } from '../core/baseLlmClient.js';
import type { EditToolParams } from '../tools/edit.js';
import {
EDIT_TOOL_NAME,
GREP_TOOL_NAME,
READ_FILE_TOOL_NAME,
READ_MANY_FILES_TOOL_NAME,
WRITE_FILE_TOOL_NAME,
} from '../tools/tool-names.js';
import {
isFunctionResponse,
isFunctionCall,
} from '../utils/messageInspectors.js';
import * as fs from 'node:fs';
import { promptIdContext } from './promptIdContext.js';
import { debugLogger } from './debugLogger.js';
import { LRUCache } from 'mnemonist';
@@ -39,336 +25,34 @@ function getPromptId(): string {
// Maximum number of entries kept in each correction cache (LRU eviction).
const MAX_CACHE_SIZE = 50;
// Cache for ensureCorrectEdit results, keyed by content + old/new strings.
const editCorrectionCache = new LRUCache<string, CorrectedEditResult>(
MAX_CACHE_SIZE,
);
// Cache for ensureCorrectFileContent results, keyed by the raw content.
const fileContentCorrectionCache = new LRUCache<string, string>(MAX_CACHE_SIZE);
/**
 * Defines the structure of the parameters within CorrectedEditResult
 */
interface CorrectedEditParams {
// Absolute path of the file the edit applies to.
file_path: string;
// Exact text expected to be found (and replaced) in the file.
old_string: string;
// Replacement text for old_string.
new_string: string;
}
/**
 * Defines the result structure for ensureCorrectEdit.
 *
 * `occurrences` is how many times `params.old_string` appears in the file
 * content after correction; 1 (or >0 when allow_multiple is set) means the
 * edit is applicable.
 */
export interface CorrectedEditResult {
params: CorrectedEditParams;
occurrences: number;
}
/**
 * Parses the timestamp out of a functionCall/functionResponse ID of the
 * form `<tool.name>-<timestamp>-<uuid>`.
 * @param fcnId the ID value of a functionCall or functionResponse object
 * @returns the timestamp as a number, or -1 if it could not be extracted
 */
function getTimestampFromFunctionId(fcnId: string): number {
  const segments = fcnId.split('-');
  if (segments.length <= 2) {
    // Too few segments to contain a <name>-<timestamp>-<uuid> triple.
    return -1;
  }
  const ts = Number.parseInt(segments[1], 10);
  return Number.isNaN(ts) ? -1 : ts;
}
/**
 * Will look through the gemini client history and determine when the most recent
 * edit to a target file occurred. If no edit happened, it will return -1.
 * The timestamp is recovered from the functionCall/functionResponse ID,
 * which encodes it as `<tool.name>-<timestamp>-<uuid>`.
 * @param filePath the path to the file
 * @param client the geminiClient, so that we can get the history
 * @returns a DateTime (as a number) of when the last edit occurred, or -1 if no edit was found.
 */
async function findLastEditTimestamp(
filePath: string,
client: GeminiClient,
): Promise<number> {
const history = client.getHistory() ?? [];
// Tools that may reference the file path in their FunctionResponse `output`.
const toolsInResp = new Set([
WRITE_FILE_TOOL_NAME,
EDIT_TOOL_NAME,
READ_MANY_FILES_TOOL_NAME,
GREP_TOOL_NAME,
]);
// Tools that may reference the file path in their FunctionCall `args`.
const toolsInCall = new Set([...toolsInResp, READ_FILE_TOOL_NAME]);
// Iterate backwards to find the most recent relevant action.
// (slice() copies so the live history array is not reversed in place.)
for (const entry of history.slice().reverse()) {
if (!entry.parts) continue;
for (const part of entry.parts) {
let id: string | undefined;
let content: unknown;
// Check for a relevant FunctionCall with the file path in its arguments.
if (
isFunctionCall(entry) &&
part.functionCall?.name &&
toolsInCall.has(part.functionCall.name)
) {
id = part.functionCall.id;
content = part.functionCall.args;
}
// Check for a relevant FunctionResponse with the file path in its output.
else if (
isFunctionResponse(entry) &&
part.functionResponse?.name &&
toolsInResp.has(part.functionResponse.name)
) {
const { response } = part.functionResponse;
if (response && !('error' in response) && 'output' in response) {
id = part.functionResponse.id;
content = response['output'];
}
}
if (!id || content === undefined) continue;
// Use the "blunt hammer" approach to find the file path in the content.
// Note that the tool response data is inconsistent in their formatting
// with successes and errors - so, we just check for the existence
// as the best guess to if error/failed occurred with the response.
const stringified = JSON.stringify(content);
if (
!stringified.includes('Error') && // only applicable for functionResponse
!stringified.includes('Failed') && // only applicable for functionResponse
stringified.includes(filePath)
) {
return getTimestampFromFunctionId(id);
}
}
}
return -1;
}
/**
 * Attempts to correct edit parameters if the original old_string is not found.
 * It tries unescaping, and then LLM-based correction.
 * Results are cached to avoid redundant processing.
 *
 * @param filePath Absolute path of the file being edited; used for the
 * concurrent-modification check against the client history.
 * @param currentContent The current content of the file.
 * @param originalParams The original EditToolParams
 * @param geminiClient The GeminiClient, whose history is inspected to detect
 * edits made outside this system.
 * @param baseLlmClient The client used for the LLM correction calls.
 * @param abortSignal Signal that cancels in-flight LLM calls.
 * @param disableLLMCorrection When true, only the heuristic (non-LLM)
 * unescape/trim corrections are applied.
 * @returns A promise resolving to an object containing the (potentially corrected)
 * EditToolParams (as CorrectedEditParams) and the final occurrences count.
 */
export async function ensureCorrectEdit(
filePath: string,
currentContent: string,
originalParams: EditToolParams, // This is the EditToolParams from edit.ts, without 'corrected'
geminiClient: GeminiClient,
baseLlmClient: BaseLlmClient,
abortSignal: AbortSignal,
disableLLMCorrection: boolean,
): Promise<CorrectedEditResult> {
const cacheKey = `${currentContent}---${originalParams.old_string}---${originalParams.new_string}`;
const cachedResult = editCorrectionCache.get(cacheKey);
if (cachedResult) {
return cachedResult;
}
let finalNewString = originalParams.new_string;
// Heuristic: if unescaping changes new_string, it may be over-escaped.
const newStringPotentiallyEscaped =
unescapeStringForGeminiBug(originalParams.new_string) !==
originalParams.new_string;
const allowMultiple = originalParams.allow_multiple ?? false;
let finalOldString = originalParams.old_string;
let occurrences = countOccurrences(currentContent, finalOldString);
const isOccurrencesMatch = allowMultiple
? occurrences > 0
: occurrences === 1;
if (isOccurrencesMatch) {
// old_string already matches as-is; only new_string may need fixing.
if (newStringPotentiallyEscaped && !disableLLMCorrection) {
finalNewString = await correctNewStringEscaping(
baseLlmClient,
finalOldString,
originalParams.new_string,
abortSignal,
);
}
} else if (occurrences > 1 && !allowMultiple) {
// If user doesn't allow multiple but found multiple, return as-is (will fail validation later)
const result: CorrectedEditResult = {
params: { ...originalParams },
occurrences,
};
editCorrectionCache.set(cacheKey, result);
return result;
} else {
// occurrences is 0 or some other unexpected state initially
const unescapedOldStringAttempt = unescapeStringForGeminiBug(
originalParams.old_string,
);
occurrences = countOccurrences(currentContent, unescapedOldStringAttempt);
const isUnescapedOccurrencesMatch = allowMultiple
? occurrences > 0
: occurrences === 1;
if (isUnescapedOccurrencesMatch) {
// The heuristic unescape fixed old_string without needing the LLM.
finalOldString = unescapedOldStringAttempt;
if (newStringPotentiallyEscaped && !disableLLMCorrection) {
finalNewString = await correctNewString(
baseLlmClient,
originalParams.old_string, // original old
unescapedOldStringAttempt, // corrected old
originalParams.new_string, // original new (which is potentially escaped)
abortSignal,
);
}
} else if (occurrences === 0) {
if (filePath) {
// In order to keep from clobbering edits made outside our system,
// let's check if there was a more recent edit to the file than what
// our system has done
const lastEditedByUsTime = await findLastEditTimestamp(
filePath,
geminiClient,
);
// Add a 1-second buffer to account for timing inaccuracies. If the file
// was modified more than a second after the last edit tool was run, we
// can assume it was modified by something else.
if (lastEditedByUsTime > 0) {
const stats = fs.statSync(filePath);
const diff = stats.mtimeMs - lastEditedByUsTime;
if (diff > 2000) {
// Hard coded for 2 seconds
// This file was edited sooner
const result: CorrectedEditResult = {
params: { ...originalParams },
occurrences: 0, // Explicitly 0 as LLM failed
};
editCorrectionCache.set(cacheKey, result);
return result;
}
}
}
if (disableLLMCorrection) {
// Heuristics exhausted and LLM use disallowed: give up with 0 matches.
const result: CorrectedEditResult = {
params: { ...originalParams },
occurrences: 0,
};
editCorrectionCache.set(cacheKey, result);
return result;
}
// Last resort: ask the LLM to recover the exact file text old_string
// was intended to match.
const llmCorrectedOldString = await correctOldStringMismatch(
baseLlmClient,
currentContent,
unescapedOldStringAttempt,
abortSignal,
);
const llmOldOccurrences = countOccurrences(
currentContent,
llmCorrectedOldString,
);
const isLlmOccurrencesMatch = allowMultiple
? llmOldOccurrences > 0
: llmOldOccurrences === 1;
if (isLlmOccurrencesMatch) {
finalOldString = llmCorrectedOldString;
occurrences = llmOldOccurrences;
if (newStringPotentiallyEscaped) {
const baseNewStringForLLMCorrection = unescapeStringForGeminiBug(
originalParams.new_string,
);
finalNewString = await correctNewString(
baseLlmClient,
originalParams.old_string, // original old
llmCorrectedOldString, // corrected old
baseNewStringForLLMCorrection, // base new for correction
abortSignal,
);
}
} else {
// LLM correction also failed for old_string
const result: CorrectedEditResult = {
params: { ...originalParams },
occurrences: 0, // Explicitly 0 as LLM failed
};
editCorrectionCache.set(cacheKey, result);
return result;
}
} else {
// Unescaping old_string resulted in > 1 occurrence but not allowMultiple
const result: CorrectedEditResult = {
params: { ...originalParams },
occurrences, // This will be > 1
};
editCorrectionCache.set(cacheKey, result);
return result;
}
}
// Trim surrounding whitespace from the pair when the trimmed old_string
// still matches acceptably (trailing newlines are preserved).
const { targetString, pair } = trimPairIfPossible(
finalOldString,
finalNewString,
currentContent,
allowMultiple,
);
finalOldString = targetString;
finalNewString = pair;
// Final result construction
const result: CorrectedEditResult = {
params: {
file_path: originalParams.file_path,
old_string: finalOldString,
new_string: finalNewString,
},
occurrences: countOccurrences(currentContent, finalOldString), // Recalculate occurrences with the final old_string
};
editCorrectionCache.set(cacheKey, result);
return result;
}
export async function ensureCorrectFileContent(
content: string,
baseLlmClient: BaseLlmClient,
abortSignal: AbortSignal,
disableLLMCorrection: boolean = true,
aggressiveUnescape: boolean = false,
): Promise<string> {
const cachedResult = fileContentCorrectionCache.get(content);
if (cachedResult) {
return cachedResult;
}
const contentPotentiallyEscaped =
unescapeStringForGeminiBug(content) !== content;
if (!contentPotentiallyEscaped) {
const unescapedContent = unescapeStringForGeminiBug(content);
if (unescapedContent === content) {
fileContentCorrectionCache.set(content, content);
return content;
}
if (disableLLMCorrection) {
// If we can't use LLM, we should at least use the unescaped content
// as it's likely better than the original if it was detected as potentially escaped.
// unescapeStringForGeminiBug is a heuristic, not an LLM call.
const unescaped = unescapeStringForGeminiBug(content);
fileContentCorrectionCache.set(content, unescaped);
return unescaped;
if (aggressiveUnescape) {
fileContentCorrectionCache.set(content, unescapedContent);
return unescapedContent;
}
fileContentCorrectionCache.set(content, content);
return content;
}
const correctedContent = await correctStringEscaping(
@@ -380,242 +64,6 @@ export async function ensureCorrectFileContent(
return correctedContent;
}
// Define the expected JSON schema for the LLM response for old_string correction.
// Per the prompt, an empty corrected_target_snippet signals "no unique match".
const OLD_STRING_CORRECTION_SCHEMA: Record<string, unknown> = {
type: 'object',
properties: {
corrected_target_snippet: {
type: 'string',
description:
'The corrected version of the target snippet that exactly and uniquely matches a segment within the provided file content.',
},
},
required: ['corrected_target_snippet'],
};
/**
 * Uses the LLM to recover the exact file text that a failed old_string
 * snippet was most likely intended to match (typically an over-escaping fix).
 *
 * @param baseLlmClient Client used for the schema-constrained JSON call.
 * @param fileContent The full current content of the target file.
 * @param problematicSnippet The snippet that failed to match exactly.
 * @param abortSignal Signal to cancel the in-flight LLM call.
 * @returns The corrected snippet, or the original snippet when the LLM
 * returns nothing usable or the call fails (abort errors are rethrown).
 */
export async function correctOldStringMismatch(
baseLlmClient: BaseLlmClient,
fileContent: string,
problematicSnippet: string,
abortSignal: AbortSignal,
): Promise<string> {
const prompt = `
Context: A process needs to find an exact literal, unique match for a specific text snippet within a file's content. The provided snippet failed to match exactly. This is most likely because it has been overly escaped.
Task: Analyze the provided file content and the problematic target snippet. Identify the segment in the file content that the snippet was *most likely* intended to match. Output the *exact*, literal text of that segment from the file content. Focus *only* on removing extra escape characters and correcting formatting, whitespace, or minor differences to achieve a PERFECT literal match. The output must be the exact literal text as it appears in the file.
Problematic target snippet:
\`\`\`
${problematicSnippet}
\`\`\`
File Content:
\`\`\`
${fileContent}
\`\`\`
For example, if the problematic target snippet was "\\\\\\nconst greeting = \`Hello \\\\\`\${name}\\\\\`\`;" and the file content had content that looked like "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;", then corrected_target_snippet should likely be "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;" to fix the incorrect escaping to match the original file content.
If the differences are only in whitespace or formatting, apply similar whitespace/formatting changes to the corrected_target_snippet.
Return ONLY the corrected target snippet in the specified JSON format with the key 'corrected_target_snippet'. If no clear, unique match can be found, return an empty string for 'corrected_target_snippet'.
`.trim();
const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }];
try {
const result = await baseLlmClient.generateJson({
modelConfigKey: { model: 'edit-corrector' },
contents,
schema: OLD_STRING_CORRECTION_SCHEMA,
abortSignal,
systemInstruction: CODE_CORRECTION_SYSTEM_PROMPT,
promptId: getPromptId(),
role: LlmRole.UTILITY_EDIT_CORRECTOR,
});
// Empty string is the model's "no unique match" sentinel; fall back to
// the caller's snippet in that case.
if (
result &&
typeof result['corrected_target_snippet'] === 'string' &&
result['corrected_target_snippet'].length > 0
) {
return result['corrected_target_snippet'];
} else {
return problematicSnippet;
}
} catch (error) {
// Cancellation must propagate; other LLM failures degrade gracefully.
if (abortSignal.aborted) {
throw error;
}
debugLogger.warn(
'Error during LLM call for old string snippet correction:',
error,
);
return problematicSnippet;
}
}
// Define the expected JSON schema for the new_string correction LLM response
// used by correctNewString.
const NEW_STRING_CORRECTION_SCHEMA: Record<string, unknown> = {
type: 'object',
properties: {
corrected_new_string: {
type: 'string',
description:
'The original_new_string adjusted to be a suitable replacement for the corrected_old_string, while maintaining the original intent of the change.',
},
},
required: ['corrected_new_string'],
};
/**
 * Adjusts the new_string to align with a corrected old_string, maintaining
 * the original intent of the change.
 *
 * @param baseLlmClient Client used for the schema-constrained JSON call.
 * @param originalOldString What was initially intended to be found.
 * @param correctedOldString What was actually found in the file.
 * @param originalNewString The replacement paired with originalOldString.
 * @param abortSignal Signal to cancel the in-flight LLM call.
 * @returns The adjusted new_string, or originalNewString when no adjustment
 * is needed or the LLM call fails (abort errors are rethrown).
 */
export async function correctNewString(
baseLlmClient: BaseLlmClient,
originalOldString: string,
correctedOldString: string,
originalNewString: string,
abortSignal: AbortSignal,
): Promise<string> {
// No correction happened to old_string, so new_string needs no alignment.
if (originalOldString === correctedOldString) {
return originalNewString;
}
const prompt = `
Context: A text replacement operation was planned. The original text to be replaced (original_old_string) was slightly different from the actual text in the file (corrected_old_string). The original_old_string has now been corrected to match the file content.
We now need to adjust the replacement text (original_new_string) so that it makes sense as a replacement for the corrected_old_string, while preserving the original intent of the change.
original_old_string (what was initially intended to be found):
\`\`\`
${originalOldString}
\`\`\`
corrected_old_string (what was actually found in the file and will be replaced):
\`\`\`
${correctedOldString}
\`\`\`
original_new_string (what was intended to replace original_old_string):
\`\`\`
${originalNewString}
\`\`\`
Task: Based on the differences between original_old_string and corrected_old_string, and the content of original_new_string, generate a corrected_new_string. This corrected_new_string should be what original_new_string would have been if it was designed to replace corrected_old_string directly, while maintaining the spirit of the original transformation.
For example, if original_old_string was "\\\\\\nconst greeting = \`Hello \\\\\`\${name}\\\\\`\`;" and corrected_old_string is "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;", and original_new_string was "\\\\\\nconst greeting = \`Hello \\\\\`\${name} \${lastName}\\\\\`\`;", then corrected_new_string should likely be "\nconst greeting = \`Hello ${'\\`'}\${name} \${lastName}${'\\`'}\`;" to fix the incorrect escaping.
If the differences are only in whitespace or formatting, apply similar whitespace/formatting changes to the corrected_new_string.
Return ONLY the corrected string in the specified JSON format with the key 'corrected_new_string'. If no adjustment is deemed necessary or possible, return the original_new_string.
`.trim();
const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }];
try {
const result = await baseLlmClient.generateJson({
modelConfigKey: { model: 'edit-corrector' },
contents,
schema: NEW_STRING_CORRECTION_SCHEMA,
abortSignal,
systemInstruction: CODE_CORRECTION_SYSTEM_PROMPT,
promptId: getPromptId(),
role: LlmRole.UTILITY_EDIT_CORRECTOR,
});
if (
result &&
typeof result['corrected_new_string'] === 'string' &&
result['corrected_new_string'].length > 0
) {
return result['corrected_new_string'];
} else {
return originalNewString;
}
} catch (error) {
// Cancellation must propagate; other LLM failures degrade gracefully.
if (abortSignal.aborted) {
throw error;
}
debugLogger.warn('Error during LLM call for new_string correction:', error);
return originalNewString;
}
}
// JSON schema for correctNewStringEscaping responses.
const CORRECT_NEW_STRING_ESCAPING_SCHEMA: Record<string, unknown> = {
type: 'object',
properties: {
corrected_new_string_escaping: {
type: 'string',
description:
'The new_string with corrected escaping, ensuring it is a proper replacement for the old_string, especially considering potential over-escaping issues from previous LLM generations.',
},
},
required: ['corrected_new_string_escaping'],
};
/**
 * Uses the LLM to fix over-escaping in a new_string whose paired old_string
 * already matches the file correctly.
 *
 * @param baseLlmClient Client used for the schema-constrained JSON call.
 * @param oldString The exact text that will be replaced.
 * @param potentiallyProblematicNewString The replacement text, possibly
 * carrying bad escaping from a previous LLM generation.
 * @param abortSignal Signal to cancel the in-flight LLM call.
 * @returns The corrected new_string, or the input string when no correction
 * is produced or the call fails (abort errors are rethrown).
 */
export async function correctNewStringEscaping(
baseLlmClient: BaseLlmClient,
oldString: string,
potentiallyProblematicNewString: string,
abortSignal: AbortSignal,
): Promise<string> {
const prompt = `
Context: A text replacement operation is planned. The text to be replaced (old_string) has been correctly identified in the file. However, the replacement text (new_string) might have been improperly escaped by a previous LLM generation (e.g. too many backslashes for newlines like \\n instead of \n, or unnecessarily quotes like \\"Hello\\" instead of "Hello").
old_string (this is the exact text that will be replaced):
\`\`\`
${oldString}
\`\`\`
potentially_problematic_new_string (this is the text that should replace old_string, but MIGHT have bad escaping, or might be entirely correct):
\`\`\`
${potentiallyProblematicNewString}
\`\`\`
Task: Analyze the potentially_problematic_new_string. If it's syntactically invalid due to incorrect escaping (e.g., "\n", "\t", "\\", "\\'", "\\""), correct the invalid syntax. The goal is to ensure the new_string, when inserted into the code, will be a valid and correctly interpreted.
For example, if old_string is "foo" and potentially_problematic_new_string is "bar\\nbaz", the corrected_new_string_escaping should be "bar\nbaz".
If potentially_problematic_new_string is console.log(\\"Hello World\\"), it should be console.log("Hello World").
Return ONLY the corrected string in the specified JSON format with the key 'corrected_new_string_escaping'. If no escaping correction is needed, return the original potentially_problematic_new_string.
`.trim();
const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }];
try {
const result = await baseLlmClient.generateJson({
modelConfigKey: { model: 'edit-corrector' },
contents,
schema: CORRECT_NEW_STRING_ESCAPING_SCHEMA,
abortSignal,
systemInstruction: CODE_CORRECTION_SYSTEM_PROMPT,
promptId: getPromptId(),
role: LlmRole.UTILITY_EDIT_CORRECTOR,
});
if (
result &&
typeof result['corrected_new_string_escaping'] === 'string' &&
result['corrected_new_string_escaping'].length > 0
) {
return result['corrected_new_string_escaping'];
} else {
return potentiallyProblematicNewString;
}
} catch (error) {
// Cancellation must propagate; other LLM failures degrade gracefully.
if (abortSignal.aborted) {
throw error;
}
debugLogger.warn(
'Error during LLM call for new_string escaping correction:',
error,
);
return potentiallyProblematicNewString;
}
}
const CORRECT_STRING_ESCAPING_SCHEMA: Record<string, unknown> = {
type: 'object',
properties: {
@@ -684,46 +132,6 @@ Return ONLY the corrected string in the specified JSON format with the key 'corr
}
}
/**
 * Trims a string like `String.prototype.trim()`, but re-appends any newline
 * characters (CR/LF) that were present in the trailing whitespace; spaces or
 * tabs interleaved with those newlines are dropped.
 */
function trimPreservingTrailingNewline(str: string): string {
  const withoutTrailing = str.trimEnd();
  let keptNewlines = '';
  for (const ch of str.slice(withoutTrailing.length)) {
    if (ch === '\n' || ch === '\r') {
      keptNewlines += ch;
    }
  }
  return str.trim() + keptNewlines;
}
/**
 * If trimming `target` (newline-preserving) changes it AND the trimmed form
 * still matches `currentContent` acceptably, returns the trimmed pair;
 * otherwise returns the inputs unchanged.
 */
function trimPairIfPossible(
  target: string,
  trimIfTargetTrims: string,
  currentContent: string,
  allowMultiple: boolean,
) {
  const trimmedTarget = trimPreservingTrailingNewline(target);
  if (trimmedTarget.length !== target.length) {
    const hits = countOccurrences(currentContent, trimmedTarget);
    const acceptable = allowMultiple ? hits > 0 : hits === 1;
    if (acceptable) {
      // Trim the replacement the same way so the pair stays consistent.
      return {
        targetString: trimmedTarget,
        pair: trimPreservingTrailingNewline(trimIfTargetTrims),
      };
    }
  }
  return {
    targetString: target,
    pair: trimIfTargetTrims,
  };
}
/**
* Unescapes a string that might have been overly escaped by an LLM.
*/
@@ -770,23 +178,6 @@ export function unescapeStringForGeminiBug(inputString: string): string {
);
}
/**
 * Counts non-overlapping occurrences of `substr` in `str`.
 * An empty `substr` is defined to occur zero times.
 */
export function countOccurrences(str: string, substr: string): number {
  if (substr === '') {
    return 0;
  }
  let total = 0;
  for (
    let at = str.indexOf(substr);
    at !== -1;
    at = str.indexOf(substr, at + substr.length) // resume past the match
  ) {
    total += 1;
  }
  return total;
}
/** Test-only hook: wipes both LRU correction caches so tests start cold. */
export function resetEditCorrectorCaches_TEST_ONLY() {
  fileContentCorrectionCache.clear();
  editCorrectionCache.clear();
}