From 4b7ce1fe67516eba17b63e2db1947b55ecd5cccb Mon Sep 17 00:00:00 2001 From: Tommaso Sciortino Date: Thu, 26 Feb 2026 17:50:21 -0800 Subject: [PATCH] Avoid overaggressive unescaping (#20520) --- .../src/tools/confirmation-policy.test.ts | 1 + packages/core/src/tools/line-endings.test.ts | 17 +- packages/core/src/tools/write-file.test.ts | 160 ++--- packages/core/src/tools/write-file.ts | 43 +- packages/core/src/utils/editCorrector.test.ts | 631 +----------------- packages/core/src/utils/editCorrector.ts | 627 +---------------- 6 files changed, 110 insertions(+), 1369 deletions(-) diff --git a/packages/core/src/tools/confirmation-policy.test.ts b/packages/core/src/tools/confirmation-policy.test.ts index c6ad1f5e94..a20bb611e3 100644 --- a/packages/core/src/tools/confirmation-policy.test.ts +++ b/packages/core/src/tools/confirmation-policy.test.ts @@ -67,6 +67,7 @@ describe('Tool Confirmation Policy Updates', () => { getBaseLlmClient: () => ({}), getDisableLLMCorrection: () => true, getIdeMode: () => false, + getActiveModel: () => 'test-model', getWorkspaceContext: () => ({ isPathWithinWorkspace: () => true, getDirectories: () => [rootDir], diff --git a/packages/core/src/tools/line-endings.test.ts b/packages/core/src/tools/line-endings.test.ts index f62d684712..981e602b5b 100644 --- a/packages/core/src/tools/line-endings.test.ts +++ b/packages/core/src/tools/line-endings.test.ts @@ -25,10 +25,7 @@ import fs from 'node:fs'; import os from 'node:os'; import { GeminiClient } from '../core/client.js'; import type { BaseLlmClient } from '../core/baseLlmClient.js'; -import { - ensureCorrectEdit, - ensureCorrectFileContent, -} from '../utils/editCorrector.js'; +import { ensureCorrectFileContent } from '../utils/editCorrector.js'; import { StandardFileSystemService } from '../services/fileSystemService.js'; import { WorkspaceContext } from '../utils/workspaceContext.js'; import { @@ -52,7 +49,6 @@ vi.mock('../ide/ide-client.js', () => ({ let mockGeminiClientInstance: Mocked; let mockBaseLlmClientInstance: Mocked; -const mockEnsureCorrectEdit = vi.fn(); const mockEnsureCorrectFileContent = vi.fn(); // Mock Config @@ -81,6 +77,7 @@ const mockConfigInternal = { getGeminiMdFileCount: () => 0, setGeminiMdFileCount: vi.fn(), getDisableLLMCorrection: vi.fn(() => false), + getActiveModel: () => 'test-model', validatePathAccess: vi.fn().mockReturnValue(null), getToolRegistry: () => ({ @@ -120,7 +117,6 @@ describe('Line Ending Preservation', () => { generateJson: vi.fn(), } as unknown as Mocked; - vi.mocked(ensureCorrectEdit).mockImplementation(mockEnsureCorrectEdit); vi.mocked(ensureCorrectFileContent).mockImplementation( mockEnsureCorrectFileContent, ); @@ -177,14 +173,7 @@ describe('Line Ending Preservation', () => { const proposedContent = 'line1\nline2\nline3\n'; // Mock corrections to return proposed content as-is (but usually normalized) - mockEnsureCorrectEdit.mockResolvedValue({ - params: { - file_path: filePath, - old_string: originalContent, - new_string: proposedContent, - }, - occurrences: 1, - }); + mockEnsureCorrectFileContent.mockResolvedValue(proposedContent); const params = { file_path: filePath, content: proposedContent }; const invocation = tool.build(params); diff --git a/packages/core/src/tools/write-file.test.ts b/packages/core/src/tools/write-file.test.ts index 84fd4d93d7..0b978f14f9 100644 --- a/packages/core/src/tools/write-file.test.ts +++ b/packages/core/src/tools/write-file.test.ts @@ -23,7 +23,6 @@ import type { ToolResult, } from './tools.js'; import { ToolConfirmationOutcome } from './tools.js'; -import { type EditToolParams } from './edit.js'; import type { Config } from '../config/config.js'; import { ApprovalMode } from '../policy/types.js'; import type { ToolRegistry } from './tool-registry.js'; @@ -33,11 +32,7 @@ import fs from 'node:fs'; import os from 'node:os'; import { GeminiClient } from '../core/client.js'; import type { BaseLlmClient } from '../core/baseLlmClient.js'; -import type { CorrectedEditResult } from '../utils/editCorrector.js'; -import { - ensureCorrectEdit, - ensureCorrectFileContent, -} from '../utils/editCorrector.js'; +import { ensureCorrectFileContent } from '../utils/editCorrector.js'; import { StandardFileSystemService } from '../services/fileSystemService.js'; import type { DiffUpdateResult } from '../ide/ide-client.js'; import { IdeClient } from '../ide/ide-client.js'; @@ -61,7 +56,6 @@ vi.mock('../ide/ide-client.js', () => ({ let mockGeminiClientInstance: Mocked; let mockBaseLlmClientInstance: Mocked; let mockConfig: Config; -const mockEnsureCorrectEdit = vi.fn(); const mockEnsureCorrectFileContent = vi.fn(); const mockIdeClient = { openDiff: vi.fn(), @@ -69,7 +63,6 @@ const mockIdeClient = { }; // Wire up the mocked functions to be used by the actual module imports -vi.mocked(ensureCorrectEdit).mockImplementation(mockEnsureCorrectEdit); vi.mocked(ensureCorrectFileContent).mockImplementation( mockEnsureCorrectFileContent, ); @@ -110,6 +103,7 @@ const mockConfigInternal = { }) as unknown as ToolRegistry, isInteractive: () => false, getDisableLLMCorrection: vi.fn(() => true), + getActiveModel: () => 'test-model', storage: { getProjectTempDir: vi.fn().mockReturnValue('/tmp/project'), }, @@ -179,7 +173,6 @@ describe('WriteFileTool', () => { generateJson: vi.fn(), } as unknown as Mocked; - vi.mocked(ensureCorrectEdit).mockImplementation(mockEnsureCorrectEdit); vi.mocked(ensureCorrectFileContent).mockImplementation( mockEnsureCorrectFileContent, ); @@ -199,28 +192,9 @@ describe('WriteFileTool', () => { // Reset mocks before each test mockConfigInternal.getApprovalMode.mockReturnValue(ApprovalMode.DEFAULT); mockConfigInternal.setApprovalMode.mockClear(); - mockEnsureCorrectEdit.mockReset(); mockEnsureCorrectFileContent.mockReset(); // Default mock implementations that return valid structures - mockEnsureCorrectEdit.mockImplementation( - async ( - filePath: string, - _currentContent: string, - params: EditToolParams, - _client: GeminiClient, - _baseClient: BaseLlmClient, - signal?: AbortSignal, - ): Promise => { - if (signal?.aborted) { - return Promise.reject(new Error('Aborted')); - } - return Promise.resolve({ - params: { ...params, new_string: params.new_string ?? '' }, - occurrences: 1, - }); - }, - ); mockEnsureCorrectFileContent.mockImplementation( async ( content: string, @@ -369,15 +343,43 @@ describe('WriteFileTool', () => { mockBaseLlmClientInstance, abortSignal, true, + true, // aggressiveUnescape ); - expect(mockEnsureCorrectEdit).not.toHaveBeenCalled(); expect(result.correctedContent).toBe(correctedContent); expect(result.originalContent).toBe(''); expect(result.fileExists).toBe(false); expect(result.error).toBeUndefined(); }); - it('should call ensureCorrectEdit for an existing file', async () => { + it('should set aggressiveUnescape to false for gemini-3 models', async () => { + const filePath = path.join(rootDir, 'gemini3_file.txt'); + const proposedContent = 'Proposed new content.'; + const abortSignal = new AbortController().signal; + + const mockGemini3Config = { + ...mockConfig, + getActiveModel: () => 'gemini-3.0-pro', + } as unknown as Config; + + mockEnsureCorrectFileContent.mockResolvedValue('Corrected new content.'); + + await getCorrectedFileContent( + mockGemini3Config, + filePath, + proposedContent, + abortSignal, + ); + + expect(mockEnsureCorrectFileContent).toHaveBeenCalledWith( + proposedContent, + mockBaseLlmClientInstance, + abortSignal, + true, + false, // aggressiveUnescape + ); + }); + + it('should call ensureCorrectFileContent for an existing file', async () => { const filePath = path.join(rootDir, 'existing_corrected_file.txt'); const originalContent = 'Original existing content.'; const proposedContent = 'Proposed replacement content.'; @@ -386,14 +388,7 @@ describe('WriteFileTool', () => { fs.writeFileSync(filePath, originalContent, 'utf8'); // Ensure this mock is active and returns the correct structure - mockEnsureCorrectEdit.mockResolvedValue({ - params: { - file_path: filePath, - old_string: originalContent, - new_string: correctedProposedContent, - }, - occurrences: 1, - } as CorrectedEditResult); + mockEnsureCorrectFileContent.mockResolvedValue(correctedProposedContent); const result = await getCorrectedFileContent( mockConfig, @@ -402,20 +397,13 @@ describe('WriteFileTool', () => { abortSignal, ); - expect(mockEnsureCorrectEdit).toHaveBeenCalledWith( - filePath, - originalContent, - { - old_string: originalContent, - new_string: proposedContent, - file_path: filePath, - }, - mockGeminiClientInstance, + expect(mockEnsureCorrectFileContent).toHaveBeenCalledWith( + proposedContent, mockBaseLlmClientInstance, abortSignal, true, + true, // aggressiveUnescape ); - expect(mockEnsureCorrectFileContent).not.toHaveBeenCalled(); expect(result.correctedContent).toBe(correctedProposedContent); expect(result.originalContent).toBe(originalContent); expect(result.fileExists).toBe(true); @@ -441,7 +429,6 @@ describe('WriteFileTool', () => { ); expect(fsService.readTextFile).toHaveBeenCalledWith(filePath); - expect(mockEnsureCorrectEdit).not.toHaveBeenCalled(); expect(mockEnsureCorrectFileContent).not.toHaveBeenCalled(); expect(result.correctedContent).toBe(proposedContent); expect(result.originalContent).toBe(''); @@ -492,6 +479,7 @@ describe('WriteFileTool', () => { mockBaseLlmClientInstance, abortSignal, true, + true, // aggressiveUnescape ); expect(confirmation).toEqual( expect.objectContaining({ @@ -516,14 +504,7 @@ describe('WriteFileTool', () => { 'Corrected replacement for confirmation.'; fs.writeFileSync(filePath, originalContent, 'utf8'); - mockEnsureCorrectEdit.mockResolvedValue({ - params: { - file_path: filePath, - old_string: originalContent, - new_string: correctedProposedContent, - }, - occurrences: 1, - }); + mockEnsureCorrectFileContent.mockResolvedValue(correctedProposedContent); const params = { file_path: filePath, content: proposedContent }; const invocation = tool.build(params); @@ -531,18 +512,12 @@ describe('WriteFileTool', () => { abortSignal, )) as ToolEditConfirmationDetails; - expect(mockEnsureCorrectEdit).toHaveBeenCalledWith( - filePath, - originalContent, - { - old_string: originalContent, - new_string: proposedContent, - file_path: filePath, - }, - mockGeminiClientInstance, + expect(mockEnsureCorrectFileContent).toHaveBeenCalledWith( + proposedContent, mockBaseLlmClientInstance, abortSignal, true, + true, // aggressiveUnescape ); expect(confirmation).toEqual( expect.objectContaining({ @@ -738,6 +713,7 @@ describe('WriteFileTool', () => { mockBaseLlmClientInstance, abortSignal, true, + true, // aggressiveUnescape ); expect(result.llmContent).toMatch( /Successfully created and wrote to new file/, @@ -768,14 +744,7 @@ describe('WriteFileTool', () => { const correctedProposedContent = 'Corrected overwrite for execute.'; fs.writeFileSync(filePath, initialContent, 'utf8'); - mockEnsureCorrectEdit.mockResolvedValue({ - params: { - file_path: filePath, - old_string: initialContent, - new_string: correctedProposedContent, - }, - occurrences: 1, - }); + mockEnsureCorrectFileContent.mockResolvedValue(correctedProposedContent); const params = { file_path: filePath, content: proposedContent }; const invocation = tool.build(params); @@ -784,18 +753,12 @@ describe('WriteFileTool', () => { const result = await invocation.execute(abortSignal); - expect(mockEnsureCorrectEdit).toHaveBeenCalledWith( - filePath, - initialContent, - { - old_string: initialContent, - new_string: proposedContent, - file_path: filePath, - }, - mockGeminiClientInstance, + expect(mockEnsureCorrectFileContent).toHaveBeenCalledWith( + proposedContent, mockBaseLlmClientInstance, abortSignal, true, + true, // aggressiveUnescape ); expect(result.llmContent).toMatch(/Successfully overwrote file/); const writtenContent = await fsService.readTextFile(filePath); @@ -892,14 +855,7 @@ describe('WriteFileTool', () => { newLines[50] = 'Line 51 Modified'; // Modify one line in the middle const newContent = newLines.join('\n'); - mockEnsureCorrectEdit.mockResolvedValue({ - params: { - file_path: filePath, - old_string: originalContent, - new_string: newContent, - }, - occurrences: 1, - }); + mockEnsureCorrectFileContent.mockResolvedValue(newContent); const params = { file_path: filePath, content: newContent }; const invocation = tool.build(params); @@ -1072,13 +1028,13 @@ describe('WriteFileTool', () => { mockBaseLlmClientInstance, abortSignal, true, + true, // aggressiveUnescape ); - expect(mockEnsureCorrectEdit).not.toHaveBeenCalled(); expect(result.correctedContent).toBe(proposedContent); expect(result.fileExists).toBe(false); }); - it('should call ensureCorrectEdit with disableLLMCorrection=true for an existing file when disabled', async () => { + it('should call ensureCorrectFileContent with disableLLMCorrection=true for an existing file when disabled', async () => { const filePath = path.join(rootDir, 'existing_file_no_correction.txt'); const originalContent = 'Original content.'; const proposedContent = 'Proposed content.'; @@ -1086,14 +1042,7 @@ describe('WriteFileTool', () => { mockConfigInternal.getDisableLLMCorrection.mockReturnValue(true); // Ensure the mock returns the content passed to it - mockEnsureCorrectEdit.mockResolvedValue({ - params: { - file_path: filePath, - old_string: originalContent, - new_string: proposedContent, - }, - occurrences: 1, - }); + mockEnsureCorrectFileContent.mockResolvedValue(proposedContent); const result = await getCorrectedFileContent( mockConfig, @@ -1102,16 +1051,13 @@ describe('WriteFileTool', () => { abortSignal, ); - expect(mockEnsureCorrectEdit).toHaveBeenCalledWith( - filePath, - originalContent, - expect.anything(), // params object - mockGeminiClientInstance, + expect(mockEnsureCorrectFileContent).toHaveBeenCalledWith( + proposedContent, mockBaseLlmClientInstance, abortSignal, true, + true, // aggressiveUnescape ); - expect(mockEnsureCorrectFileContent).not.toHaveBeenCalled(); expect(result.correctedContent).toBe(proposedContent); expect(result.originalContent).toBe(originalContent); expect(result.fileExists).toBe(true); diff --git a/packages/core/src/tools/write-file.ts b/packages/core/src/tools/write-file.ts index d7708d767a..1c8a230001 100644 --- a/packages/core/src/tools/write-file.ts +++ b/packages/core/src/tools/write-file.ts @@ -26,10 +26,7 @@ import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; import { ToolErrorType } from './tool-error.js'; import { makeRelative, shortenPath } from '../utils/paths.js'; import { getErrorMessage, isNodeError } from '../utils/errors.js'; -import { - ensureCorrectEdit, - ensureCorrectFileContent, -} from '../utils/editCorrector.js'; +import { ensureCorrectFileContent } from '../utils/editCorrector.js'; import { detectLineEnding } from '../utils/textUtils.js'; import { DEFAULT_DIFF_OPTIONS, getDiffStat } from './diffOptions.js'; import { getDiffContextSnippet } from './diff-utils.js'; @@ -48,6 +45,7 @@ import { debugLogger } from '../utils/debugLogger.js'; import { WRITE_FILE_DEFINITION } from './definitions/coreTools.js'; import { resolveToolDeclaration } from './definitions/resolver.js'; import { detectOmissionPlaceholders } from './omissionPlaceholderDetector.js'; +import { isGemini3Model } from '../config/models.js'; /** * Parameters for the WriteFile tool @@ -113,35 +111,16 @@ export async function getCorrectedFileContent( } } - // If readError is set, we have returned. - // So, file was either read successfully (fileExists=true, originalContent set) - // or it was ENOENT (fileExists=false, originalContent=''). + const aggressiveUnescape = !isGemini3Model(config.getActiveModel()); + + correctedContent = await ensureCorrectFileContent( + proposedContent, + config.getBaseLlmClient(), + abortSignal, + config.getDisableLLMCorrection(), + aggressiveUnescape, + ); - if (fileExists) { - // This implies originalContent is available - const { params: correctedParams } = await ensureCorrectEdit( - filePath, - originalContent, - { - old_string: originalContent, // Treat entire current content as old_string - new_string: proposedContent, - file_path: filePath, - }, - config.getGeminiClient(), - config.getBaseLlmClient(), - abortSignal, - config.getDisableLLMCorrection(), - ); - correctedContent = correctedParams.new_string; - } else { - // This implies new file (ENOENT) - correctedContent = await ensureCorrectFileContent( - proposedContent, - config.getBaseLlmClient(), - abortSignal, - config.getDisableLLMCorrection(), - ); - } return { originalContent, correctedContent, fileExists }; } diff --git a/packages/core/src/utils/editCorrector.test.ts b/packages/core/src/utils/editCorrector.test.ts index 35b126a5ea..533b49b9e4 100644 --- a/packages/core/src/utils/editCorrector.test.ts +++ b/packages/core/src/utils/editCorrector.test.ts @@ -5,10 +5,8 @@ */ /* eslint-disable @typescript-eslint/no-explicit-any */ -import type { Mock, Mocked } from 'vitest'; +import type { Mocked } from 'vitest'; import { vi, describe, it, expect, beforeEach } from 'vitest'; -import * as fs from 'node:fs'; -import { EDIT_TOOL_NAME } from '../tools/tool-names.js'; import type { BaseLlmClient } from '../core/baseLlmClient.js'; // MOCKS @@ -16,75 +14,16 @@ let callCount = 0; const mockResponses: any[] = []; let mockGenerateJson: any; -let mockStartChat: any; -let mockSendMessageStream: any; -vi.mock('fs', () => ({ - statSync: vi.fn(), - mkdirSync: vi.fn(), - createWriteStream: vi.fn(() => ({ - write: vi.fn(), - on: vi.fn(), - })), -})); - -vi.mock('../core/client.js', () => ({ - GeminiClient: vi.fn().mockImplementation(function ( - this: any, - _config: Config, - ) { - this.startChat = (...params: any[]) => mockStartChat(...params); - this.sendMessageStream = (...params: any[]) => - mockSendMessageStream(...params); - return this; - }), -})); // END MOCKS import { - countOccurrences, - ensureCorrectEdit, ensureCorrectFileContent, unescapeStringForGeminiBug, resetEditCorrectorCaches_TEST_ONLY, } from './editCorrector.js'; -import { GeminiClient } from '../core/client.js'; -import type { Config } from '../config/config.js'; -import { ToolRegistry } from '../tools/tool-registry.js'; - -vi.mock('../tools/tool-registry.js'); describe('editCorrector', () => { - describe('countOccurrences', () => { - it('should return 0 for empty string', () => { - expect(countOccurrences('', 'a')).toBe(0); - }); - it('should return 0 for empty substring', () => { - expect(countOccurrences('abc', '')).toBe(0); - }); - it('should return 0 if substring is not found', () => { - expect(countOccurrences('abc', 'd')).toBe(0); - }); - it('should return 1 if substring is found once', () => { - expect(countOccurrences('abc', 'b')).toBe(1); - }); - it('should return correct count for multiple occurrences', () => { - expect(countOccurrences('ababa', 'a')).toBe(3); - expect(countOccurrences('ababab', 'ab')).toBe(3); - }); - it('should count non-overlapping occurrences', () => { - expect(countOccurrences('aaaaa', 'aa')).toBe(2); - expect(countOccurrences('ababab', 'aba')).toBe(1); - }); - it('should correctly count occurrences when substring is longer', () => { - expect(countOccurrences('abc', 'abcdef')).toBe(0); - }); - it('should be case-sensitive', () => { - expect(countOccurrences('abcABC', 'a')).toBe(1); - expect(countOccurrences('abcABC', 'A')).toBe(1); - }); - }); - describe('unescapeStringForGeminiBug', () => { it('should unescape common sequences', () => { expect(unescapeStringForGeminiBug('\\n')).toBe('\n'); @@ -156,542 +95,6 @@ describe('editCorrector', () => { }); }); - describe('ensureCorrectEdit', () => { - let mockGeminiClientInstance: Mocked; - let mockBaseLlmClientInstance: Mocked; - let mockToolRegistry: Mocked; - let mockConfigInstance: Config; - const abortSignal = new AbortController().signal; - - beforeEach(() => { - mockToolRegistry = new ToolRegistry( - {} as Config, - {} as any, - ) as Mocked; - const configParams = { - apiKey: 'test-api-key', - model: 'test-model', - sandbox: false as boolean | string, - targetDir: '/test', - debugMode: false, - question: undefined as string | undefined, - - coreTools: undefined as string[] | undefined, - toolDiscoveryCommand: undefined as string | undefined, - toolCallCommand: undefined as string | undefined, - mcpServerCommand: undefined as string | undefined, - mcpServers: undefined as Record | undefined, - userAgent: 'test-agent', - userMemory: '', - geminiMdFileCount: 0, - alwaysSkipModificationConfirmation: false, - }; - mockConfigInstance = { - ...configParams, - getApiKey: vi.fn(() => configParams.apiKey), - getModel: vi.fn(() => configParams.model), - getSandbox: vi.fn(() => configParams.sandbox), - getTargetDir: vi.fn(() => configParams.targetDir), - getToolRegistry: vi.fn(() => mockToolRegistry), - getDebugMode: vi.fn(() => configParams.debugMode), - getQuestion: vi.fn(() => configParams.question), - - getCoreTools: vi.fn(() => configParams.coreTools), - getToolDiscoveryCommand: vi.fn(() => configParams.toolDiscoveryCommand), - getToolCallCommand: vi.fn(() => configParams.toolCallCommand), - getMcpServerCommand: vi.fn(() => configParams.mcpServerCommand), - getMcpServers: vi.fn(() => configParams.mcpServers), - getUserAgent: vi.fn(() => configParams.userAgent), - getUserMemory: vi.fn(() => configParams.userMemory), - setUserMemory: vi.fn((mem: string) => { - configParams.userMemory = mem; - }), - getGeminiMdFileCount: vi.fn(() => configParams.geminiMdFileCount), - setGeminiMdFileCount: vi.fn((count: number) => { - configParams.geminiMdFileCount = count; - }), - getAlwaysSkipModificationConfirmation: vi.fn( - () => configParams.alwaysSkipModificationConfirmation, - ), - setAlwaysSkipModificationConfirmation: vi.fn((skip: boolean) => { - configParams.alwaysSkipModificationConfirmation = skip; - }), - getQuotaErrorOccurred: vi.fn().mockReturnValue(false), - setQuotaErrorOccurred: vi.fn(), - } as unknown as Config; - - callCount = 0; - mockResponses.length = 0; - mockGenerateJson = vi - .fn() - .mockImplementation((_contents, _schema, signal) => { - // Check if the signal is aborted. If so, throw an error or return a specific response. - if (signal && signal.aborted) { - return Promise.reject(new Error('Aborted')); // Or some other specific error/response - } - const response = mockResponses[callCount]; - callCount++; - if (response === undefined) return Promise.resolve({}); - return Promise.resolve(response); - }); - mockStartChat = vi.fn(); - mockSendMessageStream = vi.fn(); - - mockGeminiClientInstance = new GeminiClient( - mockConfigInstance, - ) as Mocked; - mockGeminiClientInstance.getHistory = vi.fn().mockReturnValue([]); - mockBaseLlmClientInstance = { - generateJson: mockGenerateJson, - config: { - generationConfigService: { - getResolvedConfig: vi.fn().mockReturnValue({ - model: 'edit-corrector', - generateContentConfig: {}, - }), - }, - }, - } as unknown as Mocked; - resetEditCorrectorCaches_TEST_ONLY(); - }); - - describe('Scenario Group 1: originalParams.old_string matches currentContent directly', () => { - it('Test 1.1: old_string (no literal \\), new_string (escaped by Gemini) -> new_string unescaped', async () => { - const currentContent = 'This is a test string to find me.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find me', - new_string: 'replace with \\"this\\"', - }; - mockResponses.push({ - corrected_new_string_escaping: 'replace with "this"', - }); - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(1); - expect(result.params.new_string).toBe('replace with "this"'); - expect(result.params.old_string).toBe('find me'); - expect(result.occurrences).toBe(1); - }); - it('Test 1.2: old_string (no literal \\), new_string (correctly formatted) -> new_string unchanged', async () => { - const currentContent = 'This is a test string to find me.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find me', - new_string: 'replace with this', - }; - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(0); - expect(result.params.new_string).toBe('replace with this'); - expect(result.params.old_string).toBe('find me'); - expect(result.occurrences).toBe(1); - }); - it('Test 1.3: old_string (with literal \\), new_string (escaped by Gemini) -> new_string unchanged (still escaped)', async () => { - const currentContent = 'This is a test string to find\\me.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find\\me', - new_string: 'replace with \\"this\\"', - }; - mockResponses.push({ - corrected_new_string_escaping: 'replace with "this"', - }); - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(1); - expect(result.params.new_string).toBe('replace with "this"'); - expect(result.params.old_string).toBe('find\\me'); - expect(result.occurrences).toBe(1); - }); - it('Test 1.4: old_string (with literal \\), new_string (correctly formatted) -> new_string unchanged', async () => { - const currentContent = 'This is a test string to find\\me.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find\\me', - new_string: 'replace with this', - }; - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(0); - expect(result.params.new_string).toBe('replace with this'); - expect(result.params.old_string).toBe('find\\me'); - expect(result.occurrences).toBe(1); - }); - }); - - describe('Scenario Group 2: originalParams.old_string does NOT match, but unescapeStringForGeminiBug(originalParams.old_string) DOES match', () => { - it('Test 2.1: old_string (over-escaped, no intended literal \\), new_string (escaped by Gemini) -> new_string unescaped', async () => { - const currentContent = 'This is a test string to find "me".'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find \\"me\\"', - new_string: 'replace with \\"this\\"', - }; - mockResponses.push({ corrected_new_string: 'replace with "this"' }); - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(1); - expect(result.params.new_string).toBe('replace with "this"'); - expect(result.params.old_string).toBe('find "me"'); - expect(result.occurrences).toBe(1); - }); - it('Test 2.2: old_string (over-escaped, no intended literal \\), new_string (correctly formatted) -> new_string unescaped (harmlessly)', async () => { - const currentContent = 'This is a test string to find "me".'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find \\"me\\"', - new_string: 'replace with this', - }; - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(0); - expect(result.params.new_string).toBe('replace with this'); - expect(result.params.old_string).toBe('find "me"'); - expect(result.occurrences).toBe(1); - }); - it('Test 2.3: old_string (over-escaped, with intended literal \\), new_string (simple) -> new_string corrected', async () => { - const currentContent = 'This is a test string to find \\me.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find \\\\me', - new_string: 'replace with foobar', - }; - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(0); - expect(result.params.new_string).toBe('replace with foobar'); - expect(result.params.old_string).toBe('find \\me'); - expect(result.occurrences).toBe(1); - }); - }); - - describe('Scenario Group 3: LLM Correction Path', () => { - it('Test 3.1: old_string (no literal \\), new_string (escaped by Gemini), LLM re-escapes new_string -> final new_string is double unescaped', async () => { - const currentContent = 'This is a test string to corrected find me.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find me', - new_string: 'replace with \\\\"this\\\\"', - }; - const llmNewString = 'LLM says replace with "that"'; - mockResponses.push({ corrected_new_string_escaping: llmNewString }); - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(1); - expect(result.params.new_string).toBe(llmNewString); - expect(result.params.old_string).toBe('find me'); - expect(result.occurrences).toBe(1); - }); - it('Test 3.2: old_string (with literal \\), new_string (escaped by Gemini), LLM re-escapes new_string -> final new_string is unescaped once', async () => { - const currentContent = 'This is a test string to corrected find me.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find\\me', - new_string: 'replace with \\\\"this\\\\"', - }; - const llmCorrectedOldString = 'corrected find me'; - const llmNewString = 'LLM says replace with "that"'; - mockResponses.push({ corrected_target_snippet: llmCorrectedOldString }); - mockResponses.push({ corrected_new_string: llmNewString }); - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(2); - expect(result.params.new_string).toBe(llmNewString); - expect(result.params.old_string).toBe(llmCorrectedOldString); - expect(result.occurrences).toBe(1); - }); - it('Test 3.3: old_string needs LLM, new_string is fine -> old_string corrected, new_string original', async () => { - const currentContent = 'This is a test string to be corrected.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'fiiind me', - new_string: 'replace with "this"', - }; - const llmCorrectedOldString = 'to be corrected'; - mockResponses.push({ corrected_target_snippet: llmCorrectedOldString }); - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(1); - expect(result.params.new_string).toBe('replace with "this"'); - expect(result.params.old_string).toBe(llmCorrectedOldString); - expect(result.occurrences).toBe(1); - }); - it('Test 3.4: LLM correction path, correctNewString returns the originalNewString it was passed (which was unescaped) -> final new_string is unescaped', async () => { - const currentContent = 'This is a test string to corrected find me.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find me', - new_string: 'replace with \\\\"this\\\\"', - }; - const newStringForLLMAndReturnedByLLM = 'replace with "this"'; - mockResponses.push({ - corrected_new_string_escaping: newStringForLLMAndReturnedByLLM, - }); - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(1); - expect(result.params.new_string).toBe(newStringForLLMAndReturnedByLLM); - expect(result.occurrences).toBe(1); - }); - }); - - describe('Scenario Group 4: No Match Found / Multiple Matches', () => { - it('Test 4.1: No version of old_string (original, unescaped, LLM-corrected) matches -> returns original params, 0 occurrences', async () => { - const currentContent = 'This content has nothing to find.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'nonexistent string', - new_string: 'some new string', - }; - mockResponses.push({ corrected_target_snippet: 'still nonexistent' }); - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(1); - expect(result.params).toEqual(originalParams); - expect(result.occurrences).toBe(0); - }); - it('Test 4.2: unescapedOldStringAttempt results in >1 occurrences -> returns original params, count occurrences', async () => { - const currentContent = - 'This content has find "me" and also find "me" again.'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find "me"', - new_string: 'some new string', - }; - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(0); - expect(result.params).toEqual(originalParams); - expect(result.occurrences).toBe(2); - }); - }); - - describe('Scenario Group 5: Specific unescapeStringForGeminiBug checks (integrated into ensureCorrectEdit)', () => { - it('Test 5.1: old_string needs LLM to become currentContent, new_string also needs correction', async () => { - const currentContent = 'const x = "a\nbc\\"def\\"'; - const originalParams = { - file_path: '/test/file.txt', - old_string: 'const x = \\"a\\nbc\\\\"def\\\\"', - new_string: 'const y = \\"new\\nval\\\\"content\\\\"', - }; - const expectedFinalNewString = 'const y = "new\nval\\"content\\"'; - mockResponses.push({ corrected_target_snippet: currentContent }); - mockResponses.push({ corrected_new_string: expectedFinalNewString }); - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - expect(mockGenerateJson).toHaveBeenCalledTimes(2); - expect(result.params.old_string).toBe(currentContent); - expect(result.params.new_string).toBe(expectedFinalNewString); - expect(result.occurrences).toBe(1); - }); - }); - - describe('Scenario Group 6: Concurrent Edits', () => { - it('Test 6.1: should return early if file was modified by another process', async () => { - const filePath = '/test/file.txt'; - const currentContent = - 'This content has been modified by someone else.'; - const originalParams = { - file_path: filePath, - old_string: 'nonexistent string', - new_string: 'some new string', - }; - - const now = Date.now(); - const lastEditTime = now - 5000; // 5 seconds ago - - // Mock the file's modification time to be recent - vi.spyOn(fs, 'statSync').mockReturnValue({ - mtimeMs: now, - } as fs.Stats); - - // Mock the last edit timestamp from our history to be in the past - const history = [ - { - role: 'model', - parts: [ - { - functionResponse: { - name: EDIT_TOOL_NAME, - id: `${EDIT_TOOL_NAME}-${lastEditTime}-123`, - response: { - output: { - llmContent: `Successfully modified file: ${filePath}`, - }, - }, - }, - }, - ], - }, - ]; - (mockGeminiClientInstance.getHistory as Mock).mockReturnValue(history); - - const result = await ensureCorrectEdit( - filePath, - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - - expect(result.occurrences).toBe(0); - expect(result.params).toEqual(originalParams); - }); - }); - - describe('Scenario Group 7: Trimming with Newline Preservation', () => { - it('Test 7.1: should preserve trailing newlines in new_string when trimming is applied', async () => { - const currentContent = ' find me'; // Matches old_string initially - const originalParams = { - file_path: '/test/file.txt', - old_string: ' find me', // Matches, but has whitespace to trim - new_string: ' replaced\n\n', // Needs trimming but preserve newlines - }; - - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - - // old_string should be trimmed to 'find me' because 'find me' also exists uniquely in ' find me' - expect(result.params.old_string).toBe('find me'); - // new_string should be trimmed of spaces but keep ALL newlines - expect(result.params.new_string).toBe('replaced\n\n'); - expect(result.occurrences).toBe(1); - }); - - it('Test 7.2: should handle trailing newlines separated by spaces (regression fix)', async () => { - const currentContent = 'find me '; // Matches old_string initially - const originalParams = { - file_path: '/test/file.txt', - old_string: 'find me ', // Trailing space - new_string: 'replaced \n \n', // Trailing newlines with spaces - }; - - const result = await ensureCorrectEdit( - '/test/file.txt', - currentContent, - originalParams, - mockGeminiClientInstance, - mockBaseLlmClientInstance, - abortSignal, - false, - ); - - expect(result.params.old_string).toBe('find me'); - // Should capture both newlines and join them, stripping the space between - expect(result.params.new_string).toBe('replaced\n\n'); - expect(result.occurrences).toBe(1); - }); - }); - }); - describe('ensureCorrectFileContent', () => { let mockBaseLlmClientInstance: Mocked; const abortSignal = new AbortController().signal; @@ -811,5 +214,37 @@ describe('editCorrector', () => { expect(result).toBe(correctedContent); }); + + it('should return unescaped content when LLM is disabled and aggressiveUnescape is true', async () => { + const content = 'LaTeX command \\\\title{Example}'; + // unescapeStringForGeminiBug would change \\\\title to \title (literal tab and "itle") + const expected = 'LaTeX command \title{Example}'; + + const result = await ensureCorrectFileContent( + content, + mockBaseLlmClientInstance, + abortSignal, + true, // disableLLMCorrection + true, // aggressiveUnescape + ); + + expect(result).toBe(expected); + expect(mockGenerateJson).not.toHaveBeenCalled(); + }); + + it('should return original content when LLM is disabled and aggressiveUnescape is false', async () => { + const content = 'LaTeX command \\\\title{Example}'; + + const result = await ensureCorrectFileContent( + content, + mockBaseLlmClientInstance, + abortSignal, + true, // disableLLMCorrection + false, // aggressiveUnescape + ); + + expect(result).toBe(content); + expect(mockGenerateJson).not.toHaveBeenCalled(); + }); }); }); diff --git a/packages/core/src/utils/editCorrector.ts b/packages/core/src/utils/editCorrector.ts index 660bff0b17..f8ff81b97e 100644 --- a/packages/core/src/utils/editCorrector.ts +++ b/packages/core/src/utils/editCorrector.ts @@ -5,21 +5,7 @@ */ import type { Content } from '@google/genai'; -import type { GeminiClient } from '../core/client.js'; import type { BaseLlmClient } from '../core/baseLlmClient.js'; -import type { EditToolParams } from '../tools/edit.js'; -import { - EDIT_TOOL_NAME, - GREP_TOOL_NAME, - READ_FILE_TOOL_NAME, - READ_MANY_FILES_TOOL_NAME, - WRITE_FILE_TOOL_NAME, -} from '../tools/tool-names.js'; -import { - isFunctionResponse, - isFunctionCall, -} from '../utils/messageInspectors.js'; -import * as fs from 'node:fs'; import { promptIdContext } from './promptIdContext.js'; import { debugLogger } from './debugLogger.js'; import { LRUCache } from 'mnemonist'; @@ -39,336 +25,34 @@ function getPromptId(): string { const MAX_CACHE_SIZE = 50; -// Cache for ensureCorrectEdit results -const editCorrectionCache = new LRUCache( - MAX_CACHE_SIZE, -); - // Cache for ensureCorrectFileContent results const fileContentCorrectionCache = new LRUCache(MAX_CACHE_SIZE); -/** - * Defines the structure of the parameters within CorrectedEditResult - */ -interface CorrectedEditParams { - file_path: string; - old_string: string; - new_string: string; -} - -/** - * Defines the result structure for ensureCorrectEdit. - */ -export interface CorrectedEditResult { - params: CorrectedEditParams; - occurrences: number; -} - -/** - * Extracts the timestamp from the .id value, which is in format - * -- - * @param fcnId the ID value of a functionCall or functionResponse object - * @returns -1 if the timestamp could not be extracted, else the timestamp (as a number) - */ -function getTimestampFromFunctionId(fcnId: string): number { - const idParts = fcnId.split('-'); - if (idParts.length > 2) { - const timestamp = parseInt(idParts[1], 10); - if (!isNaN(timestamp)) { - return timestamp; - } - } - return -1; -} - -/** - * Will look through the gemini client history and determine when the most recent - * edit to a target file occurred. If no edit happened, it will return -1 - * @param filePath the path to the file - * @param client the geminiClient, so that we can get the history - * @returns a DateTime (as a number) of when the last edit occurred, or -1 if no edit was found. - */ -async function findLastEditTimestamp( - filePath: string, - client: GeminiClient, -): Promise { - const history = client.getHistory() ?? []; - - // Tools that may reference the file path in their FunctionResponse `output`. - const toolsInResp = new Set([ - WRITE_FILE_TOOL_NAME, - EDIT_TOOL_NAME, - READ_MANY_FILES_TOOL_NAME, - GREP_TOOL_NAME, - ]); - // Tools that may reference the file path in their FunctionCall `args`. - const toolsInCall = new Set([...toolsInResp, READ_FILE_TOOL_NAME]); - - // Iterate backwards to find the most recent relevant action. - for (const entry of history.slice().reverse()) { - if (!entry.parts) continue; - - for (const part of entry.parts) { - let id: string | undefined; - let content: unknown; - - // Check for a relevant FunctionCall with the file path in its arguments. - if ( - isFunctionCall(entry) && - part.functionCall?.name && - toolsInCall.has(part.functionCall.name) - ) { - id = part.functionCall.id; - content = part.functionCall.args; - } - // Check for a relevant FunctionResponse with the file path in its output. - else if ( - isFunctionResponse(entry) && - part.functionResponse?.name && - toolsInResp.has(part.functionResponse.name) - ) { - const { response } = part.functionResponse; - if (response && !('error' in response) && 'output' in response) { - id = part.functionResponse.id; - content = response['output']; - } - } - - if (!id || content === undefined) continue; - - // Use the "blunt hammer" approach to find the file path in the content. - // Note that the tool response data is inconsistent in their formatting - // with successes and errors - so, we just check for the existence - // as the best guess to if error/failed occurred with the response. - const stringified = JSON.stringify(content); - if ( - !stringified.includes('Error') && // only applicable for functionResponse - !stringified.includes('Failed') && // only applicable for functionResponse - stringified.includes(filePath) - ) { - return getTimestampFromFunctionId(id); - } - } - } - - return -1; -} - -/** - * Attempts to correct edit parameters if the original old_string is not found. - * It tries unescaping, and then LLM-based correction. - * Results are cached to avoid redundant processing. - * - * @param currentContent The current content of the file. - * @param originalParams The original EditToolParams - * @param client The GeminiClient for LLM calls. - * @returns A promise resolving to an object containing the (potentially corrected) - * EditToolParams (as CorrectedEditParams) and the final occurrences count. - */ -export async function ensureCorrectEdit( - filePath: string, - currentContent: string, - originalParams: EditToolParams, // This is the EditToolParams from edit.ts, without 'corrected' - geminiClient: GeminiClient, - baseLlmClient: BaseLlmClient, - abortSignal: AbortSignal, - disableLLMCorrection: boolean, -): Promise { - const cacheKey = `${currentContent}---${originalParams.old_string}---${originalParams.new_string}`; - const cachedResult = editCorrectionCache.get(cacheKey); - if (cachedResult) { - return cachedResult; - } - - let finalNewString = originalParams.new_string; - const newStringPotentiallyEscaped = - unescapeStringForGeminiBug(originalParams.new_string) !== - originalParams.new_string; - - const allowMultiple = originalParams.allow_multiple ?? false; - - let finalOldString = originalParams.old_string; - let occurrences = countOccurrences(currentContent, finalOldString); - - const isOccurrencesMatch = allowMultiple - ? occurrences > 0 - : occurrences === 1; - - if (isOccurrencesMatch) { - if (newStringPotentiallyEscaped && !disableLLMCorrection) { - finalNewString = await correctNewStringEscaping( - baseLlmClient, - finalOldString, - originalParams.new_string, - abortSignal, - ); - } - } else if (occurrences > 1 && !allowMultiple) { - // If user doesn't allow multiple but found multiple, return as-is (will fail validation later) - const result: CorrectedEditResult = { - params: { ...originalParams }, - occurrences, - }; - editCorrectionCache.set(cacheKey, result); - return result; - } else { - // occurrences is 0 or some other unexpected state initially - const unescapedOldStringAttempt = unescapeStringForGeminiBug( - originalParams.old_string, - ); - occurrences = countOccurrences(currentContent, unescapedOldStringAttempt); - - const isUnescapedOccurrencesMatch = allowMultiple - ? occurrences > 0 - : occurrences === 1; - - if (isUnescapedOccurrencesMatch) { - finalOldString = unescapedOldStringAttempt; - if (newStringPotentiallyEscaped && !disableLLMCorrection) { - finalNewString = await correctNewString( - baseLlmClient, - originalParams.old_string, // original old - unescapedOldStringAttempt, // corrected old - originalParams.new_string, // original new (which is potentially escaped) - abortSignal, - ); - } - } else if (occurrences === 0) { - if (filePath) { - // In order to keep from clobbering edits made outside our system, - // let's check if there was a more recent edit to the file than what - // our system has done - const lastEditedByUsTime = await findLastEditTimestamp( - filePath, - geminiClient, - ); - - // Add a 1-second buffer to account for timing inaccuracies. If the file - // was modified more than a second after the last edit tool was run, we - // can assume it was modified by something else. - if (lastEditedByUsTime > 0) { - const stats = fs.statSync(filePath); - const diff = stats.mtimeMs - lastEditedByUsTime; - if (diff > 2000) { - // Hard coded for 2 seconds - // This file was edited sooner - const result: CorrectedEditResult = { - params: { ...originalParams }, - occurrences: 0, // Explicitly 0 as LLM failed - }; - editCorrectionCache.set(cacheKey, result); - return result; - } - } - } - - if (disableLLMCorrection) { - const result: CorrectedEditResult = { - params: { ...originalParams }, - occurrences: 0, - }; - editCorrectionCache.set(cacheKey, result); - return result; - } - - const llmCorrectedOldString = await correctOldStringMismatch( - baseLlmClient, - currentContent, - unescapedOldStringAttempt, - abortSignal, - ); - const llmOldOccurrences = countOccurrences( - currentContent, - llmCorrectedOldString, - ); - - const isLlmOccurrencesMatch = allowMultiple - ? llmOldOccurrences > 0 - : llmOldOccurrences === 1; - - if (isLlmOccurrencesMatch) { - finalOldString = llmCorrectedOldString; - occurrences = llmOldOccurrences; - - if (newStringPotentiallyEscaped) { - const baseNewStringForLLMCorrection = unescapeStringForGeminiBug( - originalParams.new_string, - ); - finalNewString = await correctNewString( - baseLlmClient, - originalParams.old_string, // original old - llmCorrectedOldString, // corrected old - baseNewStringForLLMCorrection, // base new for correction - abortSignal, - ); - } - } else { - // LLM correction also failed for old_string - const result: CorrectedEditResult = { - params: { ...originalParams }, - occurrences: 0, // Explicitly 0 as LLM failed - }; - editCorrectionCache.set(cacheKey, result); - return result; - } - } else { - // Unescaping old_string resulted in > 1 occurrence but not allowMultiple - const result: CorrectedEditResult = { - params: { ...originalParams }, - occurrences, // This will be > 1 - }; - editCorrectionCache.set(cacheKey, result); - return result; - } - } - - const { targetString, pair } = trimPairIfPossible( - finalOldString, - finalNewString, - currentContent, - allowMultiple, - ); - finalOldString = targetString; - finalNewString = pair; - - // Final result construction - const result: CorrectedEditResult = { - params: { - file_path: originalParams.file_path, - old_string: finalOldString, - new_string: finalNewString, - }, - occurrences: countOccurrences(currentContent, finalOldString), // Recalculate occurrences with the final old_string - }; - editCorrectionCache.set(cacheKey, result); - return result; -} - export async function ensureCorrectFileContent( content: string, baseLlmClient: BaseLlmClient, abortSignal: AbortSignal, disableLLMCorrection: boolean = true, + aggressiveUnescape: boolean = false, ): Promise { const cachedResult = fileContentCorrectionCache.get(content); if (cachedResult) { return cachedResult; } - const contentPotentiallyEscaped = - unescapeStringForGeminiBug(content) !== content; - if (!contentPotentiallyEscaped) { + const unescapedContent = unescapeStringForGeminiBug(content); + if (unescapedContent === content) { fileContentCorrectionCache.set(content, content); return content; } if (disableLLMCorrection) { - // If we can't use LLM, we should at least use the unescaped content - // as it's likely better than the original if it was detected as potentially escaped. - // unescapeStringForGeminiBug is a heuristic, not an LLM call. - const unescaped = unescapeStringForGeminiBug(content); - fileContentCorrectionCache.set(content, unescaped); - return unescaped; + if (aggressiveUnescape) { + fileContentCorrectionCache.set(content, unescapedContent); + return unescapedContent; + } + fileContentCorrectionCache.set(content, content); + return content; } const correctedContent = await correctStringEscaping( @@ -380,242 +64,6 @@ export async function ensureCorrectFileContent( return correctedContent; } -// Define the expected JSON schema for the LLM response for old_string correction -const OLD_STRING_CORRECTION_SCHEMA: Record = { - type: 'object', - properties: { - corrected_target_snippet: { - type: 'string', - description: - 'The corrected version of the target snippet that exactly and uniquely matches a segment within the provided file content.', - }, - }, - required: ['corrected_target_snippet'], -}; - -export async function correctOldStringMismatch( - baseLlmClient: BaseLlmClient, - fileContent: string, - problematicSnippet: string, - abortSignal: AbortSignal, -): Promise { - const prompt = ` -Context: A process needs to find an exact literal, unique match for a specific text snippet within a file's content. The provided snippet failed to match exactly. This is most likely because it has been overly escaped. - -Task: Analyze the provided file content and the problematic target snippet. Identify the segment in the file content that the snippet was *most likely* intended to match. Output the *exact*, literal text of that segment from the file content. Focus *only* on removing extra escape characters and correcting formatting, whitespace, or minor differences to achieve a PERFECT literal match. The output must be the exact literal text as it appears in the file. - -Problematic target snippet: -\`\`\` -${problematicSnippet} -\`\`\` - -File Content: -\`\`\` -${fileContent} -\`\`\` - -For example, if the problematic target snippet was "\\\\\\nconst greeting = \`Hello \\\\\`\${name}\\\\\`\`;" and the file content had content that looked like "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;", then corrected_target_snippet should likely be "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;" to fix the incorrect escaping to match the original file content. -If the differences are only in whitespace or formatting, apply similar whitespace/formatting changes to the corrected_target_snippet. - -Return ONLY the corrected target snippet in the specified JSON format with the key 'corrected_target_snippet'. If no clear, unique match can be found, return an empty string for 'corrected_target_snippet'. -`.trim(); - - const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }]; - - try { - const result = await baseLlmClient.generateJson({ - modelConfigKey: { model: 'edit-corrector' }, - contents, - schema: OLD_STRING_CORRECTION_SCHEMA, - abortSignal, - systemInstruction: CODE_CORRECTION_SYSTEM_PROMPT, - promptId: getPromptId(), - role: LlmRole.UTILITY_EDIT_CORRECTOR, - }); - - if ( - result && - typeof result['corrected_target_snippet'] === 'string' && - result['corrected_target_snippet'].length > 0 - ) { - return result['corrected_target_snippet']; - } else { - return problematicSnippet; - } - } catch (error) { - if (abortSignal.aborted) { - throw error; - } - - debugLogger.warn( - 'Error during LLM call for old string snippet correction:', - error, - ); - - return problematicSnippet; - } -} - -// Define the expected JSON schema for the new_string correction LLM response -const NEW_STRING_CORRECTION_SCHEMA: Record = { - type: 'object', - properties: { - corrected_new_string: { - type: 'string', - description: - 'The original_new_string adjusted to be a suitable replacement for the corrected_old_string, while maintaining the original intent of the change.', - }, - }, - required: ['corrected_new_string'], -}; - -/** - * Adjusts the new_string to align with a corrected old_string, maintaining the original intent. - */ -export async function correctNewString( - baseLlmClient: BaseLlmClient, - originalOldString: string, - correctedOldString: string, - originalNewString: string, - abortSignal: AbortSignal, -): Promise { - if (originalOldString === correctedOldString) { - return originalNewString; - } - - const prompt = ` -Context: A text replacement operation was planned. The original text to be replaced (original_old_string) was slightly different from the actual text in the file (corrected_old_string). The original_old_string has now been corrected to match the file content. -We now need to adjust the replacement text (original_new_string) so that it makes sense as a replacement for the corrected_old_string, while preserving the original intent of the change. - -original_old_string (what was initially intended to be found): -\`\`\` -${originalOldString} -\`\`\` - -corrected_old_string (what was actually found in the file and will be replaced): -\`\`\` -${correctedOldString} -\`\`\` - -original_new_string (what was intended to replace original_old_string): -\`\`\` -${originalNewString} -\`\`\` - -Task: Based on the differences between original_old_string and corrected_old_string, and the content of original_new_string, generate a corrected_new_string. This corrected_new_string should be what original_new_string would have been if it was designed to replace corrected_old_string directly, while maintaining the spirit of the original transformation. - -For example, if original_old_string was "\\\\\\nconst greeting = \`Hello \\\\\`\${name}\\\\\`\`;" and corrected_old_string is "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;", and original_new_string was "\\\\\\nconst greeting = \`Hello \\\\\`\${name} \${lastName}\\\\\`\`;", then corrected_new_string should likely be "\nconst greeting = \`Hello ${'\\`'}\${name} \${lastName}${'\\`'}\`;" to fix the incorrect escaping. -If the differences are only in whitespace or formatting, apply similar whitespace/formatting changes to the corrected_new_string. - -Return ONLY the corrected string in the specified JSON format with the key 'corrected_new_string'. If no adjustment is deemed necessary or possible, return the original_new_string. - `.trim(); - - const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }]; - - try { - const result = await baseLlmClient.generateJson({ - modelConfigKey: { model: 'edit-corrector' }, - contents, - schema: NEW_STRING_CORRECTION_SCHEMA, - abortSignal, - systemInstruction: CODE_CORRECTION_SYSTEM_PROMPT, - promptId: getPromptId(), - role: LlmRole.UTILITY_EDIT_CORRECTOR, - }); - - if ( - result && - typeof result['corrected_new_string'] === 'string' && - result['corrected_new_string'].length > 0 - ) { - return result['corrected_new_string']; - } else { - return originalNewString; - } - } catch (error) { - if (abortSignal.aborted) { - throw error; - } - - debugLogger.warn('Error during LLM call for new_string correction:', error); - return originalNewString; - } -} - -const CORRECT_NEW_STRING_ESCAPING_SCHEMA: Record = { - type: 'object', - properties: { - corrected_new_string_escaping: { - type: 'string', - description: - 'The new_string with corrected escaping, ensuring it is a proper replacement for the old_string, especially considering potential over-escaping issues from previous LLM generations.', - }, - }, - required: ['corrected_new_string_escaping'], -}; - -export async function correctNewStringEscaping( - baseLlmClient: BaseLlmClient, - oldString: string, - potentiallyProblematicNewString: string, - abortSignal: AbortSignal, -): Promise { - const prompt = ` -Context: A text replacement operation is planned. The text to be replaced (old_string) has been correctly identified in the file. However, the replacement text (new_string) might have been improperly escaped by a previous LLM generation (e.g. too many backslashes for newlines like \\n instead of \n, or unnecessarily quotes like \\"Hello\\" instead of "Hello"). - -old_string (this is the exact text that will be replaced): -\`\`\` -${oldString} -\`\`\` - -potentially_problematic_new_string (this is the text that should replace old_string, but MIGHT have bad escaping, or might be entirely correct): -\`\`\` -${potentiallyProblematicNewString} -\`\`\` - -Task: Analyze the potentially_problematic_new_string. If it's syntactically invalid due to incorrect escaping (e.g., "\n", "\t", "\\", "\\'", "\\""), correct the invalid syntax. The goal is to ensure the new_string, when inserted into the code, will be a valid and correctly interpreted. - -For example, if old_string is "foo" and potentially_problematic_new_string is "bar\\nbaz", the corrected_new_string_escaping should be "bar\nbaz". -If potentially_problematic_new_string is console.log(\\"Hello World\\"), it should be console.log("Hello World"). - -Return ONLY the corrected string in the specified JSON format with the key 'corrected_new_string_escaping'. If no escaping correction is needed, return the original potentially_problematic_new_string. - `.trim(); - - const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }]; - - try { - const result = await baseLlmClient.generateJson({ - modelConfigKey: { model: 'edit-corrector' }, - contents, - schema: CORRECT_NEW_STRING_ESCAPING_SCHEMA, - abortSignal, - systemInstruction: CODE_CORRECTION_SYSTEM_PROMPT, - promptId: getPromptId(), - role: LlmRole.UTILITY_EDIT_CORRECTOR, - }); - - if ( - result && - typeof result['corrected_new_string_escaping'] === 'string' && - result['corrected_new_string_escaping'].length > 0 - ) { - return result['corrected_new_string_escaping']; - } else { - return potentiallyProblematicNewString; - } - } catch (error) { - if (abortSignal.aborted) { - throw error; - } - - debugLogger.warn( - 'Error during LLM call for new_string escaping correction:', - error, - ); - return potentiallyProblematicNewString; - } -} - const CORRECT_STRING_ESCAPING_SCHEMA: Record = { type: 'object', properties: { @@ -684,46 +132,6 @@ Return ONLY the corrected string in the specified JSON format with the key 'corr } } -function trimPreservingTrailingNewline(str: string): string { - const trimmedEnd = str.trimEnd(); - const trailingWhitespace = str.slice(trimmedEnd.length); - const trailingNewlines = trailingWhitespace.replace(/[^\r\n]/g, ''); - return str.trim() + trailingNewlines; -} - -function trimPairIfPossible( - target: string, - trimIfTargetTrims: string, - currentContent: string, - allowMultiple: boolean, -) { - const trimmedTargetString = trimPreservingTrailingNewline(target); - if (target.length !== trimmedTargetString.length) { - const trimmedTargetOccurrences = countOccurrences( - currentContent, - trimmedTargetString, - ); - - const isMatch = allowMultiple - ? trimmedTargetOccurrences > 0 - : trimmedTargetOccurrences === 1; - - if (isMatch) { - const trimmedReactiveString = - trimPreservingTrailingNewline(trimIfTargetTrims); - return { - targetString: trimmedTargetString, - pair: trimmedReactiveString, - }; - } - } - - return { - targetString: target, - pair: trimIfTargetTrims, - }; -} - /** * Unescapes a string that might have been overly escaped by an LLM. */ @@ -770,23 +178,6 @@ export function unescapeStringForGeminiBug(inputString: string): string { ); } -/** - * Counts occurrences of a substring in a string - */ -export function countOccurrences(str: string, substr: string): number { - if (substr === '') { - return 0; - } - let count = 0; - let pos = str.indexOf(substr); - while (pos !== -1) { - count++; - pos = str.indexOf(substr, pos + substr.length); // Start search after the current match - } - return count; -} - export function resetEditCorrectorCaches_TEST_ONLY() { - editCorrectionCache.clear(); fileContentCorrectionCache.clear(); }