mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-30 07:51:07 -07:00
Avoid overaggressive unescaping (#20520)
This commit is contained in:
committed by
GitHub
parent
ecfa4e0437
commit
4b7ce1fe67
@@ -5,10 +5,8 @@
|
||||
*/
|
||||
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
import type { Mock, Mocked } from 'vitest';
|
||||
import type { Mocked } from 'vitest';
|
||||
import { vi, describe, it, expect, beforeEach } from 'vitest';
|
||||
import * as fs from 'node:fs';
|
||||
import { EDIT_TOOL_NAME } from '../tools/tool-names.js';
|
||||
import type { BaseLlmClient } from '../core/baseLlmClient.js';
|
||||
|
||||
// MOCKS
|
||||
@@ -16,75 +14,16 @@ let callCount = 0;
|
||||
const mockResponses: any[] = [];
|
||||
|
||||
let mockGenerateJson: any;
|
||||
let mockStartChat: any;
|
||||
let mockSendMessageStream: any;
|
||||
|
||||
vi.mock('fs', () => ({
|
||||
statSync: vi.fn(),
|
||||
mkdirSync: vi.fn(),
|
||||
createWriteStream: vi.fn(() => ({
|
||||
write: vi.fn(),
|
||||
on: vi.fn(),
|
||||
})),
|
||||
}));
|
||||
|
||||
vi.mock('../core/client.js', () => ({
|
||||
GeminiClient: vi.fn().mockImplementation(function (
|
||||
this: any,
|
||||
_config: Config,
|
||||
) {
|
||||
this.startChat = (...params: any[]) => mockStartChat(...params);
|
||||
this.sendMessageStream = (...params: any[]) =>
|
||||
mockSendMessageStream(...params);
|
||||
return this;
|
||||
}),
|
||||
}));
|
||||
// END MOCKS
|
||||
|
||||
import {
|
||||
countOccurrences,
|
||||
ensureCorrectEdit,
|
||||
ensureCorrectFileContent,
|
||||
unescapeStringForGeminiBug,
|
||||
resetEditCorrectorCaches_TEST_ONLY,
|
||||
} from './editCorrector.js';
|
||||
import { GeminiClient } from '../core/client.js';
|
||||
import type { Config } from '../config/config.js';
|
||||
import { ToolRegistry } from '../tools/tool-registry.js';
|
||||
|
||||
vi.mock('../tools/tool-registry.js');
|
||||
|
||||
describe('editCorrector', () => {
|
||||
describe('countOccurrences', () => {
|
||||
it('should return 0 for empty string', () => {
|
||||
expect(countOccurrences('', 'a')).toBe(0);
|
||||
});
|
||||
it('should return 0 for empty substring', () => {
|
||||
expect(countOccurrences('abc', '')).toBe(0);
|
||||
});
|
||||
it('should return 0 if substring is not found', () => {
|
||||
expect(countOccurrences('abc', 'd')).toBe(0);
|
||||
});
|
||||
it('should return 1 if substring is found once', () => {
|
||||
expect(countOccurrences('abc', 'b')).toBe(1);
|
||||
});
|
||||
it('should return correct count for multiple occurrences', () => {
|
||||
expect(countOccurrences('ababa', 'a')).toBe(3);
|
||||
expect(countOccurrences('ababab', 'ab')).toBe(3);
|
||||
});
|
||||
it('should count non-overlapping occurrences', () => {
|
||||
expect(countOccurrences('aaaaa', 'aa')).toBe(2);
|
||||
expect(countOccurrences('ababab', 'aba')).toBe(1);
|
||||
});
|
||||
it('should correctly count occurrences when substring is longer', () => {
|
||||
expect(countOccurrences('abc', 'abcdef')).toBe(0);
|
||||
});
|
||||
it('should be case-sensitive', () => {
|
||||
expect(countOccurrences('abcABC', 'a')).toBe(1);
|
||||
expect(countOccurrences('abcABC', 'A')).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe('unescapeStringForGeminiBug', () => {
|
||||
it('should unescape common sequences', () => {
|
||||
expect(unescapeStringForGeminiBug('\\n')).toBe('\n');
|
||||
@@ -156,542 +95,6 @@ describe('editCorrector', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('ensureCorrectEdit', () => {
|
||||
let mockGeminiClientInstance: Mocked<GeminiClient>;
|
||||
let mockBaseLlmClientInstance: Mocked<BaseLlmClient>;
|
||||
let mockToolRegistry: Mocked<ToolRegistry>;
|
||||
let mockConfigInstance: Config;
|
||||
const abortSignal = new AbortController().signal;
|
||||
|
||||
beforeEach(() => {
|
||||
mockToolRegistry = new ToolRegistry(
|
||||
{} as Config,
|
||||
{} as any,
|
||||
) as Mocked<ToolRegistry>;
|
||||
const configParams = {
|
||||
apiKey: 'test-api-key',
|
||||
model: 'test-model',
|
||||
sandbox: false as boolean | string,
|
||||
targetDir: '/test',
|
||||
debugMode: false,
|
||||
question: undefined as string | undefined,
|
||||
|
||||
coreTools: undefined as string[] | undefined,
|
||||
toolDiscoveryCommand: undefined as string | undefined,
|
||||
toolCallCommand: undefined as string | undefined,
|
||||
mcpServerCommand: undefined as string | undefined,
|
||||
mcpServers: undefined as Record<string, any> | undefined,
|
||||
userAgent: 'test-agent',
|
||||
userMemory: '',
|
||||
geminiMdFileCount: 0,
|
||||
alwaysSkipModificationConfirmation: false,
|
||||
};
|
||||
mockConfigInstance = {
|
||||
...configParams,
|
||||
getApiKey: vi.fn(() => configParams.apiKey),
|
||||
getModel: vi.fn(() => configParams.model),
|
||||
getSandbox: vi.fn(() => configParams.sandbox),
|
||||
getTargetDir: vi.fn(() => configParams.targetDir),
|
||||
getToolRegistry: vi.fn(() => mockToolRegistry),
|
||||
getDebugMode: vi.fn(() => configParams.debugMode),
|
||||
getQuestion: vi.fn(() => configParams.question),
|
||||
|
||||
getCoreTools: vi.fn(() => configParams.coreTools),
|
||||
getToolDiscoveryCommand: vi.fn(() => configParams.toolDiscoveryCommand),
|
||||
getToolCallCommand: vi.fn(() => configParams.toolCallCommand),
|
||||
getMcpServerCommand: vi.fn(() => configParams.mcpServerCommand),
|
||||
getMcpServers: vi.fn(() => configParams.mcpServers),
|
||||
getUserAgent: vi.fn(() => configParams.userAgent),
|
||||
getUserMemory: vi.fn(() => configParams.userMemory),
|
||||
setUserMemory: vi.fn((mem: string) => {
|
||||
configParams.userMemory = mem;
|
||||
}),
|
||||
getGeminiMdFileCount: vi.fn(() => configParams.geminiMdFileCount),
|
||||
setGeminiMdFileCount: vi.fn((count: number) => {
|
||||
configParams.geminiMdFileCount = count;
|
||||
}),
|
||||
getAlwaysSkipModificationConfirmation: vi.fn(
|
||||
() => configParams.alwaysSkipModificationConfirmation,
|
||||
),
|
||||
setAlwaysSkipModificationConfirmation: vi.fn((skip: boolean) => {
|
||||
configParams.alwaysSkipModificationConfirmation = skip;
|
||||
}),
|
||||
getQuotaErrorOccurred: vi.fn().mockReturnValue(false),
|
||||
setQuotaErrorOccurred: vi.fn(),
|
||||
} as unknown as Config;
|
||||
|
||||
callCount = 0;
|
||||
mockResponses.length = 0;
|
||||
mockGenerateJson = vi
|
||||
.fn()
|
||||
.mockImplementation((_contents, _schema, signal) => {
|
||||
// Check if the signal is aborted. If so, throw an error or return a specific response.
|
||||
if (signal && signal.aborted) {
|
||||
return Promise.reject(new Error('Aborted')); // Or some other specific error/response
|
||||
}
|
||||
const response = mockResponses[callCount];
|
||||
callCount++;
|
||||
if (response === undefined) return Promise.resolve({});
|
||||
return Promise.resolve(response);
|
||||
});
|
||||
mockStartChat = vi.fn();
|
||||
mockSendMessageStream = vi.fn();
|
||||
|
||||
mockGeminiClientInstance = new GeminiClient(
|
||||
mockConfigInstance,
|
||||
) as Mocked<GeminiClient>;
|
||||
mockGeminiClientInstance.getHistory = vi.fn().mockReturnValue([]);
|
||||
mockBaseLlmClientInstance = {
|
||||
generateJson: mockGenerateJson,
|
||||
config: {
|
||||
generationConfigService: {
|
||||
getResolvedConfig: vi.fn().mockReturnValue({
|
||||
model: 'edit-corrector',
|
||||
generateContentConfig: {},
|
||||
}),
|
||||
},
|
||||
},
|
||||
} as unknown as Mocked<BaseLlmClient>;
|
||||
resetEditCorrectorCaches_TEST_ONLY();
|
||||
});
|
||||
|
||||
describe('Scenario Group 1: originalParams.old_string matches currentContent directly', () => {
|
||||
it('Test 1.1: old_string (no literal \\), new_string (escaped by Gemini) -> new_string unescaped', async () => {
|
||||
const currentContent = 'This is a test string to find me.';
|
||||
const originalParams = {
|
||||
file_path: '/test/file.txt',
|
||||
old_string: 'find me',
|
||||
new_string: 'replace with \\"this\\"',
|
||||
};
|
||||
mockResponses.push({
|
||||
corrected_new_string_escaping: 'replace with "this"',
|
||||
});
|
||||
const result = await ensureCorrectEdit(
|
||||
'/test/file.txt',
|
||||
currentContent,
|
||||
originalParams,
|
||||
mockGeminiClientInstance,
|
||||
mockBaseLlmClientInstance,
|
||||
abortSignal,
|
||||
false,
|
||||
);
|
||||
expect(mockGenerateJson).toHaveBeenCalledTimes(1);
|
||||
expect(result.params.new_string).toBe('replace with "this"');
|
||||
expect(result.params.old_string).toBe('find me');
|
||||
expect(result.occurrences).toBe(1);
|
||||
});
|
||||
it('Test 1.2: old_string (no literal \\), new_string (correctly formatted) -> new_string unchanged', async () => {
|
||||
const currentContent = 'This is a test string to find me.';
|
||||
const originalParams = {
|
||||
file_path: '/test/file.txt',
|
||||
old_string: 'find me',
|
||||
new_string: 'replace with this',
|
||||
};
|
||||
const result = await ensureCorrectEdit(
|
||||
'/test/file.txt',
|
||||
currentContent,
|
||||
originalParams,
|
||||
mockGeminiClientInstance,
|
||||
mockBaseLlmClientInstance,
|
||||
abortSignal,
|
||||
false,
|
||||
);
|
||||
expect(mockGenerateJson).toHaveBeenCalledTimes(0);
|
||||
expect(result.params.new_string).toBe('replace with this');
|
||||
expect(result.params.old_string).toBe('find me');
|
||||
expect(result.occurrences).toBe(1);
|
||||
});
|
||||
// old_string matches the file content directly (it contains an intended
// literal backslash). new_string looks over-escaped, so the LLM escaping
// correction is consulted once and its unescaped answer is used.
it('Test 1.3: old_string (with literal \\), new_string (escaped by Gemini) -> new_string unescaped', async () => {
  const currentContent = 'This is a test string to find\\me.';
  const originalParams = {
    file_path: '/test/file.txt',
    old_string: 'find\\me',
    new_string: 'replace with \\"this\\"',
  };
  // Response returned by the mocked generateJson for the escaping correction.
  mockResponses.push({
    corrected_new_string_escaping: 'replace with "this"',
  });
  const result = await ensureCorrectEdit(
    '/test/file.txt',
    currentContent,
    originalParams,
    mockGeminiClientInstance,
    mockBaseLlmClientInstance,
    abortSignal,
    false,
  );
  expect(mockGenerateJson).toHaveBeenCalledTimes(1);
  expect(result.params.new_string).toBe('replace with "this"');
  expect(result.params.old_string).toBe('find\\me');
  expect(result.occurrences).toBe(1);
});
|
||||
it('Test 1.4: old_string (with literal \\), new_string (correctly formatted) -> new_string unchanged', async () => {
|
||||
const currentContent = 'This is a test string to find\\me.';
|
||||
const originalParams = {
|
||||
file_path: '/test/file.txt',
|
||||
old_string: 'find\\me',
|
||||
new_string: 'replace with this',
|
||||
};
|
||||
const result = await ensureCorrectEdit(
|
||||
'/test/file.txt',
|
||||
currentContent,
|
||||
originalParams,
|
||||
mockGeminiClientInstance,
|
||||
mockBaseLlmClientInstance,
|
||||
abortSignal,
|
||||
false,
|
||||
);
|
||||
expect(mockGenerateJson).toHaveBeenCalledTimes(0);
|
||||
expect(result.params.new_string).toBe('replace with this');
|
||||
expect(result.params.old_string).toBe('find\\me');
|
||||
expect(result.occurrences).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Scenario Group 2: originalParams.old_string does NOT match, but unescapeStringForGeminiBug(originalParams.old_string) DOES match', () => {
|
||||
it('Test 2.1: old_string (over-escaped, no intended literal \\), new_string (escaped by Gemini) -> new_string unescaped', async () => {
|
||||
const currentContent = 'This is a test string to find "me".';
|
||||
const originalParams = {
|
||||
file_path: '/test/file.txt',
|
||||
old_string: 'find \\"me\\"',
|
||||
new_string: 'replace with \\"this\\"',
|
||||
};
|
||||
mockResponses.push({ corrected_new_string: 'replace with "this"' });
|
||||
const result = await ensureCorrectEdit(
|
||||
'/test/file.txt',
|
||||
currentContent,
|
||||
originalParams,
|
||||
mockGeminiClientInstance,
|
||||
mockBaseLlmClientInstance,
|
||||
abortSignal,
|
||||
false,
|
||||
);
|
||||
expect(mockGenerateJson).toHaveBeenCalledTimes(1);
|
||||
expect(result.params.new_string).toBe('replace with "this"');
|
||||
expect(result.params.old_string).toBe('find "me"');
|
||||
expect(result.occurrences).toBe(1);
|
||||
});
|
||||
it('Test 2.2: old_string (over-escaped, no intended literal \\), new_string (correctly formatted) -> new_string unescaped (harmlessly)', async () => {
|
||||
const currentContent = 'This is a test string to find "me".';
|
||||
const originalParams = {
|
||||
file_path: '/test/file.txt',
|
||||
old_string: 'find \\"me\\"',
|
||||
new_string: 'replace with this',
|
||||
};
|
||||
const result = await ensureCorrectEdit(
|
||||
'/test/file.txt',
|
||||
currentContent,
|
||||
originalParams,
|
||||
mockGeminiClientInstance,
|
||||
mockBaseLlmClientInstance,
|
||||
abortSignal,
|
||||
false,
|
||||
);
|
||||
expect(mockGenerateJson).toHaveBeenCalledTimes(0);
|
||||
expect(result.params.new_string).toBe('replace with this');
|
||||
expect(result.params.old_string).toBe('find "me"');
|
||||
expect(result.occurrences).toBe(1);
|
||||
});
|
||||
// The over-escaped old_string does not match as given, but its unescaped form
// matches exactly once. new_string needs no unescaping, so no LLM call is
// made and new_string is returned untouched.
it('Test 2.3: old_string (over-escaped, with intended literal \\), new_string (simple) -> old_string unescaped, new_string unchanged', async () => {
  const currentContent = 'This is a test string to find \\me.';
  const originalParams = {
    file_path: '/test/file.txt',
    old_string: 'find \\\\me',
    new_string: 'replace with foobar',
  };
  const result = await ensureCorrectEdit(
    '/test/file.txt',
    currentContent,
    originalParams,
    mockGeminiClientInstance,
    mockBaseLlmClientInstance,
    abortSignal,
    false,
  );
  expect(mockGenerateJson).toHaveBeenCalledTimes(0);
  expect(result.params.new_string).toBe('replace with foobar');
  expect(result.params.old_string).toBe('find \\me');
  expect(result.occurrences).toBe(1);
});
|
||||
});
|
||||
|
||||
describe('Scenario Group 3: LLM Correction Path', () => {
|
||||
it('Test 3.1: old_string (no literal \\), new_string (escaped by Gemini), LLM re-escapes new_string -> final new_string is double unescaped', async () => {
|
||||
const currentContent = 'This is a test string to corrected find me.';
|
||||
const originalParams = {
|
||||
file_path: '/test/file.txt',
|
||||
old_string: 'find me',
|
||||
new_string: 'replace with \\\\"this\\\\"',
|
||||
};
|
||||
const llmNewString = 'LLM says replace with "that"';
|
||||
mockResponses.push({ corrected_new_string_escaping: llmNewString });
|
||||
const result = await ensureCorrectEdit(
|
||||
'/test/file.txt',
|
||||
currentContent,
|
||||
originalParams,
|
||||
mockGeminiClientInstance,
|
||||
mockBaseLlmClientInstance,
|
||||
abortSignal,
|
||||
false,
|
||||
);
|
||||
expect(mockGenerateJson).toHaveBeenCalledTimes(1);
|
||||
expect(result.params.new_string).toBe(llmNewString);
|
||||
expect(result.params.old_string).toBe('find me');
|
||||
expect(result.occurrences).toBe(1);
|
||||
});
|
||||
it('Test 3.2: old_string (with literal \\), new_string (escaped by Gemini), LLM re-escapes new_string -> final new_string is unescaped once', async () => {
|
||||
const currentContent = 'This is a test string to corrected find me.';
|
||||
const originalParams = {
|
||||
file_path: '/test/file.txt',
|
||||
old_string: 'find\\me',
|
||||
new_string: 'replace with \\\\"this\\\\"',
|
||||
};
|
||||
const llmCorrectedOldString = 'corrected find me';
|
||||
const llmNewString = 'LLM says replace with "that"';
|
||||
mockResponses.push({ corrected_target_snippet: llmCorrectedOldString });
|
||||
mockResponses.push({ corrected_new_string: llmNewString });
|
||||
const result = await ensureCorrectEdit(
|
||||
'/test/file.txt',
|
||||
currentContent,
|
||||
originalParams,
|
||||
mockGeminiClientInstance,
|
||||
mockBaseLlmClientInstance,
|
||||
abortSignal,
|
||||
false,
|
||||
);
|
||||
expect(mockGenerateJson).toHaveBeenCalledTimes(2);
|
||||
expect(result.params.new_string).toBe(llmNewString);
|
||||
expect(result.params.old_string).toBe(llmCorrectedOldString);
|
||||
expect(result.occurrences).toBe(1);
|
||||
});
|
||||
it('Test 3.3: old_string needs LLM, new_string is fine -> old_string corrected, new_string original', async () => {
|
||||
const currentContent = 'This is a test string to be corrected.';
|
||||
const originalParams = {
|
||||
file_path: '/test/file.txt',
|
||||
old_string: 'fiiind me',
|
||||
new_string: 'replace with "this"',
|
||||
};
|
||||
const llmCorrectedOldString = 'to be corrected';
|
||||
mockResponses.push({ corrected_target_snippet: llmCorrectedOldString });
|
||||
const result = await ensureCorrectEdit(
|
||||
'/test/file.txt',
|
||||
currentContent,
|
||||
originalParams,
|
||||
mockGeminiClientInstance,
|
||||
mockBaseLlmClientInstance,
|
||||
abortSignal,
|
||||
false,
|
||||
);
|
||||
expect(mockGenerateJson).toHaveBeenCalledTimes(1);
|
||||
expect(result.params.new_string).toBe('replace with "this"');
|
||||
expect(result.params.old_string).toBe(llmCorrectedOldString);
|
||||
expect(result.occurrences).toBe(1);
|
||||
});
|
||||
it('Test 3.4: LLM correction path, correctNewString returns the originalNewString it was passed (which was unescaped) -> final new_string is unescaped', async () => {
|
||||
const currentContent = 'This is a test string to corrected find me.';
|
||||
const originalParams = {
|
||||
file_path: '/test/file.txt',
|
||||
old_string: 'find me',
|
||||
new_string: 'replace with \\\\"this\\\\"',
|
||||
};
|
||||
const newStringForLLMAndReturnedByLLM = 'replace with "this"';
|
||||
mockResponses.push({
|
||||
corrected_new_string_escaping: newStringForLLMAndReturnedByLLM,
|
||||
});
|
||||
const result = await ensureCorrectEdit(
|
||||
'/test/file.txt',
|
||||
currentContent,
|
||||
originalParams,
|
||||
mockGeminiClientInstance,
|
||||
mockBaseLlmClientInstance,
|
||||
abortSignal,
|
||||
false,
|
||||
);
|
||||
expect(mockGenerateJson).toHaveBeenCalledTimes(1);
|
||||
expect(result.params.new_string).toBe(newStringForLLMAndReturnedByLLM);
|
||||
expect(result.occurrences).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Scenario Group 4: No Match Found / Multiple Matches', () => {
|
||||
it('Test 4.1: No version of old_string (original, unescaped, LLM-corrected) matches -> returns original params, 0 occurrences', async () => {
|
||||
const currentContent = 'This content has nothing to find.';
|
||||
const originalParams = {
|
||||
file_path: '/test/file.txt',
|
||||
old_string: 'nonexistent string',
|
||||
new_string: 'some new string',
|
||||
};
|
||||
mockResponses.push({ corrected_target_snippet: 'still nonexistent' });
|
||||
const result = await ensureCorrectEdit(
|
||||
'/test/file.txt',
|
||||
currentContent,
|
||||
originalParams,
|
||||
mockGeminiClientInstance,
|
||||
mockBaseLlmClientInstance,
|
||||
abortSignal,
|
||||
false,
|
||||
);
|
||||
expect(mockGenerateJson).toHaveBeenCalledTimes(1);
|
||||
expect(result.params).toEqual(originalParams);
|
||||
expect(result.occurrences).toBe(0);
|
||||
});
|
||||
// old_string (which contains no escapes) already matches twice in the file
// and multiple matches are not allowed, so the original params are returned
// with the raw occurrence count and no LLM call is made.
it('Test 4.2: old_string matches >1 occurrences -> returns original params, count occurrences', async () => {
  const currentContent =
    'This content has find "me" and also find "me" again.';
  const originalParams = {
    file_path: '/test/file.txt',
    old_string: 'find "me"',
    new_string: 'some new string',
  };
  const result = await ensureCorrectEdit(
    '/test/file.txt',
    currentContent,
    originalParams,
    mockGeminiClientInstance,
    mockBaseLlmClientInstance,
    abortSignal,
    false,
  );
  expect(mockGenerateJson).toHaveBeenCalledTimes(0);
  expect(result.params).toEqual(originalParams);
  expect(result.occurrences).toBe(2);
});
|
||||
});
|
||||
|
||||
describe('Scenario Group 5: Specific unescapeStringForGeminiBug checks (integrated into ensureCorrectEdit)', () => {
|
||||
it('Test 5.1: old_string needs LLM to become currentContent, new_string also needs correction', async () => {
|
||||
const currentContent = 'const x = "a\nbc\\"def\\"';
|
||||
const originalParams = {
|
||||
file_path: '/test/file.txt',
|
||||
old_string: 'const x = \\"a\\nbc\\\\"def\\\\"',
|
||||
new_string: 'const y = \\"new\\nval\\\\"content\\\\"',
|
||||
};
|
||||
const expectedFinalNewString = 'const y = "new\nval\\"content\\"';
|
||||
mockResponses.push({ corrected_target_snippet: currentContent });
|
||||
mockResponses.push({ corrected_new_string: expectedFinalNewString });
|
||||
const result = await ensureCorrectEdit(
|
||||
'/test/file.txt',
|
||||
currentContent,
|
||||
originalParams,
|
||||
mockGeminiClientInstance,
|
||||
mockBaseLlmClientInstance,
|
||||
abortSignal,
|
||||
false,
|
||||
);
|
||||
expect(mockGenerateJson).toHaveBeenCalledTimes(2);
|
||||
expect(result.params.old_string).toBe(currentContent);
|
||||
expect(result.params.new_string).toBe(expectedFinalNewString);
|
||||
expect(result.occurrences).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Scenario Group 6: Concurrent Edits', () => {
|
||||
it('Test 6.1: should return early if file was modified by another process', async () => {
|
||||
const filePath = '/test/file.txt';
|
||||
const currentContent =
|
||||
'This content has been modified by someone else.';
|
||||
const originalParams = {
|
||||
file_path: filePath,
|
||||
old_string: 'nonexistent string',
|
||||
new_string: 'some new string',
|
||||
};
|
||||
|
||||
const now = Date.now();
|
||||
const lastEditTime = now - 5000; // 5 seconds ago
|
||||
|
||||
// Mock the file's modification time to be recent
|
||||
vi.spyOn(fs, 'statSync').mockReturnValue({
|
||||
mtimeMs: now,
|
||||
} as fs.Stats);
|
||||
|
||||
// Mock the last edit timestamp from our history to be in the past
|
||||
const history = [
|
||||
{
|
||||
role: 'model',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: EDIT_TOOL_NAME,
|
||||
id: `${EDIT_TOOL_NAME}-${lastEditTime}-123`,
|
||||
response: {
|
||||
output: {
|
||||
llmContent: `Successfully modified file: ${filePath}`,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
];
|
||||
(mockGeminiClientInstance.getHistory as Mock).mockReturnValue(history);
|
||||
|
||||
const result = await ensureCorrectEdit(
|
||||
filePath,
|
||||
currentContent,
|
||||
originalParams,
|
||||
mockGeminiClientInstance,
|
||||
mockBaseLlmClientInstance,
|
||||
abortSignal,
|
||||
false,
|
||||
);
|
||||
|
||||
expect(result.occurrences).toBe(0);
|
||||
expect(result.params).toEqual(originalParams);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Scenario Group 7: Trimming with Newline Preservation', () => {
|
||||
it('Test 7.1: should preserve trailing newlines in new_string when trimming is applied', async () => {
|
||||
const currentContent = ' find me'; // Matches old_string initially
|
||||
const originalParams = {
|
||||
file_path: '/test/file.txt',
|
||||
old_string: ' find me', // Matches, but has whitespace to trim
|
||||
new_string: ' replaced\n\n', // Needs trimming but preserve newlines
|
||||
};
|
||||
|
||||
const result = await ensureCorrectEdit(
|
||||
'/test/file.txt',
|
||||
currentContent,
|
||||
originalParams,
|
||||
mockGeminiClientInstance,
|
||||
mockBaseLlmClientInstance,
|
||||
abortSignal,
|
||||
false,
|
||||
);
|
||||
|
||||
// old_string should be trimmed to 'find me' because 'find me' also exists uniquely in ' find me'
|
||||
expect(result.params.old_string).toBe('find me');
|
||||
// new_string should be trimmed of spaces but keep ALL newlines
|
||||
expect(result.params.new_string).toBe('replaced\n\n');
|
||||
expect(result.occurrences).toBe(1);
|
||||
});
|
||||
|
||||
it('Test 7.2: should handle trailing newlines separated by spaces (regression fix)', async () => {
|
||||
const currentContent = 'find me '; // Matches old_string initially
|
||||
const originalParams = {
|
||||
file_path: '/test/file.txt',
|
||||
old_string: 'find me ', // Trailing space
|
||||
new_string: 'replaced \n \n', // Trailing newlines with spaces
|
||||
};
|
||||
|
||||
const result = await ensureCorrectEdit(
|
||||
'/test/file.txt',
|
||||
currentContent,
|
||||
originalParams,
|
||||
mockGeminiClientInstance,
|
||||
mockBaseLlmClientInstance,
|
||||
abortSignal,
|
||||
false,
|
||||
);
|
||||
|
||||
expect(result.params.old_string).toBe('find me');
|
||||
// Should capture both newlines and join them, stripping the space between
|
||||
expect(result.params.new_string).toBe('replaced\n\n');
|
||||
expect(result.occurrences).toBe(1);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('ensureCorrectFileContent', () => {
|
||||
let mockBaseLlmClientInstance: Mocked<BaseLlmClient>;
|
||||
const abortSignal = new AbortController().signal;
|
||||
@@ -811,5 +214,37 @@ describe('editCorrector', () => {
|
||||
|
||||
expect(result).toBe(correctedContent);
|
||||
});
|
||||
|
||||
it('should return unescaped content when LLM is disabled and aggressiveUnescape is true', async () => {
|
||||
const content = 'LaTeX command \\\\title{Example}';
|
||||
// unescapeStringForGeminiBug would change \\\\title to \title (literal tab and "itle")
|
||||
const expected = 'LaTeX command \title{Example}';
|
||||
|
||||
const result = await ensureCorrectFileContent(
|
||||
content,
|
||||
mockBaseLlmClientInstance,
|
||||
abortSignal,
|
||||
true, // disableLLMCorrection
|
||||
true, // aggressiveUnescape
|
||||
);
|
||||
|
||||
expect(result).toBe(expected);
|
||||
expect(mockGenerateJson).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should return original content when LLM is disabled and aggressiveUnescape is false', async () => {
|
||||
const content = 'LaTeX command \\\\title{Example}';
|
||||
|
||||
const result = await ensureCorrectFileContent(
|
||||
content,
|
||||
mockBaseLlmClientInstance,
|
||||
abortSignal,
|
||||
true, // disableLLMCorrection
|
||||
false, // aggressiveUnescape
|
||||
);
|
||||
|
||||
expect(result).toBe(content);
|
||||
expect(mockGenerateJson).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -5,21 +5,7 @@
|
||||
*/
|
||||
|
||||
import type { Content } from '@google/genai';
|
||||
import type { GeminiClient } from '../core/client.js';
|
||||
import type { BaseLlmClient } from '../core/baseLlmClient.js';
|
||||
import type { EditToolParams } from '../tools/edit.js';
|
||||
import {
|
||||
EDIT_TOOL_NAME,
|
||||
GREP_TOOL_NAME,
|
||||
READ_FILE_TOOL_NAME,
|
||||
READ_MANY_FILES_TOOL_NAME,
|
||||
WRITE_FILE_TOOL_NAME,
|
||||
} from '../tools/tool-names.js';
|
||||
import {
|
||||
isFunctionResponse,
|
||||
isFunctionCall,
|
||||
} from '../utils/messageInspectors.js';
|
||||
import * as fs from 'node:fs';
|
||||
import { promptIdContext } from './promptIdContext.js';
|
||||
import { debugLogger } from './debugLogger.js';
|
||||
import { LRUCache } from 'mnemonist';
|
||||
@@ -39,336 +25,34 @@ function getPromptId(): string {
|
||||
|
||||
// Capacity passed to each of the LRU caches declared below.
const MAX_CACHE_SIZE = 50;

// Cache for ensureCorrectEdit results. Keys are strings built from the file
// content plus the original old_string and new_string.
const editCorrectionCache = new LRUCache<string, CorrectedEditResult>(
  MAX_CACHE_SIZE,
);

// Cache for ensureCorrectFileContent results
const fileContentCorrectionCache = new LRUCache<string, string>(MAX_CACHE_SIZE);
|
||||
|
||||
/**
 * Defines the structure of the parameters within CorrectedEditResult
 */
interface CorrectedEditParams {
  // Path of the file the edit applies to.
  file_path: string;
  // Text to look for in the file content; may be the original value or a
  // corrected (e.g. unescaped) variant of it.
  old_string: string;
  // Replacement text; may be the original value or a corrected variant of it.
  new_string: string;
}
|
||||
|
||||
/**
 * Defines the result structure for ensureCorrectEdit.
 */
export interface CorrectedEditResult {
  // The (potentially corrected) edit parameters.
  params: CorrectedEditParams;
  // How many times the old_string that was searched for occurs in the current
  // file content.
  occurrences: number;
}
|
||||
|
||||
/**
 * Extracts the timestamp from the .id value, which is in format
 * <tool.name>-<timestamp>-<uuid>
 *
 * The timestamp segment must consist solely of decimal digits. A segment that
 * is only partially numeric (for example "12ab") is rejected rather than
 * being truncated to its numeric prefix.
 *
 * @param fcnId the ID value of a functionCall or functionResponse object
 * @returns -1 if the timestamp could not be extracted, else the timestamp (as a number)
 */
function getTimestampFromFunctionId(fcnId: string): number {
  const idParts = fcnId.split('-');
  // A well-formed ID has at least three dash-separated segments.
  if (idParts.length <= 2) {
    return -1;
  }

  const candidate = idParts[1] ?? '';
  // parseInt() would silently accept trailing garbage; require digits only.
  if (!/^\d+$/.test(candidate)) {
    return -1;
  }

  const timestamp = Number(candidate);
  return Number.isFinite(timestamp) ? timestamp : -1;
}
|
||||
|
||||
/**
 * Will look through the gemini client history and determine when the most recent
 * edit to a target file occurred. If no edit happened, it will return -1
 *
 * History entries are scanned from newest to oldest. A part is considered
 * relevant when it is a function call or a non-error function response of one
 * of the file tools and its serialized payload mentions the file path.
 *
 * @param filePath the path to the file
 * @param client the geminiClient, so that we can get the history
 * @returns a DateTime (as a number) of when the last edit occurred, or -1 if no edit was found.
 */
async function findLastEditTimestamp(
  filePath: string,
  client: GeminiClient,
): Promise<number> {
  const history = client.getHistory() ?? [];

  // Tools that may reference the file path in their FunctionResponse `output`.
  const toolsInResp = new Set([
    WRITE_FILE_TOOL_NAME,
    EDIT_TOOL_NAME,
    READ_MANY_FILES_TOOL_NAME,
    GREP_TOOL_NAME,
  ]);
  // Tools that may reference the file path in their FunctionCall `args`.
  const toolsInCall = new Set([...toolsInResp, READ_FILE_TOOL_NAME]);

  // The payload is searched in its JSON-serialized form, where backslashes and
  // quotes are escaped. Escape the path the same way (and strip the
  // surrounding quotes) so that e.g. Windows paths can still be found.
  const escapedPath = JSON.stringify(filePath).slice(1, -1);

  // Iterate backwards to find the most recent relevant action.
  for (const entry of history.slice().reverse()) {
    if (!entry.parts) continue;

    for (const part of entry.parts) {
      let id: string | undefined;
      let content: unknown;
      let fromResponse = false;

      // Check for a relevant FunctionCall with the file path in its arguments.
      if (
        isFunctionCall(entry) &&
        part.functionCall?.name &&
        toolsInCall.has(part.functionCall.name)
      ) {
        id = part.functionCall.id;
        content = part.functionCall.args;
      }
      // Check for a relevant FunctionResponse with the file path in its output.
      else if (
        isFunctionResponse(entry) &&
        part.functionResponse?.name &&
        toolsInResp.has(part.functionResponse.name)
      ) {
        const { response } = part.functionResponse;
        if (response && !('error' in response) && 'output' in response) {
          id = part.functionResponse.id;
          content = response['output'];
          fromResponse = true;
        }
      }

      if (!id || content === undefined) continue;

      // Use the "blunt hammer" approach to find the file path in the content.
      const stringified = JSON.stringify(content);
      if (!stringified.includes(escapedPath)) continue;

      // Note that the tool response data is inconsistent in their formatting
      // with successes and errors - so, we just check for the existence
      // of failure markers as the best guess to if error/failed occurred.
      // This only applies to responses, and the path itself is removed first
      // so that a file named e.g. "ErrorBoundary.tsx" is not mistaken for a
      // failure.
      if (fromResponse) {
        const withoutPath = stringified.split(escapedPath).join('');
        if (withoutPath.includes('Error') || withoutPath.includes('Failed')) {
          continue;
        }
      }

      return getTimestampFromFunctionId(id);
    }
  }

  return -1;
}
|
||||
|
||||
/**
 * Attempts to correct edit parameters when `old_string` does not match the
 * current file content the expected number of times (exactly once, or at
 * least once when `allow_multiple` is set).
 *
 * Resolution order:
 *  1. Use `old_string` as-is when it already matches.
 *  2. Retry with `old_string` passed through `unescapeStringForGeminiBug`.
 *  3. If still nothing is found, give up when the file looks like it was
 *     modified after our last recorded edit, or when LLM correction is
 *     disabled; otherwise ask the LLM for a corrected `old_string`.
 *
 * When `new_string` looks over-escaped it is corrected through the LLM as
 * well. Finally the old/new pair is whitespace-trimmed if the trimmed target
 * still matches. Results are cached to avoid redundant processing.
 *
 * @param filePath Path of the file being edited; used to look up our last
 *   edit and the file's modification time.
 * @param currentContent The current content of the file.
 * @param originalParams The original EditToolParams.
 * @param geminiClient Client handed to `findLastEditTimestamp`.
 * @param baseLlmClient Client used for the LLM correction calls.
 * @param abortSignal Signal forwarded to every LLM correction call.
 * @param disableLLMCorrection When true, no LLM correction is attempted.
 * @returns A promise resolving to the (potentially corrected) edit params
 *   and the occurrence count of the final `old_string`. When correction is
 *   not possible the original params are returned with the observed count.
 */
export async function ensureCorrectEdit(
  filePath: string,
  currentContent: string,
  originalParams: EditToolParams, // This is the EditToolParams from edit.ts, without 'corrected'
  geminiClient: GeminiClient,
  baseLlmClient: BaseLlmClient,
  abortSignal: AbortSignal,
  disableLLMCorrection: boolean,
): Promise<CorrectedEditResult> {
  // If the file's mtime is more than this many milliseconds past our last
  // recorded edit, assume something else modified it.
  const externalEditToleranceMs = 2000;

  const allowMultiple = originalParams.allow_multiple ?? false;

  // filePath, allow_multiple and disableLLMCorrection all change the outcome
  // for otherwise identical content/old/new strings, so they must be part of
  // the key; otherwise a result computed under one setting is served for
  // another.
  const cacheKey = [
    currentContent,
    originalParams.old_string,
    originalParams.new_string,
    filePath,
    String(allowMultiple),
    String(disableLLMCorrection),
  ].join('---');
  const cachedResult = editCorrectionCache.get(cacheKey);
  if (cachedResult) {
    return cachedResult;
  }

  // Stores the result in the cache and hands it back to the caller.
  const cacheAndReturn = (result: CorrectedEditResult): CorrectedEditResult => {
    editCorrectionCache.set(cacheKey, result);
    return result;
  };
  // Result for "could not correct": original params plus the observed count
  // (validation later in the edit tool is expected to reject it).
  const returnUncorrected = (count: number): CorrectedEditResult =>
    cacheAndReturn({ params: { ...originalParams }, occurrences: count });
  // Whether a match count is acceptable for the requested edit mode.
  const isExpectedCount = (count: number): boolean =>
    allowMultiple ? count > 0 : count === 1;

  let finalNewString = originalParams.new_string;
  const newStringPotentiallyEscaped =
    unescapeStringForGeminiBug(originalParams.new_string) !==
    originalParams.new_string;

  let finalOldString = originalParams.old_string;
  let occurrences = countOccurrences(currentContent, finalOldString);

  if (isExpectedCount(occurrences)) {
    if (newStringPotentiallyEscaped && !disableLLMCorrection) {
      finalNewString = await correctNewStringEscaping(
        baseLlmClient,
        finalOldString,
        originalParams.new_string,
        abortSignal,
      );
    }
  } else if (occurrences > 1 && !allowMultiple) {
    // If user doesn't allow multiple but found multiple, return as-is (will fail validation later)
    return returnUncorrected(occurrences);
  } else {
    // occurrences is 0 at this point: try the unescaped old_string.
    const unescapedOldStringAttempt = unescapeStringForGeminiBug(
      originalParams.old_string,
    );
    occurrences = countOccurrences(currentContent, unescapedOldStringAttempt);

    if (isExpectedCount(occurrences)) {
      finalOldString = unescapedOldStringAttempt;
      if (newStringPotentiallyEscaped && !disableLLMCorrection) {
        finalNewString = await correctNewString(
          baseLlmClient,
          originalParams.old_string, // original old
          unescapedOldStringAttempt, // corrected old
          originalParams.new_string, // original new (which is potentially escaped)
          abortSignal,
        );
      }
    } else if (occurrences === 0) {
      if (filePath) {
        // In order to keep from clobbering edits made outside our system,
        // let's check if there was a more recent edit to the file than what
        // our system has done
        const lastEditedByUsTime = await findLastEditTimestamp(
          filePath,
          geminiClient,
        );

        // Allow a 2-second buffer to account for timing inaccuracies. If the
        // file was modified later than that after the last edit tool ran, we
        // can assume it was modified by something else.
        if (lastEditedByUsTime > 0) {
          const stats = fs.statSync(filePath);
          const diff = stats.mtimeMs - lastEditedByUsTime;
          if (diff > externalEditToleranceMs) {
            // The file was modified after our last edit; do not attempt a
            // correction against content we did not produce.
            return returnUncorrected(0);
          }
        }
      }

      if (disableLLMCorrection) {
        return returnUncorrected(0);
      }

      const llmCorrectedOldString = await correctOldStringMismatch(
        baseLlmClient,
        currentContent,
        unescapedOldStringAttempt,
        abortSignal,
      );
      const llmOldOccurrences = countOccurrences(
        currentContent,
        llmCorrectedOldString,
      );

      if (!isExpectedCount(llmOldOccurrences)) {
        // LLM correction also failed for old_string
        return returnUncorrected(0);
      }

      finalOldString = llmCorrectedOldString;

      if (newStringPotentiallyEscaped) {
        const baseNewStringForLLMCorrection = unescapeStringForGeminiBug(
          originalParams.new_string,
        );
        finalNewString = await correctNewString(
          baseLlmClient,
          originalParams.old_string, // original old
          llmCorrectedOldString, // corrected old
          baseNewStringForLLMCorrection, // base new for correction
          abortSignal,
        );
      }
    } else {
      // Unescaping old_string resulted in > 1 occurrence but not allowMultiple
      return returnUncorrected(occurrences);
    }
  }

  const { targetString, pair } = trimPairIfPossible(
    finalOldString,
    finalNewString,
    currentContent,
    allowMultiple,
  );
  finalOldString = targetString;
  finalNewString = pair;

  // Final result construction
  return cacheAndReturn({
    params: {
      file_path: originalParams.file_path,
      old_string: finalOldString,
      new_string: finalNewString,
    },
    // Recalculate occurrences with the final old_string
    occurrences: countOccurrences(currentContent, finalOldString),
  });
}
|
||||
|
||||
export async function ensureCorrectFileContent(
|
||||
content: string,
|
||||
baseLlmClient: BaseLlmClient,
|
||||
abortSignal: AbortSignal,
|
||||
disableLLMCorrection: boolean = true,
|
||||
aggressiveUnescape: boolean = false,
|
||||
): Promise<string> {
|
||||
const cachedResult = fileContentCorrectionCache.get(content);
|
||||
if (cachedResult) {
|
||||
return cachedResult;
|
||||
}
|
||||
|
||||
const contentPotentiallyEscaped =
|
||||
unescapeStringForGeminiBug(content) !== content;
|
||||
if (!contentPotentiallyEscaped) {
|
||||
const unescapedContent = unescapeStringForGeminiBug(content);
|
||||
if (unescapedContent === content) {
|
||||
fileContentCorrectionCache.set(content, content);
|
||||
return content;
|
||||
}
|
||||
|
||||
if (disableLLMCorrection) {
|
||||
// If we can't use LLM, we should at least use the unescaped content
|
||||
// as it's likely better than the original if it was detected as potentially escaped.
|
||||
// unescapeStringForGeminiBug is a heuristic, not an LLM call.
|
||||
const unescaped = unescapeStringForGeminiBug(content);
|
||||
fileContentCorrectionCache.set(content, unescaped);
|
||||
return unescaped;
|
||||
if (aggressiveUnescape) {
|
||||
fileContentCorrectionCache.set(content, unescapedContent);
|
||||
return unescapedContent;
|
||||
}
|
||||
fileContentCorrectionCache.set(content, content);
|
||||
return content;
|
||||
}
|
||||
|
||||
const correctedContent = await correctStringEscaping(
|
||||
@@ -380,242 +64,6 @@ export async function ensureCorrectFileContent(
|
||||
return correctedContent;
|
||||
}
|
||||
|
||||
// Expected JSON schema for the LLM response when correcting old_string: an
// object with a single required string field, `corrected_target_snippet`.
const OLD_STRING_CORRECTION_SCHEMA: Record<string, unknown> = {
  type: 'object',
  properties: {
    corrected_target_snippet: {
      type: 'string',
      description:
        'The corrected version of the target snippet that exactly and uniquely matches a segment within the provided file content.',
    },
  },
  required: ['corrected_target_snippet'],
};
|
||||
|
||||
/**
 * Asks the LLM to identify the segment of `fileContent` that
 * `problematicSnippet` was most likely intended to match.
 *
 * @param baseLlmClient Client used to issue the `generateJson` request.
 * @param fileContent The full content of the file being edited.
 * @param problematicSnippet The snippet that failed to match the file.
 * @param abortSignal Signal forwarded to the LLM request.
 * @returns The `corrected_target_snippet` from the LLM response when it is a
 *   non-empty string; otherwise `problematicSnippet` unchanged. The snippet is
 *   also returned unchanged when the LLM call fails without an abort.
 * @throws Rethrows the LLM error when `abortSignal` has been aborted.
 */
export async function correctOldStringMismatch(
  baseLlmClient: BaseLlmClient,
  fileContent: string,
  problematicSnippet: string,
  abortSignal: AbortSignal,
): Promise<string> {
  const prompt = `
Context: A process needs to find an exact literal, unique match for a specific text snippet within a file's content. The provided snippet failed to match exactly. This is most likely because it has been overly escaped.

Task: Analyze the provided file content and the problematic target snippet. Identify the segment in the file content that the snippet was *most likely* intended to match. Output the *exact*, literal text of that segment from the file content. Focus *only* on removing extra escape characters and correcting formatting, whitespace, or minor differences to achieve a PERFECT literal match. The output must be the exact literal text as it appears in the file.

Problematic target snippet:
\`\`\`
${problematicSnippet}
\`\`\`

File Content:
\`\`\`
${fileContent}
\`\`\`

For example, if the problematic target snippet was "\\\\\\nconst greeting = \`Hello \\\\\`\${name}\\\\\`\`;" and the file content had content that looked like "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;", then corrected_target_snippet should likely be "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;" to fix the incorrect escaping to match the original file content.
If the differences are only in whitespace or formatting, apply similar whitespace/formatting changes to the corrected_target_snippet.

Return ONLY the corrected target snippet in the specified JSON format with the key 'corrected_target_snippet'. If no clear, unique match can be found, return an empty string for 'corrected_target_snippet'.
`.trim();

  const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }];

  try {
    const result = await baseLlmClient.generateJson({
      modelConfigKey: { model: 'edit-corrector' },
      contents,
      schema: OLD_STRING_CORRECTION_SCHEMA,
      abortSignal,
      systemInstruction: CODE_CORRECTION_SYSTEM_PROMPT,
      promptId: getPromptId(),
      role: LlmRole.UTILITY_EDIT_CORRECTOR,
    });

    if (
      result &&
      typeof result['corrected_target_snippet'] === 'string' &&
      result['corrected_target_snippet'].length > 0
    ) {
      return result['corrected_target_snippet'];
    }
    // Missing, non-string or empty answer: keep the caller's snippet.
    return problematicSnippet;
  } catch (error) {
    // An aborted request must propagate; any other failure is best-effort.
    if (abortSignal.aborted) {
      throw error;
    }

    debugLogger.warn(
      'Error during LLM call for old string snippet correction:',
      error,
    );

    return problematicSnippet;
  }
}
|
||||
|
||||
// Expected JSON schema for the LLM response when adjusting new_string: an
// object with a single required string field, `corrected_new_string`.
const NEW_STRING_CORRECTION_SCHEMA: Record<string, unknown> = {
  type: 'object',
  properties: {
    corrected_new_string: {
      type: 'string',
      description:
        'The original_new_string adjusted to be a suitable replacement for the corrected_old_string, while maintaining the original intent of the change.',
    },
  },
  required: ['corrected_new_string'],
};
|
||||
|
||||
/**
 * Adjusts the new_string to align with a corrected old_string, maintaining the original intent.
 *
 * No LLM call is made when the old string did not actually change.
 *
 * @param baseLlmClient Client used to issue the `generateJson` request.
 * @param originalOldString The old_string as originally supplied.
 * @param correctedOldString The old_string after correction.
 * @param originalNewString The replacement text to adapt.
 * @param abortSignal Signal forwarded to the LLM request.
 * @returns The `corrected_new_string` from the LLM response when it is a
 *   non-empty string; otherwise `originalNewString` unchanged. The original
 *   is also returned when the LLM call fails without an abort.
 * @throws Rethrows the LLM error when `abortSignal` has been aborted.
 */
export async function correctNewString(
  baseLlmClient: BaseLlmClient,
  originalOldString: string,
  correctedOldString: string,
  originalNewString: string,
  abortSignal: AbortSignal,
): Promise<string> {
  // Nothing was corrected, so there is nothing to adapt.
  if (originalOldString === correctedOldString) {
    return originalNewString;
  }

  const prompt = `
Context: A text replacement operation was planned. The original text to be replaced (original_old_string) was slightly different from the actual text in the file (corrected_old_string). The original_old_string has now been corrected to match the file content.
We now need to adjust the replacement text (original_new_string) so that it makes sense as a replacement for the corrected_old_string, while preserving the original intent of the change.

original_old_string (what was initially intended to be found):
\`\`\`
${originalOldString}
\`\`\`

corrected_old_string (what was actually found in the file and will be replaced):
\`\`\`
${correctedOldString}
\`\`\`

original_new_string (what was intended to replace original_old_string):
\`\`\`
${originalNewString}
\`\`\`

Task: Based on the differences between original_old_string and corrected_old_string, and the content of original_new_string, generate a corrected_new_string. This corrected_new_string should be what original_new_string would have been if it was designed to replace corrected_old_string directly, while maintaining the spirit of the original transformation.

For example, if original_old_string was "\\\\\\nconst greeting = \`Hello \\\\\`\${name}\\\\\`\`;" and corrected_old_string is "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;", and original_new_string was "\\\\\\nconst greeting = \`Hello \\\\\`\${name} \${lastName}\\\\\`\`;", then corrected_new_string should likely be "\nconst greeting = \`Hello ${'\\`'}\${name} \${lastName}${'\\`'}\`;" to fix the incorrect escaping.
If the differences are only in whitespace or formatting, apply similar whitespace/formatting changes to the corrected_new_string.

Return ONLY the corrected string in the specified JSON format with the key 'corrected_new_string'. If no adjustment is deemed necessary or possible, return the original_new_string.
`.trim();

  const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }];

  try {
    const result = await baseLlmClient.generateJson({
      modelConfigKey: { model: 'edit-corrector' },
      contents,
      schema: NEW_STRING_CORRECTION_SCHEMA,
      abortSignal,
      systemInstruction: CODE_CORRECTION_SYSTEM_PROMPT,
      promptId: getPromptId(),
      role: LlmRole.UTILITY_EDIT_CORRECTOR,
    });

    if (
      result &&
      typeof result['corrected_new_string'] === 'string' &&
      result['corrected_new_string'].length > 0
    ) {
      return result['corrected_new_string'];
    }
    // Missing, non-string or empty answer: keep the caller's new_string.
    return originalNewString;
  } catch (error) {
    // An aborted request must propagate; any other failure is best-effort.
    if (abortSignal.aborted) {
      throw error;
    }

    debugLogger.warn('Error during LLM call for new_string correction:', error);
    return originalNewString;
  }
}
|
||||
|
||||
// Expected JSON schema for the LLM response when fixing new_string escaping:
// an object with a single required string field,
// `corrected_new_string_escaping`.
const CORRECT_NEW_STRING_ESCAPING_SCHEMA: Record<string, unknown> = {
  type: 'object',
  properties: {
    corrected_new_string_escaping: {
      type: 'string',
      description:
        'The new_string with corrected escaping, ensuring it is a proper replacement for the old_string, especially considering potential over-escaping issues from previous LLM generations.',
    },
  },
  required: ['corrected_new_string_escaping'],
};
|
||||
|
||||
/**
 * Asks the LLM to repair escaping problems in a replacement string whose
 * matching old_string has already been located in the file.
 *
 * @param baseLlmClient Client used to issue the `generateJson` request.
 * @param oldString The exact text that will be replaced.
 * @param potentiallyProblematicNewString The replacement text, which may or
 *   may not be over-escaped.
 * @param abortSignal Signal forwarded to the LLM request.
 * @returns The `corrected_new_string_escaping` from the LLM response when it
 *   is a non-empty string; otherwise `potentiallyProblematicNewString`
 *   unchanged. The input is also returned when the LLM call fails without an
 *   abort.
 * @throws Rethrows the LLM error when `abortSignal` has been aborted.
 */
export async function correctNewStringEscaping(
  baseLlmClient: BaseLlmClient,
  oldString: string,
  potentiallyProblematicNewString: string,
  abortSignal: AbortSignal,
): Promise<string> {
  const prompt = `
Context: A text replacement operation is planned. The text to be replaced (old_string) has been correctly identified in the file. However, the replacement text (new_string) might have been improperly escaped by a previous LLM generation (e.g. too many backslashes for newlines like \\n instead of \n, or unnecessarily quotes like \\"Hello\\" instead of "Hello").

old_string (this is the exact text that will be replaced):
\`\`\`
${oldString}
\`\`\`

potentially_problematic_new_string (this is the text that should replace old_string, but MIGHT have bad escaping, or might be entirely correct):
\`\`\`
${potentiallyProblematicNewString}
\`\`\`

Task: Analyze the potentially_problematic_new_string. If it's syntactically invalid due to incorrect escaping (e.g., "\n", "\t", "\\", "\\'", "\\""), correct the invalid syntax. The goal is to ensure the new_string, when inserted into the code, will be a valid and correctly interpreted.

For example, if old_string is "foo" and potentially_problematic_new_string is "bar\\nbaz", the corrected_new_string_escaping should be "bar\nbaz".
If potentially_problematic_new_string is console.log(\\"Hello World\\"), it should be console.log("Hello World").

Return ONLY the corrected string in the specified JSON format with the key 'corrected_new_string_escaping'. If no escaping correction is needed, return the original potentially_problematic_new_string.
`.trim();

  const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }];

  try {
    const result = await baseLlmClient.generateJson({
      modelConfigKey: { model: 'edit-corrector' },
      contents,
      schema: CORRECT_NEW_STRING_ESCAPING_SCHEMA,
      abortSignal,
      systemInstruction: CODE_CORRECTION_SYSTEM_PROMPT,
      promptId: getPromptId(),
      role: LlmRole.UTILITY_EDIT_CORRECTOR,
    });

    if (
      result &&
      typeof result['corrected_new_string_escaping'] === 'string' &&
      result['corrected_new_string_escaping'].length > 0
    ) {
      return result['corrected_new_string_escaping'];
    }
    // Missing, non-string or empty answer: keep the caller's new_string.
    return potentiallyProblematicNewString;
  } catch (error) {
    // An aborted request must propagate; any other failure is best-effort.
    if (abortSignal.aborted) {
      throw error;
    }

    debugLogger.warn(
      'Error during LLM call for new_string escaping correction:',
      error,
    );
    return potentiallyProblematicNewString;
  }
}
|
||||
|
||||
const CORRECT_STRING_ESCAPING_SCHEMA: Record<string, unknown> = {
|
||||
type: 'object',
|
||||
properties: {
|
||||
@@ -684,46 +132,6 @@ Return ONLY the corrected string in the specified JSON format with the key 'corr
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Trims leading and trailing whitespace from `str`, but re-appends every
 * `\r` and `\n` character that was part of the trailing whitespace, in their
 * original order.
 *
 * @param str The string to trim.
 * @returns The trimmed string followed by its original trailing CR/LF
 *   characters.
 */
function trimPreservingTrailingNewline(str: string): string {
  const withoutTrailing = str.trimEnd();
  // Everything trimEnd removed; keep only the CR/LF characters from it.
  const trailingWhitespace = str.slice(withoutTrailing.length);
  const trailingNewlines = trailingWhitespace.replace(/[^\r\n]/g, '');
  return str.trim() + trailingNewlines;
}
|
||||
|
||||
/**
 * Trims `target` (keeping its trailing newlines) when the trimmed form still
 * matches `currentContent` the expected number of times, and trims the paired
 * string the same way so both stay aligned.
 *
 * @param target The string that must match `currentContent`.
 * @param trimIfTargetTrims The companion string, trimmed only when `target`
 *   is trimmed.
 * @param currentContent The content `target` is matched against.
 * @param allowMultiple When true, one or more matches are acceptable;
 *   otherwise exactly one match is required.
 * @returns The trimmed pair when trimming changed `target` and the trimmed
 *   target still matches; otherwise both inputs unchanged.
 */
function trimPairIfPossible(
  target: string,
  trimIfTargetTrims: string,
  currentContent: string,
  allowMultiple: boolean,
): { targetString: string; pair: string } {
  const trimmedTargetString = trimPreservingTrailingNewline(target);

  // Only worth checking when trimming actually removed something.
  if (target.length !== trimmedTargetString.length) {
    const trimmedTargetOccurrences = countOccurrences(
      currentContent,
      trimmedTargetString,
    );

    const isMatch = allowMultiple
      ? trimmedTargetOccurrences > 0
      : trimmedTargetOccurrences === 1;

    if (isMatch) {
      return {
        targetString: trimmedTargetString,
        pair: trimPreservingTrailingNewline(trimIfTargetTrims),
      };
    }
  }

  return {
    targetString: target,
    pair: trimIfTargetTrims,
  };
}
|
||||
|
||||
/**
|
||||
* Unescapes a string that might have been overly escaped by an LLM.
|
||||
*/
|
||||
@@ -770,23 +178,6 @@ export function unescapeStringForGeminiBug(inputString: string): string {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Counts non-overlapping occurrences of a substring in a string, scanning
 * from left to right.
 *
 * @param str The string to search in.
 * @param substr The substring to look for.
 * @returns The number of non-overlapping matches; 0 when `substr` is empty.
 */
export function countOccurrences(str: string, substr: string): number {
  // An empty needle would match at every position; treat it as "no match".
  if (substr === '') {
    return 0;
  }

  let count = 0;
  for (
    let pos = str.indexOf(substr);
    pos !== -1;
    // Resume after the current match so matches never overlap.
    pos = str.indexOf(substr, pos + substr.length)
  ) {
    count++;
  }
  return count;
}
|
||||
|
||||
/**
 * Clears both module-level correction caches (`editCorrectionCache` and
 * `fileContentCorrectionCache`). As the name indicates, this is intended for
 * use from tests only.
 */
export function resetEditCorrectorCaches_TEST_ONLY(): void {
  editCorrectionCache.clear();
  fileContentCorrectionCache.clear();
}
|
||||
|
||||
Reference in New Issue
Block a user