Fix and unskip flakey integration test in replace.test.ts (#11060)

This commit is contained in:
Tommaso Sciortino
2025-10-13 18:03:35 -07:00
committed by GitHub
parent dd01af609e
commit f56a561f02
2 changed files with 40 additions and 117 deletions

View File

@@ -4,62 +4,26 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi } from 'vitest';
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
import { describe, it, expect } from 'vitest';
import { TestRig } from './test-helper.js';
describe.skip('replace', () => {
describe('replace', () => {
it('should be able to replace content in a file', async () => {
const rig = new TestRig();
await rig.setup('should be able to replace content in a file');
const fileName = 'file_to_replace.txt';
const originalContent = 'original content';
const expectedContent = 'replaced content';
const originalContent = 'foo content';
const expectedContent = 'bar content';
rig.createFile(fileName, originalContent);
const prompt = `Can you replace 'original' with 'replaced' in the file 'file_to_replace.txt'`;
const result = await rig.run(prompt);
await rig.run(`Replace 'foo' with 'bar' in the file 'file_to_replace.txt'`);
const foundToolCall = await rig.waitForToolCall('replace');
// Add debugging information
if (!foundToolCall) {
printDebugInfo(rig, result);
}
expect(foundToolCall, 'Expected to find a replace tool call').toBeTruthy();
// Validate model output - will throw if no output, warn if missing expected content
validateModelOutput(
result,
['replaced', 'file_to_replace.txt'],
'Replace content test',
);
const newFileContent = rig.readFile(fileName);
// Add debugging for file content
if (newFileContent !== expectedContent) {
console.error('File content mismatch - Debug info:');
console.error('Expected:', expectedContent);
console.error('Actual:', newFileContent);
console.error(
'Tool calls:',
rig.readToolLogs().map((t) => ({
name: t.toolRequest.name,
args: t.toolRequest.args,
})),
);
}
expect(newFileContent).toBe(expectedContent);
// Log success info if verbose
vi.stubEnv('VERBOSE', 'true');
if (process.env['VERBOSE'] === 'true') {
console.log('File replaced successfully. New content:', newFileContent);
}
expect(rig.readFile(fileName)).toBe(expectedContent);
});
it('should handle $ literally when replacing text ending with $', async () => {
@@ -74,68 +38,45 @@ describe.skip('replace', () => {
rig.createFile(fileName, originalContent);
const prompt =
"Open regex.yml and append ' # updated' after the line containing ^[sv]d[a-z]$ without breaking the $ character.";
await rig.run(
"Open regex.yml and append ' # updated' after the line containing ^[sv]d[a-z]$ without breaking the $ character.",
);
const result = await rig.run(prompt);
const foundToolCall = await rig.waitForToolCall('replace');
if (!foundToolCall) {
printDebugInfo(rig, result);
}
expect(foundToolCall, 'Expected to find a replace tool call').toBeTruthy();
validateModelOutput(result, ['regex.yml'], 'Replace $ literal test');
const newFileContent = rig.readFile(fileName);
expect(newFileContent).toBe(expectedContent);
expect(rig.readFile(fileName)).toBe(expectedContent);
});
//TODO - https://github.com/google-gemini/gemini-cli/issues/10851
it.skip('should fail safely when old_string is not found', async () => {
it('should fail safely when old_string is not found', async () => {
const rig = new TestRig();
await rig.setup('should fail safely when old_string is not found');
await rig.setup('should fail safely when old_string is not found', {
settings: {
useSmartEdit: false,
maxToolCalls: 1,
},
});
const fileName = 'no_match.txt';
const fileContent = 'hello world';
rig.createFile(fileName, fileContent);
const prompt = `replace "goodbye" with "farewell" in ${fileName}`;
await rig.run(prompt);
await rig.run(
`Make one call to the replace tool to replace the text "goodbye" with "farewell" in ${fileName}.\n
* Do not read the file.
* Do not call any other tools.
* Do not call the replace tool more than once.
* After the first and only tool call, take no further action, even if the tool call fails.`,
);
await rig.waitForTelemetryReady();
const toolLogs = rig.readToolLogs();
const replaceAttempt = toolLogs.find(
(log) => log.toolRequest.name === 'replace',
);
const readAttempt = toolLogs.find(
(log) => log.toolRequest.name === 'read_file',
);
expect(toolLogs.length, 'Expected exactly one tool call').toBe(1);
expect(toolLogs[0].toolRequest.name).toBe('replace');
expect(toolLogs[0].toolRequest.success).toBe(false);
// VERIFY: The model must have at least tried to read the file or perform a replace.
expect(
readAttempt || replaceAttempt,
'Expected model to attempt a read_file or replace',
).toBeDefined();
// If the model tried to replace, that specific attempt must have failed.
if (replaceAttempt) {
if (replaceAttempt.toolRequest.success) {
console.error(
'The replace tool succeeded when it was expected to fail',
);
console.error('Tool call args:', replaceAttempt.toolRequest.args);
}
expect(
replaceAttempt.toolRequest.success,
'If replace is called, it must fail',
).toBe(false);
}
// CRITICAL: The final content of the file must be unchanged.
const newFileContent = rig.readFile(fileName);
expect(newFileContent).toBe(fileContent);
// Ensure file content is unchanged
expect(rig.readFile(fileName)).toBe(fileContent);
});
it('should insert a multi-line block of text', async () => {
@@ -148,20 +89,13 @@ describe.skip('replace', () => {
'Line A\nFirst line\nSecond line\nThird line\nLine C';
rig.createFile(fileName, originalContent);
const prompt = `In ${fileName}, replace "<INSERT_TEXT_HERE>" with:\n${newBlock}`;
const result = await rig.run(prompt);
const prompt = `In ${fileName}, replace "<INSERT_TEXT_HERE>" with:\n${newBlock}. Use unix style line endings.`;
await rig.run(prompt);
const foundToolCall = await rig.waitForToolCall('replace');
if (!foundToolCall) {
printDebugInfo(rig, result);
}
expect(foundToolCall, 'Expected to find a replace tool call').toBeTruthy();
const newFileContent = rig.readFile(fileName);
expect(newFileContent.replace(/\r\n/g, '\n')).toBe(
expectedContent.replace(/\r\n/g, '\n'),
);
expect(rig.readFile(fileName)).toBe(expectedContent);
});
it('should delete a block of text', async () => {
@@ -174,17 +108,13 @@ describe.skip('replace', () => {
const expectedContent = 'Hello\nWorld';
rig.createFile(fileName, originalContent);
const prompt = `In ${fileName}, delete the entire block from "## DELETE THIS ##" to "## END DELETE ##" including the markers.`;
const result = await rig.run(prompt);
await rig.run(
`In ${fileName}, delete the entire block from "## DELETE THIS ##" to "## END DELETE ##" including the markers.`,
);
const foundToolCall = await rig.waitForToolCall('replace');
if (!foundToolCall) {
printDebugInfo(rig, result);
}
expect(foundToolCall, 'Expected to find a replace tool call').toBeTruthy();
const newFileContent = rig.readFile(fileName);
expect(newFileContent).toBe(expectedContent);
expect(rig.readFile(fileName)).toBe(expectedContent);
});
});

View File

@@ -16,15 +16,7 @@ import { hideBin } from 'yargs/helpers';
* @param {string} command The command string to execute (e.g., 'npm run test:e2e -- --watch').
* @returns {Promise<number>} A Promise that resolves with the exit code of the process.
*/
function runCommand(command) {
// Split the command string into the command name and its arguments.
// This handles quotes and spaces more robustly than a simple split(' ').
// For 'npm run test:e2e -- --test-name-pattern "replace"', it results in:
// cmd: 'npm', args: ['run', 'test:e2e', '--', '--test-name-pattern', 'replace']
const parts = command.match(/(?:[^\s"]+|"[^"]*")+/g) || [];
const cmd = parts[0];
const args = parts.slice(1).map((arg) => arg.replace(/"/g, '')); // Remove surrounding quotes
function runCommand(cmd, args = []) {
if (!cmd) {
return Promise.resolve(1);
}
@@ -63,6 +55,7 @@ async function main() {
const NUM_RUNS = argv.runs;
const COMMAND = argv.command;
const ARGS = argv._;
let failures = 0;
console.log(`--- Starting Deflake Run (${NUM_RUNS} iterations) ---`);
@@ -71,7 +64,7 @@ async function main() {
try {
// 3. Await the asynchronous command run
const exitCode = await runCommand(COMMAND);
const exitCode = await runCommand(COMMAND, ARGS);
if (exitCode === 0) {
console.log('✅ Run PASS');