From f56a561f022a459fb25be378dd9b78984edd71e7 Mon Sep 17 00:00:00 2001 From: Tommaso Sciortino Date: Mon, 13 Oct 2025 18:03:35 -0700 Subject: [PATCH] Fix and unskip flakey integration test in replace.test.ts (#11060) --- integration-tests/replace.test.ts | 144 ++++++++---------------------- scripts/deflake.js | 13 +-- 2 files changed, 40 insertions(+), 117 deletions(-) diff --git a/integration-tests/replace.test.ts b/integration-tests/replace.test.ts index 272e35838d..3634dfb82b 100644 --- a/integration-tests/replace.test.ts +++ b/integration-tests/replace.test.ts @@ -4,62 +4,26 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, expect, vi } from 'vitest'; -import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; +import { describe, it, expect } from 'vitest'; +import { TestRig } from './test-helper.js'; -describe.skip('replace', () => { +describe('replace', () => { it('should be able to replace content in a file', async () => { const rig = new TestRig(); await rig.setup('should be able to replace content in a file'); const fileName = 'file_to_replace.txt'; - const originalContent = 'original content'; - const expectedContent = 'replaced content'; + const originalContent = 'foo content'; + const expectedContent = 'bar content'; rig.createFile(fileName, originalContent); - const prompt = `Can you replace 'original' with 'replaced' in the file 'file_to_replace.txt'`; - const result = await rig.run(prompt); + await rig.run(`Replace 'foo' with 'bar' in the file 'file_to_replace.txt'`); const foundToolCall = await rig.waitForToolCall('replace'); - - // Add debugging information - if (!foundToolCall) { - printDebugInfo(rig, result); - } - expect(foundToolCall, 'Expected to find a replace tool call').toBeTruthy(); - // Validate model output - will throw if no output, warn if missing expected content - validateModelOutput( - result, - ['replaced', 'file_to_replace.txt'], - 'Replace content test', - ); - - const newFileContent = rig.readFile(fileName); - - // Add debugging for file content - if (newFileContent !== expectedContent) { - console.error('File content mismatch - Debug info:'); - console.error('Expected:', expectedContent); - console.error('Actual:', newFileContent); - console.error( - 'Tool calls:', - rig.readToolLogs().map((t) => ({ - name: t.toolRequest.name, - args: t.toolRequest.args, - })), - ); - } - - expect(newFileContent).toBe(expectedContent); - - // Log success info if verbose - vi.stubEnv('VERBOSE', 'true'); - if (process.env['VERBOSE'] === 'true') { - console.log('File replaced successfully. New content:', newFileContent); - } + expect(rig.readFile(fileName)).toBe(expectedContent); }); it('should handle $ literally when replacing text ending with $', async () => { @@ -74,68 +38,45 @@ describe.skip('replace', () => { rig.createFile(fileName, originalContent); - const prompt = - "Open regex.yml and append ' # updated' after the line containing ^[sv]d[a-z]$ without breaking the $ character."; + await rig.run( + "Open regex.yml and append ' # updated' after the line containing ^[sv]d[a-z]$ without breaking the $ character.", + ); - const result = await rig.run(prompt); const foundToolCall = await rig.waitForToolCall('replace'); - - if (!foundToolCall) { - printDebugInfo(rig, result); - } - expect(foundToolCall, 'Expected to find a replace tool call').toBeTruthy(); - validateModelOutput(result, ['regex.yml'], 'Replace $ literal test'); - - const newFileContent = rig.readFile(fileName); - expect(newFileContent).toBe(expectedContent); + expect(rig.readFile(fileName)).toBe(expectedContent); }); - //TODO - https://github.com/google-gemini/gemini-cli/issues/10851 - it.skip('should fail safely when old_string is not found', async () => { + it('should fail safely when old_string is not found', async () => { const rig = new TestRig(); - await rig.setup('should fail safely when old_string is not found'); + await rig.setup('should fail safely when old_string is not found', { + settings: { + useSmartEdit: false, + maxToolCalls: 1, + }, + }); const fileName = 'no_match.txt'; const fileContent = 'hello world'; rig.createFile(fileName, fileContent); - const prompt = `replace "goodbye" with "farewell" in ${fileName}`; - await rig.run(prompt); + await rig.run( + `Make one call to the replace tool to replace the text "goodbye" with "farewell" in ${fileName}.\n + * Do not read the file. + * Do not call any other tools. + * Do not call the replace tool more than once. + * After the first and only tool call, take no further action, even if the tool call fails.`, + ); await rig.waitForTelemetryReady(); const toolLogs = rig.readToolLogs(); - const replaceAttempt = toolLogs.find( - (log) => log.toolRequest.name === 'replace', - ); - const readAttempt = toolLogs.find( - (log) => log.toolRequest.name === 'read_file', - ); + expect(toolLogs.length, 'Expected exactly one tool call').toBe(1); + expect(toolLogs[0].toolRequest.name).toBe('replace'); + expect(toolLogs[0].toolRequest.success).toBe(false); - // VERIFY: The model must have at least tried to read the file or perform a replace. - expect( - readAttempt || replaceAttempt, - 'Expected model to attempt a read_file or replace', - ).toBeDefined(); - - // If the model tried to replace, that specific attempt must have failed. - if (replaceAttempt) { - if (replaceAttempt.toolRequest.success) { - console.error( - 'The replace tool succeeded when it was expected to fail', - ); - console.error('Tool call args:', replaceAttempt.toolRequest.args); - } - expect( - replaceAttempt.toolRequest.success, - 'If replace is called, it must fail', - ).toBe(false); - } - - // CRITICAL: The final content of the file must be unchanged. - const newFileContent = rig.readFile(fileName); - expect(newFileContent).toBe(fileContent); + // Ensure file content is unchanged + expect(rig.readFile(fileName)).toBe(fileContent); }); it('should insert a multi-line block of text', async () => { @@ -148,20 +89,13 @@ describe.skip('replace', () => { 'Line A\nFirst line\nSecond line\nThird line\nLine C'; rig.createFile(fileName, originalContent); - const prompt = `In ${fileName}, replace "" with:\n${newBlock}`; - const result = await rig.run(prompt); + const prompt = `In ${fileName}, replace "" with:\n${newBlock}. Use unix style line endings.`; + await rig.run(prompt); const foundToolCall = await rig.waitForToolCall('replace'); - if (!foundToolCall) { - printDebugInfo(rig, result); - } expect(foundToolCall, 'Expected to find a replace tool call').toBeTruthy(); - const newFileContent = rig.readFile(fileName); - - expect(newFileContent.replace(/\r\n/g, '\n')).toBe( - expectedContent.replace(/\r\n/g, '\n'), - ); + expect(rig.readFile(fileName)).toBe(expectedContent); }); it('should delete a block of text', async () => { @@ -174,17 +108,13 @@ describe.skip('replace', () => { const expectedContent = 'Hello\nWorld'; rig.createFile(fileName, originalContent); - const prompt = `In ${fileName}, delete the entire block from "## DELETE THIS ##" to "## END DELETE ##" including the markers.`; - const result = await rig.run(prompt); + await rig.run( + `In ${fileName}, delete the entire block from "## DELETE THIS ##" to "## END DELETE ##" including the markers.`, + ); const foundToolCall = await rig.waitForToolCall('replace'); - if (!foundToolCall) { - printDebugInfo(rig, result); - } expect(foundToolCall, 'Expected to find a replace tool call').toBeTruthy(); - const newFileContent = rig.readFile(fileName); - - expect(newFileContent).toBe(expectedContent); + expect(rig.readFile(fileName)).toBe(expectedContent); }); }); diff --git a/scripts/deflake.js b/scripts/deflake.js index 08eace5b43..fcceced389 100644 --- a/scripts/deflake.js +++ b/scripts/deflake.js @@ -16,15 +16,7 @@ import { hideBin } from 'yargs/helpers'; * @param {string} command The command string to execute (e.g., 'npm run test:e2e -- --watch'). * @returns {Promise} A Promise that resolves with the exit code of the process. */ -function runCommand(command) { - // Split the command string into the command name and its arguments. - // This handles quotes and spaces more robustly than a simple split(' '). - // For 'npm run test:e2e -- --test-name-pattern "replace"', it results in: - // cmd: 'npm', args: ['run', 'test:e2e', '--', '--test-name-pattern', 'replace'] - const parts = command.match(/(?:[^\s"]+|"[^"]*")+/g) || []; - const cmd = parts[0]; - const args = parts.slice(1).map((arg) => arg.replace(/"/g, '')); // Remove surrounding quotes - +function runCommand(cmd, args = []) { if (!cmd) { return Promise.resolve(1); } @@ -63,6 +55,7 @@ async function main() { const NUM_RUNS = argv.runs; const COMMAND = argv.command; + const ARGS = argv._; let failures = 0; console.log(`--- Starting Deflake Run (${NUM_RUNS} iterations) ---`); @@ -71,7 +64,7 @@ async function main() { try { // 3. Await the asynchronous command run - const exitCode = await runCommand(COMMAND); + const exitCode = await runCommand(COMMAND, ARGS); if (exitCode === 0) { console.log('✅ Run PASS');