diff --git a/integration-tests/file-system.test.ts b/integration-tests/file-system.test.ts
index 6b150be53b..cd880d66aa 100644
--- a/integration-tests/file-system.test.ts
+++ b/integration-tests/file-system.test.ts
@@ -5,6 +5,8 @@
  */
 
 import { describe, it, expect } from 'vitest';
+import { existsSync } from 'node:fs';
+import * as path from 'node:path';
 import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
 
 describe('file-system', () => {
@@ -90,4 +92,170 @@ describe('file-system', () => {
       console.log('File written successfully with hello message.');
     }
   });
+
+  it('should correctly handle file paths with spaces', async () => { // Asks for a write to a file name containing spaces, then checks the file content.
+    const rig = new TestRig();
+    await rig.setup('should correctly handle file paths with spaces');
+    const fileName = 'my test file.txt';
+
+    const result = await rig.run(`write "hello" to "${fileName}"`);
+
+    const foundToolCall = await rig.waitForToolCall('write_file');
+    if (!foundToolCall) { // Dump debug info before the assertion below fails.
+      printDebugInfo(rig, result);
+    }
+    expect(
+      foundToolCall,
+      'Expected to find a write_file tool call',
+    ).toBeTruthy();
+
+    const newFileContent = rig.readFile(fileName);
+    expect(newFileContent).toBe('hello'); // Exact match: no trailing newline is tolerated.
+  });
+
+  it('should perform a read-then-write sequence', async () => { // Seeds version.txt with 1.0.0 and expects it to end up as 1.0.1.
+    const rig = new TestRig();
+    await rig.setup('should perform a read-then-write sequence');
+    const fileName = 'version.txt';
+    rig.createFile(fileName, '1.0.0');
+
+    const prompt = `Read the version from ${fileName} and write the next version 1.0.1 back to the file.`;
+    const result = await rig.run(prompt);
+
+    await rig.waitForTelemetryReady();
+    const toolLogs = rig.readToolLogs();
+
+    const readCall = toolLogs.find(
+      (log) => log.toolRequest.name === 'read_file',
+    );
+    const writeCall = toolLogs.find( // Either write_file or replace counts as the write step.
+      (log) =>
+        log.toolRequest.name === 'write_file' ||
+        log.toolRequest.name === 'replace',
+    );
+
+    if (!readCall || !writeCall) {
+      printDebugInfo(rig, result, { readCall, writeCall });
+    }
+
+    expect(readCall, 'Expected to find a read_file tool call').toBeDefined(); // Neither lookup checks success or call order.
+    expect(
+      writeCall,
+      'Expected to find a write_file or replace tool call',
+    ).toBeDefined();
+
+    const newFileContent = rig.readFile(fileName);
+    expect(newFileContent).toBe('1.0.1'); // The content check is what proves the edit actually landed.
+  });
+
+  it('should replace multiple instances of a string', async () => { // The seeded file contains "test line" twice; both must become "new line".
+    const rig = new TestRig();
+    await rig.setup('should replace multiple instances of a string');
+    const fileName = 'ambiguous.txt';
+    const fileContent = 'Hey there, \ntest line\ntest line';
+    const expectedContent = 'Hey there, \nnew line\nnew line';
+    rig.createFile(fileName, fileContent);
+
+    const result = await rig.run(
+      `replace "test line" with "new line" in ${fileName}`,
+    );
+
+    const foundToolCall = await rig.waitForAnyToolCall([ // Either tool is accepted for this edit.
+      'replace',
+      'write_file',
+    ]);
+    if (!foundToolCall) {
+      printDebugInfo(rig, result);
+    }
+    expect(
+      foundToolCall,
+      'Expected to find a replace or write_file tool call',
+    ).toBeTruthy();
+
+    const toolLogs = rig.readToolLogs();
+    const successfulEdit = toolLogs.some( // True when at least one replace/write_file call reported success.
+      (log) =>
+        (log.toolRequest.name === 'replace' ||
+          log.toolRequest.name === 'write_file') &&
+        log.toolRequest.success,
+    );
+    if (!successfulEdit) {
+      console.error(
+        'Expected a successful edit tool call, but none was found.',
+      );
+      printDebugInfo(rig, result);
+    }
+    expect(successfulEdit, 'Expected a successful edit tool call').toBeTruthy();
+
+    const newFileContent = rig.readFile(fileName);
+    expect(newFileContent).toBe(expectedContent);
+  });
+
+  it('should fail safely when trying to edit a non-existent file', async () => { // No file is seeded; the run must not create one.
+    const rig = new TestRig();
+    await rig.setup(
+      'should fail safely when trying to edit a non-existent file',
+    );
+    const fileName = 'non_existent.txt';
+
+    const result = await rig.run(`In ${fileName}, replace "a" with "b"`);
+
+    await rig.waitForTelemetryReady();
+    const toolLogs = rig.readToolLogs();
+
+    const readAttempt = toolLogs.find( // First read_file entry only; later reads are not inspected.
+      (log) => log.toolRequest.name === 'read_file',
+    );
+    const writeAttempt = toolLogs.find( // Matches any write_file call, successful or not.
+      (log) => log.toolRequest.name === 'write_file',
+    );
+    const successfulReplace = toolLogs.find(
+      (log) => log.toolRequest.name === 'replace' && log.toolRequest.success,
+    );
+
+    // The model can either investigate (and fail) or do nothing.
+    // If it chose to investigate by reading, that read must have failed.
+    if (readAttempt && readAttempt.toolRequest.success) {
+      console.error(
+        'A read_file attempt succeeded for a non-existent file when it should have failed.',
+      );
+      printDebugInfo(rig, result);
+    }
+    if (readAttempt) {
+      expect(
+        readAttempt.toolRequest.success,
+        'If model tries to read the file, that attempt must fail',
+      ).toBe(false);
+    }
+
+    // CRITICAL: Verify that no matter what the model did, it never successfully
+    // wrote or replaced anything.
+    if (writeAttempt) {
+      console.error(
+        'A write_file attempt was made when no file should be written.',
+      );
+      printDebugInfo(rig, result);
+    }
+    expect( // Stricter than the comment above: even a failed write_file call fails the test.
+      writeAttempt,
+      'write_file should not have been called',
+    ).toBeUndefined();
+
+    if (successfulReplace) {
+      console.error('A successful replace occurred when it should not have.');
+      printDebugInfo(rig, result);
+    }
+    expect(
+      successfulReplace,
+      'A successful replace should not have occurred',
+    ).toBeUndefined();
+
+    // Final verification: ensure the file was not created.
+    const filePath = path.join(rig.testDir!, fileName); // NOTE(review): assumes setup() has populated rig.testDir - confirm in test-helper.
+    const fileExists = existsSync(filePath);
+
+    expect(fileExists, 'The non-existent file should not be created').toBe(
+      false,
+    );
+  });
 });
diff --git a/integration-tests/replace.test.ts b/integration-tests/replace.test.ts
index 0de23a04c7..6c06f73862 100644
--- a/integration-tests/replace.test.ts
+++ b/integration-tests/replace.test.ts
@@ -91,4 +91,98 @@ describe('replace', () => {
     const newFileContent = rig.readFile(fileName);
     expect(newFileContent).toBe(expectedContent);
   });
+
+  it('should fail safely when old_string is not found', async () => { // The seeded file lacks "goodbye", so it must stay unchanged.
+    const rig = new TestRig();
+    await rig.setup('should fail safely when old_string is not found');
+    const fileName = 'no_match.txt';
+    const fileContent = 'hello world';
+    rig.createFile(fileName, fileContent);
+
+    const prompt = `replace "goodbye" with "farewell" in ${fileName}`;
+    await rig.run(prompt); // The run result is discarded, so this test has nothing to pass to printDebugInfo on failure.
+
+    await rig.waitForTelemetryReady();
+    const toolLogs = rig.readToolLogs();
+
+    const replaceAttempt = toolLogs.find( // First replace entry only; later attempts are not inspected.
+      (log) => log.toolRequest.name === 'replace',
+    );
+    const readAttempt = toolLogs.find(
+      (log) => log.toolRequest.name === 'read_file',
+    );
+
+    // VERIFY: The model must have at least tried to read the file or perform a replace.
+    expect(
+      readAttempt || replaceAttempt, // Undefined only when both lookups found nothing.
+      'Expected model to attempt a read_file or replace',
+    ).toBeDefined();
+
+    // If the model tried to replace, that specific attempt must have failed.
+    if (replaceAttempt) {
+      if (replaceAttempt.toolRequest.success) {
+        console.error(
+          'The replace tool succeeded when it was expected to fail',
+        );
+        console.error('Tool call args:', replaceAttempt.toolRequest.args);
+      }
+      expect(
+        replaceAttempt.toolRequest.success,
+        'If replace is called, it must fail',
+      ).toBe(false);
+    }
+
+    // CRITICAL: The final content of the file must be unchanged.
+    const newFileContent = rig.readFile(fileName);
+    expect(newFileContent).toBe(fileContent);
+  });
+
+  it('should insert a multi-line block of text', async () => { // Swaps a placeholder comment for a two-line block.
+    const rig = new TestRig();
+    await rig.setup('should insert a multi-line block of text');
+    const fileName = 'insert_block.js';
+    const originalContent = 'function hello() {\n // INSERT_CODE_HERE\n}';
+    const newBlock = "console.log('hello');\n console.log('world');";
+    const expectedContent = `function hello() {\n ${newBlock}\n}`;
+    rig.createFile(fileName, originalContent);
+
+    const prompt = `In ${fileName}, replace "// INSERT_CODE_HERE" with:\n${newBlock}`;
+    const result = await rig.run(prompt);
+
+    const foundToolCall = await rig.waitForToolCall('replace'); // Only a 'replace' call satisfies this wait.
+    if (!foundToolCall) {
+      printDebugInfo(rig, result);
+    }
+    expect(foundToolCall, 'Expected to find a replace tool call').toBeTruthy();
+
+    const newFileContent = rig.readFile(fileName);
+
+    expect(newFileContent.replace(/\r\n/g, '\n')).toBe( // CRLF is normalized to LF on both sides before comparing.
+      expectedContent.replace(/\r\n/g, '\n'),
+    );
+  });
+
+  it('should delete a block of text', async () => { // Removes a marker-delimited block, leaving only the lines around it.
+    const rig = new TestRig();
+    await rig.setup('should delete a block of text');
+    const fileName = 'delete_block.txt';
+    const blockToDelete =
+      '## DELETE THIS ##\nThis is a block of text to delete.\n## END DELETE ##';
+    const originalContent = `Hello\n${blockToDelete}\nWorld`;
+    const expectedContent = 'Hello\nWorld'; // Exactly one newline must remain between the two surviving lines.
+    rig.createFile(fileName, originalContent);
+
+    const prompt = `In ${fileName}, delete the entire block from "## DELETE THIS ##" to "## END DELETE ##" including the markers.`;
+    const result = await rig.run(prompt);
+
+    const foundToolCall = await rig.waitForToolCall('replace'); // Only a 'replace' call satisfies this wait.
+    if (!foundToolCall) {
+      printDebugInfo(rig, result);
+    }
+    expect(foundToolCall, 'Expected to find a replace tool call').toBeTruthy();
+
+    const newFileContent = rig.readFile(fileName);
+
+    expect(newFileContent).toBe(expectedContent); // No line-ending normalization is applied here.
+  });
 });