test: additional integration tests for editing a file (#9963)

Co-authored-by: Taneja Hriday <hridayt@google.com>
This commit is contained in:
hritan
2025-09-30 19:59:19 +00:00
committed by GitHub
parent 0fec673bfb
commit c0400a4414
2 changed files with 262 additions and 0 deletions

View File

@@ -5,6 +5,8 @@
*/
import { describe, it, expect } from 'vitest';
import { existsSync } from 'node:fs';
import * as path from 'node:path';
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
describe('file-system', () => {
@@ -90,4 +92,170 @@ describe('file-system', () => {
console.log('File written successfully with hello message.');
}
});
it('should correctly handle file paths with spaces', async () => {
  // Scenario: the target file name contains spaces; the file must end up on
  // disk under that exact name with exactly the requested content.
  const rig = new TestRig();
  await rig.setup('should correctly handle file paths with spaces');

  const spacedName = 'my test file.txt';
  const runOutput = await rig.run(`write "hello" to "${spacedName}"`);

  const sawWriteFile = await rig.waitForToolCall('write_file');
  if (!sawWriteFile) {
    // Emit diagnostics first so the failing assertion below has context.
    printDebugInfo(rig, runOutput);
  }
  expect(
    sawWriteFile,
    'Expected to find a write_file tool call',
  ).toBeTruthy();

  // The content is compared exactly: no trailing newline is tolerated.
  expect(rig.readFile(spacedName)).toBe('hello');
});
it('should perform a read-then-write sequence', async () => {
  // Scenario: a file holding "1.0.0" must be read and then rewritten to
  // "1.0.1". The test asserts that both kinds of tool call were logged and
  // that the final content is correct; it does not assert their order.
  const rig = new TestRig();
  await rig.setup('should perform a read-then-write sequence');

  const versionFile = 'version.txt';
  rig.createFile(versionFile, '1.0.0');

  const runOutput = await rig.run(
    `Read the version from ${versionFile} and write the next version 1.0.1 back to the file.`,
  );
  await rig.waitForTelemetryReady();

  // Either tool counts as "writing" for the purposes of this test.
  const writeToolNames = ['write_file', 'replace'];
  const toolLogs = rig.readToolLogs();
  const readCall = toolLogs.find(
    ({ toolRequest }) => toolRequest.name === 'read_file',
  );
  const writeCall = toolLogs.find(({ toolRequest }) =>
    writeToolNames.includes(toolRequest.name),
  );

  const missingCall = !readCall || !writeCall;
  if (missingCall) {
    // Include both lookups so it is visible which one came back empty.
    printDebugInfo(rig, runOutput, { readCall, writeCall });
  }

  expect(readCall, 'Expected to find a read_file tool call').toBeDefined();
  expect(
    writeCall,
    'Expected to find a write_file or replace tool call',
  ).toBeDefined();

  expect(rig.readFile(versionFile)).toBe('1.0.1');
});
it('should replace multiple instances of a string', async () => {
  // Scenario: the string to replace occurs twice in the file; both
  // occurrences must be rewritten, via either `replace` or `write_file`.
  const rig = new TestRig();
  await rig.setup('should replace multiple instances of a string');

  const targetFile = 'ambiguous.txt';
  const before = 'Hey there, \ntest line\ntest line';
  const after = 'Hey there, \nnew line\nnew line';
  rig.createFile(targetFile, before);

  const runOutput = await rig.run(
    `replace "test line" with "new line" in ${targetFile}`,
  );

  const editToolNames = ['replace', 'write_file'];
  const sawEditCall = await rig.waitForAnyToolCall(editToolNames);
  if (!sawEditCall) {
    printDebugInfo(rig, runOutput);
  }
  expect(
    sawEditCall,
    'Expected to find a replace or write_file tool call',
  ).toBeTruthy();

  // Seeing an edit call is not enough: at least one must be logged as
  // successful.
  const editSucceeded = rig
    .readToolLogs()
    .some(
      ({ toolRequest }) =>
        editToolNames.includes(toolRequest.name) && toolRequest.success,
    );
  if (!editSucceeded) {
    console.error(
      'Expected a successful edit tool call, but none was found.',
    );
    printDebugInfo(rig, runOutput);
  }
  expect(editSucceeded, 'Expected a successful edit tool call').toBeTruthy();

  expect(rig.readFile(targetFile)).toBe(after);
});
it('should fail safely when trying to edit a non-existent file', async () => {
  // Scenario: the prompt asks for an edit to a file that was never created.
  // The model may probe for the file, but nothing may be written and the
  // file must not exist afterwards.
  const rig = new TestRig();
  await rig.setup(
    'should fail safely when trying to edit a non-existent file',
  );

  const missingFile = 'non_existent.txt';
  const runOutput = await rig.run(`In ${missingFile}, replace "a" with "b"`);
  await rig.waitForTelemetryReady();
  const toolLogs = rig.readToolLogs();

  // Reading is optional, but a read of a missing file has to be logged as a
  // failure.
  const readAttempt = toolLogs.find(
    ({ toolRequest }) => toolRequest.name === 'read_file',
  );
  if (readAttempt) {
    if (readAttempt.toolRequest.success) {
      console.error(
        'A read_file attempt succeeded for a non-existent file when it should have failed.',
      );
      printDebugInfo(rig, runOutput);
    }
    expect(
      readAttempt.toolRequest.success,
      'If model tries to read the file, that attempt must fail',
    ).toBe(false);
  }

  // Any write_file call at all is rejected, whether or not it succeeded.
  const writeAttempt = toolLogs.find(
    ({ toolRequest }) => toolRequest.name === 'write_file',
  );
  if (writeAttempt) {
    console.error(
      'A write_file attempt was made when no file should be written.',
    );
    printDebugInfo(rig, runOutput);
  }
  expect(
    writeAttempt,
    'write_file should not have been called',
  ).toBeUndefined();

  // A replace call is tolerated only if it was logged as unsuccessful.
  const successfulReplace = toolLogs.find(
    ({ toolRequest }) => toolRequest.name === 'replace' && toolRequest.success,
  );
  if (successfulReplace) {
    console.error('A successful replace occurred when it should not have.');
    printDebugInfo(rig, runOutput);
  }
  expect(
    successfulReplace,
    'A successful replace should not have occurred',
  ).toBeUndefined();

  // Last line of defence: check the filesystem directly.
  const createdOnDisk = existsSync(path.join(rig.testDir!, missingFile));
  expect(createdOnDisk, 'The non-existent file should not be created').toBe(
    false,
  );
});
});

View File

@@ -91,4 +91,98 @@ describe('replace', () => {
const newFileContent = rig.readFile(fileName);
expect(newFileContent).toBe(expectedContent);
});
it('should fail safely when old_string is not found', async () => {
  // Scenario: the prompt asks to replace a string that does not occur in the
  // file. The model must at least look at the file or try the replace, any
  // replace attempt must be logged as failed, and the file must be untouched.
  const rig = new TestRig();
  await rig.setup('should fail safely when old_string is not found');
  const fileName = 'no_match.txt';
  const fileContent = 'hello world';
  rig.createFile(fileName, fileContent);

  const prompt = `replace "goodbye" with "farewell" in ${fileName}`;
  // Keep the run result so failures can be diagnosed with printDebugInfo,
  // as the other tests in this file do.
  const result = await rig.run(prompt);
  await rig.waitForTelemetryReady();
  const toolLogs = rig.readToolLogs();

  const replaceAttempt = toolLogs.find(
    (log) => log.toolRequest.name === 'replace',
  );
  const readAttempt = toolLogs.find(
    (log) => log.toolRequest.name === 'read_file',
  );

  // VERIFY: The model must have at least tried to read the file or perform a replace.
  if (!readAttempt && !replaceAttempt) {
    printDebugInfo(rig, result);
  }
  expect(
    readAttempt || replaceAttempt,
    'Expected model to attempt a read_file or replace',
  ).toBeDefined();

  // If the model tried to replace, that specific attempt must have failed.
  if (replaceAttempt) {
    if (replaceAttempt.toolRequest.success) {
      console.error(
        'The replace tool succeeded when it was expected to fail',
      );
      console.error('Tool call args:', replaceAttempt.toolRequest.args);
      printDebugInfo(rig, result);
    }
    expect(
      replaceAttempt.toolRequest.success,
      'If replace is called, it must fail',
    ).toBe(false);
  }

  // CRITICAL: The final content of the file must be unchanged.
  const newFileContent = rig.readFile(fileName);
  if (newFileContent !== fileContent) {
    printDebugInfo(rig, result);
  }
  expect(newFileContent).toBe(fileContent);
});
it('should insert a multi-line block of text', async () => {
  // Scenario: a single-line placeholder comment is replaced by a two-line
  // block of code through the `replace` tool.
  const rig = new TestRig();
  await rig.setup('should insert a multi-line block of text');

  const targetFile = 'insert_block.js';
  const newBlock = "console.log('hello');\n console.log('world');";
  const template = 'function hello() {\n // INSERT_CODE_HERE\n}';
  const wanted = `function hello() {\n ${newBlock}\n}`;
  rig.createFile(targetFile, template);

  const runOutput = await rig.run(
    `In ${targetFile}, replace "// INSERT_CODE_HERE" with:\n${newBlock}`,
  );

  const sawReplace = await rig.waitForToolCall('replace');
  if (!sawReplace) {
    printDebugInfo(rig, runOutput);
  }
  expect(sawReplace, 'Expected to find a replace tool call').toBeTruthy();

  // Compare with CRLF folded to LF on both sides so the check does not
  // depend on which line-ending style ended up in the file.
  const toLf = (text) => text.replace(/\r\n/g, '\n');
  const actual = rig.readFile(targetFile);
  expect(toLf(actual)).toBe(toLf(wanted));
});
it('should delete a block of text', async () => {
  // Scenario: a three-line marked block sits between "Hello" and "World";
  // after the edit only those two lines may remain.
  const rig = new TestRig();
  await rig.setup('should delete a block of text');

  const targetFile = 'delete_block.txt';
  const doomedBlock = [
    '## DELETE THIS ##',
    'This is a block of text to delete.',
    '## END DELETE ##',
  ].join('\n');
  const before = ['Hello', doomedBlock, 'World'].join('\n');
  const after = 'Hello\nWorld';
  rig.createFile(targetFile, before);

  const runOutput = await rig.run(
    `In ${targetFile}, delete the entire block from "## DELETE THIS ##" to "## END DELETE ##" including the markers.`,
  );

  const sawReplace = await rig.waitForToolCall('replace');
  if (!sawReplace) {
    printDebugInfo(rig, runOutput);
  }
  expect(sawReplace, 'Expected to find a replace tool call').toBeTruthy();

  // Exact comparison: no leftover blank line where the block used to be.
  expect(rig.readFile(targetFile)).toBe(after);
});
});