From aa4f405cff91b157642c59d2660edeaa16efe00a Mon Sep 17 00:00:00 2001 From: mkorwel Date: Wed, 22 Apr 2026 17:20:36 +0000 Subject: [PATCH] test: skip flaky file-system integration tests --- integration-tests/file-system.test.ts | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/integration-tests/file-system.test.ts b/integration-tests/file-system.test.ts index 80552cfd68..c48ba77e3a 100644 --- a/integration-tests/file-system.test.ts +++ b/integration-tests/file-system.test.ts @@ -14,7 +14,7 @@ import { checkModelOutputContent, } from './test-helper.js'; -describe('file-system', () => { +describe.skip('file-system (marked as flaky)', () => { let rig: TestRig; beforeEach(() => { @@ -29,9 +29,20 @@ describe('file-system', () => { }); rig.createFile('test.txt', 'hello world'); - const result = await rig.run({ - args: `read the file test.txt and show me its contents`, - }); + let result = ''; + try { + result = await rig.run({ + args: `read the file test.txt and show me its contents`, + timeout: 30000, // 30 seconds + }); + } catch (e) { + console.error('Test failed with error:', e); + console.log( + 'All tool calls found so far:', + rig.readToolLogs().map((t) => t.toolRequest.name), + ); + throw e; + } const foundToolCall = await rig.waitForToolCall('read_file'); @@ -121,7 +132,7 @@ describe('file-system', () => { const result = await rig.run({ args: `write "hello" to "${fileName}" and then stop. Do not perform any other actions.`, - timeout: 600000, // 10 min — real LLM can be slow in Docker sandbox + timeout: 60000, // 1 min is enough }); const foundToolCall = await rig.waitForToolCall('write_file');