Patch summary (free text ahead of the first "diff --git" header; git apply skips it):
  * .github/workflows/e2e.yml: the pwsh step now runs the deflake variant of the
    sandbox:none integration script.
  * integration-tests/file-system-interactive.test.ts: the suite is no longer skipped;
    step 2 asserts on the arguments of the successful write_file/replace tool call
    instead of on screen text and on-disk file contents.
  * integration-tests/test-helper.ts: expectToolCallSuccess accepts an optional
    matchArgs predicate over the logged args; when it is omitted, any successful call
    with a matching tool name still counts.
  * package.json: the deflake:* scripts wrap the inner command in a balanced pair of
    escaped double quotes (\"...\") instead of single quotes.
NOTE(review): leading whitespace inside hunk lines was reconstructed from conventional
2-space formatting; confirm against the target files before applying.

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index 40feb87df9..e76ddbd00b 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -218,7 +218,7 @@ jobs:
           UV_THREADPOOL_SIZE: '32'
           NODE_ENV: 'test'
         shell: 'pwsh'
-        run: 'npm run test:integration:sandbox:none'
+        run: 'npm run deflake:test:integration:sandbox:none'
 
   e2e:
     name: 'E2E'
diff --git a/integration-tests/file-system-interactive.test.ts b/integration-tests/file-system-interactive.test.ts
index 79207e7a97..9099c89944 100644
--- a/integration-tests/file-system-interactive.test.ts
+++ b/integration-tests/file-system-interactive.test.ts
@@ -7,7 +7,7 @@
 import { expect, describe, it, beforeEach, afterEach } from 'vitest';
 import { TestRig } from './test-helper.js';
 
-describe.skip('Interactive file system', () => {
+describe('Interactive file system', () => {
   let rig: TestRig;
 
   beforeEach(() => {
@@ -33,16 +33,16 @@ describe.skip('Interactive file system', () => {
     const readCall = await rig.waitForToolCall('read_file', 30000);
     expect(readCall, 'Expected to find a read_file tool call').toBe(true);
 
-    await run.expectText('1.0.0', 30000);
-
     // Step 2: Write the file
     const writePrompt = `now change the version to 1.0.1 in the file`;
     await run.type(writePrompt);
     await run.sendKeys('\r');
 
-    await rig.expectToolCallSuccess(['write_file', 'replace'], 30000);
-
-    const newFileContent = rig.readFile(fileName);
-    expect(newFileContent).toBe('1.0.1');
+    // Check tool calls made with right args
+    await rig.expectToolCallSuccess(
+      ['write_file', 'replace'],
+      30000,
+      (args) => args.includes('1.0.1') && args.includes(fileName),
+    );
   });
 });
diff --git a/integration-tests/test-helper.ts b/integration-tests/test-helper.ts
index 2ac1a35ba1..fbc965c0ab 100644
--- a/integration-tests/test-helper.ts
+++ b/integration-tests/test-helper.ts
@@ -628,7 +628,11 @@ export class TestRig {
     );
   }
 
-  async expectToolCallSuccess(toolNames: string[], timeout?: number) {
+  async expectToolCallSuccess(
+    toolNames: string[],
+    timeout?: number,
+    matchArgs?: (args: string) => boolean,
+  ) {
     // Use environment-specific timeout
     if (!timeout) {
       timeout = getDefaultTimeout();
@@ -642,7 +646,10 @@ export class TestRig {
         const toolLogs = this.readToolLogs();
         return toolNames.some((name) =>
           toolLogs.some(
-            (log) => log.toolRequest.name === name && log.toolRequest.success,
+            (log) =>
+              log.toolRequest.name === name &&
+              log.toolRequest.success &&
+              (matchArgs?.call(this, log.toolRequest.args) ?? true),
           ),
         );
       },
diff --git a/package.json b/package.json
index 9d13f5dbd4..5cc555f686 100644
--- a/package.json
+++ b/package.json
@@ -21,8 +21,8 @@
     "start:a2a-server": "CODER_AGENT_PORT=41242 npm run start --workspace @google/gemini-cli-a2a-server",
     "debug": "cross-env DEBUG=1 node --inspect-brk scripts/start.js",
     "deflake": "node scripts/deflake.js",
-    "deflake:test:integration:sandbox:none": "npm run deflake -- --command='npm run test:integration:sandbox:none -- --retry=0'",
-    "deflake:test:integration:sandbox:docker": "npm run deflake -- --command='npm run test:integration:sandbox:docker -- --retry=0'",
+    "deflake:test:integration:sandbox:none": "npm run deflake -- --command=\"npm run test:integration:sandbox:none -- --retry=0\"",
+    "deflake:test:integration:sandbox:docker": "npm run deflake -- --command=\"npm run test:integration:sandbox:docker -- --retry=0\"",
     "auth:npm": "npx google-artifactregistry-auth",
     "auth:docker": "gcloud auth configure-docker us-west1-docker.pkg.dev",
     "auth": "npm run auth:npm && npm run auth:docker",