diff --git a/.github/workflows/chained_e2e.yml b/.github/workflows/chained_e2e.yml
index 494163966e..487225d452 100644
--- a/.github/workflows/chained_e2e.yml
+++ b/.github/workflows/chained_e2e.yml
@@ -277,6 +277,37 @@ jobs:
         shell: 'pwsh'
         run: 'npm run test:integration:sandbox:none'
 
+  evals:
+    name: 'Evals (ALWAYS_PASSING)'
+    needs:
+      - 'merge_queue_skipper'
+      - 'parse_run_context'
+    runs-on: 'gemini-cli-ubuntu-16-core'
+    if: |
+      always() && (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip != 'true')
+    steps:
+      - name: 'Checkout'
+        uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v5
+        with:
+          ref: '${{ needs.parse_run_context.outputs.sha }}'
+          repository: '${{ needs.parse_run_context.outputs.repository }}'
+
+      - name: 'Set up Node.js 20.x'
+        uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4
+        with:
+          node-version: '20.x'
+
+      - name: 'Install dependencies'
+        run: 'npm ci'
+
+      - name: 'Build project'
+        run: 'npm run build'
+
+      - name: 'Run Evals (Required to pass)'
+        env:
+          GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
+        run: 'npm run test:always_passing_evals'
+
   e2e:
     name: 'E2E'
     if: |
@@ -284,13 +315,15 @@ jobs:
     needs:
       - 'e2e_linux'
       - 'e2e_mac'
+      - 'evals'
       - 'merge_queue_skipper'
     runs-on: 'gemini-cli-ubuntu-16-core'
     steps:
       - name: 'Check E2E test results'
         run: |
           if [[ ${{ needs.e2e_linux.result }} != 'success' || \
-               ${{ needs.e2e_mac.result }} != 'success' ]]; then
+               ${{ needs.e2e_mac.result }} != 'success' || \
+               ${{ needs.evals.result }} != 'success' ]]; then
             echo "One or more E2E jobs failed."
             exit 1
           fi
diff --git a/.github/workflows/evals-nightly.yml b/.github/workflows/evals-nightly.yml
new file mode 100644
index 0000000000..6d44de7c12
--- /dev/null
+++ b/.github/workflows/evals-nightly.yml
@@ -0,0 +1,41 @@
+name: 'Evals: Nightly'
+
+on:
+  schedule:
+    - cron: '0 1 * * *' # Runs at 01:00 UTC every day
+  workflow_dispatch:
+    inputs:
+      run_all:
+        description: 'Run all evaluations (including usually passing)'
+        type: 'boolean'
+        default: true
+
+permissions:
+  contents: 'read'
+  checks: 'write'
+
+jobs:
+  evals:
+    name: 'Evals (USUALLY_PASSING) nightly run'
+    runs-on: 'gemini-cli-ubuntu-16-core'
+    steps:
+      - name: 'Checkout'
+        uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5
+
+      - name: 'Set up Node.js'
+        uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4
+        with:
+          node-version-file: '.nvmrc'
+          cache: 'npm'
+
+      - name: 'Install dependencies'
+        run: 'npm ci'
+
+      - name: 'Build project'
+        run: 'npm run build'
+
+      - name: 'Run Evals'
+        env: # run_all is unset on scheduled runs, so only an explicit false narrows the set
+          GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
+          EVAL_SCRIPT: "${{ github.event.inputs.run_all != 'false' && 'test:all_evals' || 'test:always_passing_evals' }}"
+        run: 'npm run "${EVAL_SCRIPT}"'
diff --git a/.gitignore b/.gitignore
index bfb2b5e576..5128952039 100644
--- a/.gitignore
+++ b/.gitignore
@@ -59,3 +59,4 @@ patch_output.log
 .genkit
 .gemini-clipboard/
 .eslintcache
+evals/logs/
diff --git a/eslint.config.js b/eslint.config.js
index c2d0d3b69b..0f20eeab42 100644
--- a/eslint.config.js
+++ b/eslint.config.js
@@ -35,6 +35,8 @@ export default tseslint.config(
       'package/bundle/**',
       '.integration-tests/**',
       'dist/**',
+      'evals/**',
+      'packages/test-utils/**',
     ],
   },
   eslint.configs.recommended,
diff --git a/evals/README.md b/evals/README.md
new file mode 100644
index 0000000000..a339af842f
--- /dev/null
+++ b/evals/README.md
@@ -0,0 +1,102 @@
+# Behavioral Evals
+
+Behavioral evaluations (evals) are tests designed to validate the agent's
+behavior in response to specific prompts. They serve as a critical feedback loop
+for changes to system prompts, tool definitions, and other model-steering
+mechanisms.
+
+## Why Behavioral Evals?
+
+Unlike traditional **integration tests** which verify that the system functions
+correctly (e.g., "does the file writer actually write to disk?"), behavioral
+evals verify that the model _chooses_ to take the correct action (e.g., "does
+the model decide to write to disk when asked to save code?").
+
+They are also distinct from broad **industry benchmarks** (like SWE-bench).
+While benchmarks measure general capabilities across complex challenges, our
+behavioral evals focus on specific, granular behaviors relevant to the Gemini
+CLI's features.
+
+### Key Characteristics
+
+- **Feedback Loop**: They help us understand how changes to prompts or tools
+  affect the model's decision-making.
+  - _Did a change to the system prompt make the model less likely to use tool
+    X?_
+  - _Did a new tool definition confuse the model?_
+- **Regression Testing**: They prevent regressions in model steering.
+- **Non-Determinism**: Unlike unit tests, LLM behavior can be non-deterministic.
+  We distinguish between behaviors that should be robust (`ALWAYS_PASSES`) and
+  those that are generally reliable but might occasionally vary
+  (`USUALLY_PASSES`).
+
+## Creating an Evaluation
+
+Evaluations are located in the `evals` directory. Each evaluation is a Vitest
+test file that uses the `evalTest` function from `evals/test-helper.ts`.
+
+### `evalTest`
+
+The `evalTest` function is a helper that runs a single evaluation case. It takes
+two arguments:
+
+1. `policy`: The consistency expectation for this test (`'ALWAYS_PASSES'` or
+   `'USUALLY_PASSES'`).
+2. `evalCase`: An object defining the test case.
+
+#### Policies
+
+- `ALWAYS_PASSES`: Tests expected to pass 100% of the time. These are typically
+  trivial and test basic functionality. These run on every CI run.
+- `USUALLY_PASSES`: Tests expected to pass most of the time but may have some
+  flakiness due to non-deterministic behaviors. These are run nightly and used
+  to track the health of the product from build to build.
+
+#### `EvalCase` Properties
+
+- `name`: The name of the evaluation case.
+- `prompt`: The prompt to send to the model.
+- `params`: An optional object with parameters to pass to the test rig (e.g.,
+  settings).
+- `assert`: An async function that takes the test rig and the result of the run
+  and asserts that the result is correct.
+- `log`: An optional boolean that, if set to `true`, will log the tool calls to
+  a file in the `evals/logs` directory.
+
+### Example
+
+```typescript
+import { describe, expect } from 'vitest';
+import { evalTest } from './test-helper.js';
+
+describe('my_feature', () => {
+  evalTest('ALWAYS_PASSES', {
+    name: 'should do something',
+    prompt: 'do it',
+    assert: async (rig, result) => {
+      // assertions
+    },
+  });
+});
+```
+
+## Running Evaluations
+
+### Always Passing Evals
+
+To run the evaluations that are expected to always pass (CI safe):
+
+```bash
+npm run test:always_passing_evals
+```
+
+### All Evals
+
+To run all evaluations, including those that may be flaky ("usually passes"):
+
+```bash
+npm run test:all_evals
+```
+
+This command sets the `RUN_EVALS` environment variable to `1`, which enables the
+`USUALLY_PASSES` tests.
diff --git a/evals/save_memory.eval.ts b/evals/save_memory.eval.ts
new file mode 100644
index 0000000000..a64f21798a
--- /dev/null
+++ b/evals/save_memory.eval.ts
@@ -0,0 +1,31 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, expect } from 'vitest';
+import { evalTest } from './test-helper.js';
+import { validateModelOutput } from '../integration-tests/test-helper.js';
+
+describe('save_memory', () => { // checks the model chooses the save_memory tool
+  evalTest('ALWAYS_PASSES', {
+    name: 'should be able to save to memory',
+    log: true, // have evalTest dump this run's tool logs under evals/logs
+    params: {
+      settings: { tools: { core: ['save_memory'] } },
+    },
+    prompt: `remember that my favorite color is  blue.
+  
+    what is my favorite color? tell me that and surround it with $ symbol`,
+    assert: async (rig, result) => {
+      const foundToolCall = await rig.waitForToolCall('save_memory');
+      expect(
+        foundToolCall,
+        'Expected to find a save_memory tool call',
+      ).toBeTruthy();
+      // NOTE(review): the result of this call is discarded — confirm it throws on a miss.
+      validateModelOutput(result, 'blue', 'Save memory test');
+    },
+  });
+});
diff --git a/evals/test-helper.ts b/evals/test-helper.ts
new file mode 100644
index 0000000000..f394521d1e
--- /dev/null
+++ b/evals/test-helper.ts
@@ -0,0 +1,70 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { it } from 'vitest';
+import fs from 'node:fs';
+import { TestRig } from '@google/gemini-cli-test-utils';
+
+export * from '@google/gemini-cli-test-utils';
+
+// Indicates the consistency expectation for this test.
+// - ALWAYS_PASSES - Means that the test is expected to pass 100% of the time.
+//   These tests are typically trivial and test basic functionality with unambiguous
+//   prompts. For example: "call save_memory to remember foo" should be fairly reliable.
+//   These are the first line of defense against regressions in key behaviors and run in
+//   every CI. You can run these locally with 'npm run test:always_passing_evals'.
+//
+// - USUALLY_PASSES - Means that the test is expected to pass most of the time but
+//   may have some flakiness as a result of relying on non-deterministic prompted
+//   behaviors and/or ambiguous prompts or complex tasks.
+//   For example: "Please do build changes until the very end" --> ambiguous whether
+//   the agent should add to memory without more explicit system prompt or user
+//   instructions. There are many more of these tests and they may pass less consistently.
+//   The pass/fail trendline of this set of tests can be used as a general measure
+//   of product quality. You can run these locally with 'npm run test:all_evals'.
+//   This may take a really long time and is not recommended.
+export type EvalPolicy = 'ALWAYS_PASSES' | 'USUALLY_PASSES';
+
+// Registers evalCase as a Vitest test that runs against a fresh TestRig.
+export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
+  const fn = async () => {
+    const rig = new TestRig();
+    try {
+      await rig.setup(evalCase.name, evalCase.params);
+      const result = await rig.run({ args: evalCase.prompt });
+      await evalCase.assert(rig, result);
+    } finally {
+      try {
+        if (evalCase.log) {
+          const toolLogs = JSON.stringify(rig.readToolLogs(), null, 2);
+          await logToFile(evalCase.name, toolLogs);
+        }
+      } finally {
+        await rig.cleanup(); // must run even if dumping the tool logs throws
+      }
+    }
+  };
+  // USUALLY_PASSES runs only if RUN_EVALS is set and is not '0' or 'false'.
+  const flag = (process.env.RUN_EVALS ?? '').trim().toLowerCase();
+  const runAll = flag !== '' && flag !== '0' && flag !== 'false';
+  it.skipIf(policy === 'USUALLY_PASSES' && !runAll)(evalCase.name, fn);
+}
+
+export interface EvalCase { // one eval case, as consumed by evalTest
+  name: string; // test title; also given to rig.setup and the log file name
+  params?: Record<string, any>; // forwarded as-is to rig.setup
+  prompt: string; // passed to rig.run as `args`
+  assert: (rig: TestRig, result: string) => Promise<void>; // runs after rig.run
+  log?: boolean; // when truthy, tool logs are written under evals/logs
+}
+
+// Saves `content` under evals/logs/, in a file named after the sanitized `name`.
+async function logToFile(name: string, content: string) {
+  const dir = 'evals/logs';
+  await fs.promises.mkdir(dir, { recursive: true });
+  const slug = name.replace(/[^a-z0-9]/gi, '_').toLowerCase();
+  await fs.promises.writeFile(`${dir}/${slug}.log`, content);
+}
diff --git a/evals/vitest.config.ts b/evals/vitest.config.ts
new file mode 100644
index 0000000000..8476b638ff
--- /dev/null
+++ b/evals/vitest.config.ts
@@ -0,0 +1,15 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { defineConfig } from 'vitest/config';
+
+export default defineConfig({ // Vitest settings used for the evals suite
+  test: {
+    testTimeout: 300000, // 5 minutes, in milliseconds
+    reporters: ['default'],
+    include: ['**/*.eval.ts'], // only files ending in .eval.ts are collected
+  },
+});
diff --git a/integration-tests/save_memory.test.ts b/integration-tests/save_memory.test.ts
deleted file mode 100644
index 38b4d060fa..0000000000
--- a/integration-tests/save_memory.test.ts
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * @license
- * Copyright 2025 Google LLC
- * SPDX-License-Identifier: Apache-2.0
- */
-
-import { describe, it, expect, beforeEach, afterEach } from 'vitest';
-import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
-
-describe('save_memory', () => {
-  let rig: TestRig;
-
-  beforeEach(() => {
-    rig = new TestRig();
-  });
-
-  afterEach(async () => await rig.cleanup());
-
-  it('should be able to save to memory', async () => {
-    await rig.setup('should be able to save to memory', {
-      settings: { tools: { core: ['save_memory'] } },
-    });
-
-    const prompt = `remember that my favorite color is  blue.
-
-  what is my favorite color? tell me that and surround it with $ symbol`;
-    const result = await rig.run({ args: prompt });
-
-    const foundToolCall = await rig.waitForToolCall('save_memory');
-
-    // Add debugging information
-    if (!foundToolCall || !result.toLowerCase().includes('blue')) {
-      const allTools = printDebugInfo(rig, result, {
-        'Found tool call': foundToolCall,
-        'Contains blue': result.toLowerCase().includes('blue'),
-      });
-
-      console.error(
-        'Memory tool calls:',
-        allTools
-          .filter((t) => t.toolRequest.name === 'save_memory')
-          .map((t) => t.toolRequest.args),
-      );
-    }
-
-    expect(
-      foundToolCall,
-      'Expected to find a save_memory tool call',
-    ).toBeTruthy();
-
-    // Validate model output - will throw if no output, warn if missing expected content
-    validateModelOutput(result, 'blue', 'Save memory test');
-  });
-});
diff --git a/integration-tests/test-helper.ts b/integration-tests/test-helper.ts
index 9a2a6cefca..a13f260c4b 100644
--- a/integration-tests/test-helper.ts
+++ b/integration-tests/test-helper.ts
@@ -4,1225 +4,4 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { expect } from 'vitest';
-import { execSync, spawn, type ChildProcess } from 'node:child_process';
-import { mkdirSync, writeFileSync, readFileSync } from 'node:fs';
-import { join, dirname } from 'node:path';
-import { fileURLToPath } from 'node:url';
-import { env } from 'node:process';
-import { setTimeout as sleep } from 'node:timers/promises';
-import { DEFAULT_GEMINI_MODEL } from '../packages/core/src/config/models.js';
-import fs from 'node:fs';
-import * as pty from '@lydell/node-pty';
-import stripAnsi from 'strip-ansi';
-import * as os from 'node:os';
-import { GEMINI_DIR } from '../packages/core/src/utils/paths.js';
-
-const __dirname = dirname(fileURLToPath(import.meta.url));
-const BUNDLE_PATH = join(__dirname, '..', 'bundle/gemini.js');
-
-// Get timeout based on environment
-function getDefaultTimeout() {
-  if (env['CI']) return 60000; // 1 minute in CI
-  if (env['GEMINI_SANDBOX']) return 30000; // 30s in containers
-  return 15000; // 15s locally
-}
-
-export async function poll(
-  predicate: () => boolean,
-  timeout: number,
-  interval: number,
-): Promise<boolean> {
-  const startTime = Date.now();
-  let attempts = 0;
-  while (Date.now() - startTime < timeout) {
-    attempts++;
-    const result = predicate();
-    if (env['VERBOSE'] === 'true' && attempts % 5 === 0) {
-      console.log(
-        `Poll attempt ${attempts}: ${result ? 'success' : 'waiting...'}`,
-      );
-    }
-    if (result) {
-      return true;
-    }
-    await sleep(interval);
-  }
-  if (env['VERBOSE'] === 'true') {
-    console.log(`Poll timed out after ${attempts} attempts`);
-  }
-  return false;
-}
-
-function sanitizeTestName(name: string) {
-  return name
-    .toLowerCase()
-    .replace(/[^a-z0-9]/g, '-')
-    .replace(/-+/g, '-');
-}
-
-// Helper to create detailed error messages
-export function createToolCallErrorMessage(
-  expectedTools: string | string[],
-  foundTools: string[],
-  result: string,
-) {
-  const expectedStr = Array.isArray(expectedTools)
-    ? expectedTools.join(' or ')
-    : expectedTools;
-  return (
-    `Expected to find ${expectedStr} tool call(s). ` +
-    `Found: ${foundTools.length > 0 ? foundTools.join(', ') : 'none'}. ` +
-    `Output preview: ${result ? result.substring(0, 200) + '...' : 'no output'}`
-  );
-}
-
-// Helper to print debug information when tests fail
-export function printDebugInfo(
-  rig: TestRig,
-  result: string,
-  context: Record<string, unknown> = {},
-) {
-  console.error('Test failed - Debug info:');
-  console.error('Result length:', result.length);
-  console.error('Result (first 500 chars):', result.substring(0, 500));
-  console.error(
-    'Result (last 500 chars):',
-    result.substring(result.length - 500),
-  );
-
-  // Print any additional context provided
-  Object.entries(context).forEach(([key, value]) => {
-    console.error(`${key}:`, value);
-  });
-
-  // Check what tools were actually called
-  const allTools = rig.readToolLogs();
-  console.error(
-    'All tool calls found:',
-    allTools.map((t) => t.toolRequest.name),
-  );
-
-  return allTools;
-}
-
-// Helper to validate model output and warn about unexpected content
-export function validateModelOutput(
-  result: string,
-  expectedContent: string | (string | RegExp)[] | null = null,
-  testName = '',
-) {
-  // First, check if there's any output at all (this should fail the test if missing)
-  if (!result || result.trim().length === 0) {
-    throw new Error('Expected LLM to return some output');
-  }
-
-  // If expectedContent is provided, check for it and warn if missing
-  if (expectedContent) {
-    const contents = Array.isArray(expectedContent)
-      ? expectedContent
-      : [expectedContent];
-    const missingContent = contents.filter((content) => {
-      if (typeof content === 'string') {
-        return !result.toLowerCase().includes(content.toLowerCase());
-      } else if (content instanceof RegExp) {
-        return !content.test(result);
-      }
-      return false;
-    });
-
-    if (missingContent.length > 0) {
-      console.warn(
-        `Warning: LLM did not include expected content in response: ${missingContent.join(
-          ', ',
-        )}.`,
-        'This is not ideal but not a test failure.',
-      );
-      console.warn(
-        'The tool was called successfully, which is the main requirement.',
-      );
-      console.warn('Expected content:', expectedContent);
-      console.warn('Actual output:', result);
-      return false;
-    } else if (env['VERBOSE'] === 'true') {
-      console.log(`${testName}: Model output validated successfully.`);
-    }
-    return true;
-  }
-
-  return true;
-}
-
-interface ParsedLog {
-  attributes?: {
-    'event.name'?: string;
-    function_name?: string;
-    function_args?: string;
-    success?: boolean;
-    duration_ms?: number;
-    request_text?: string;
-    hook_event_name?: string;
-    hook_name?: string;
-    hook_input?: Record<string, unknown>;
-    hook_output?: Record<string, unknown>;
-    exit_code?: number;
-    stdout?: string;
-    stderr?: string;
-    error?: string;
-  };
-  scopeMetrics?: {
-    metrics: {
-      descriptor: {
-        name: string;
-      };
-    }[];
-  }[];
-}
-
-export class InteractiveRun {
-  ptyProcess: pty.IPty;
-  public output = '';
-
-  constructor(ptyProcess: pty.IPty) {
-    this.ptyProcess = ptyProcess;
-    ptyProcess.onData((data) => {
-      this.output += data;
-      if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
-        process.stdout.write(data);
-      }
-    });
-  }
-
-  async expectText(text: string, timeout?: number) {
-    if (!timeout) {
-      timeout = getDefaultTimeout();
-    }
-    await poll(
-      () => stripAnsi(this.output).toLowerCase().includes(text.toLowerCase()),
-      timeout,
-      200,
-    );
-    expect(stripAnsi(this.output).toLowerCase()).toContain(text.toLowerCase());
-  }
-
-  // This types slowly to make sure command is correct, but only work for short
-  // commands that are not multi-line, use sendKeys to type long prompts
-  async type(text: string) {
-    let typedSoFar = '';
-    for (const char of text) {
-      if (char === '\r') {
-        // wait >30ms before `enter` to avoid fast return conversion
-        // from bufferFastReturn() in KeypressContent.tsx
-        await sleep(50);
-      }
-
-      this.ptyProcess.write(char);
-      typedSoFar += char;
-
-      // Wait for the typed sequence so far to be echoed back.
-      const found = await poll(
-        () => stripAnsi(this.output).includes(typedSoFar),
-        5000, // 5s timeout per character (generous for CI)
-        10, // check frequently
-      );
-
-      if (!found) {
-        throw new Error(
-          `Timed out waiting for typed text to appear in output: "${typedSoFar}".\nStripped output:\n${stripAnsi(
-            this.output,
-          )}`,
-        );
-      }
-    }
-  }
-
-  // Types an entire string at once, necessary for some things like commands
-  // but may run into paste detection issues for larger strings.
-  async sendText(text: string) {
-    this.ptyProcess.write(text);
-    await sleep(5);
-  }
-
-  // Simulates typing a string one character at a time to avoid paste detection.
-  async sendKeys(text: string) {
-    const delay = 5;
-    for (const char of text) {
-      this.ptyProcess.write(char);
-      await sleep(delay);
-    }
-  }
-
-  async kill() {
-    this.ptyProcess.kill();
-  }
-
-  expectExit(): Promise<number> {
-    return new Promise((resolve, reject) => {
-      const timer = setTimeout(
-        () =>
-          reject(
-            new Error(`Test timed out: process did not exit within a minute.`),
-          ),
-        60000,
-      );
-      this.ptyProcess.onExit(({ exitCode }) => {
-        clearTimeout(timer);
-        resolve(exitCode);
-      });
-    });
-  }
-}
-
-export class TestRig {
-  testDir: string | null = null;
-  homeDir: string | null = null;
-  testName?: string;
-  _lastRunStdout?: string;
-  // Path to the copied fake responses file for this test.
-  fakeResponsesPath?: string;
-  // Original fake responses file path for rewriting goldens in record mode.
-  originalFakeResponsesPath?: string;
-  private _interactiveRuns: InteractiveRun[] = [];
-  private _spawnedProcesses: ChildProcess[] = [];
-
-  setup(
-    testName: string,
-    options: {
-      settings?: Record<string, unknown>;
-      fakeResponsesPath?: string;
-    } = {},
-  ) {
-    this.testName = testName;
-    const sanitizedName = sanitizeTestName(testName);
-    const testFileDir =
-      env['INTEGRATION_TEST_FILE_DIR'] || join(os.tmpdir(), 'gemini-cli-tests');
-    this.testDir = join(testFileDir, sanitizedName);
-    this.homeDir = join(testFileDir, sanitizedName + '-home');
-    mkdirSync(this.testDir, { recursive: true });
-    mkdirSync(this.homeDir, { recursive: true });
-    if (options.fakeResponsesPath) {
-      this.fakeResponsesPath = join(this.testDir, 'fake-responses.json');
-      this.originalFakeResponsesPath = options.fakeResponsesPath;
-      if (process.env['REGENERATE_MODEL_GOLDENS'] !== 'true') {
-        fs.copyFileSync(options.fakeResponsesPath, this.fakeResponsesPath);
-      }
-    }
-
-    // Create a settings file to point the CLI to the local collector
-    this._createSettingsFile(options.settings);
-  }
-
-  private _createSettingsFile(overrideSettings?: Record<string, unknown>) {
-    const projectGeminiDir = join(this.testDir!, GEMINI_DIR);
-    mkdirSync(projectGeminiDir, { recursive: true });
-
-    // In sandbox mode, use an absolute path for telemetry inside the container
-    // The container mounts the test directory at the same path as the host
-    const telemetryPath = join(this.homeDir!, 'telemetry.log'); // Always use home directory for telemetry
-
-    const settings = {
-      general: {
-        // Nightly releases sometimes becomes out of sync with local code and
-        // triggers auto-update, which causes tests to fail.
-        disableAutoUpdate: true,
-        previewFeatures: false,
-      },
-      telemetry: {
-        enabled: true,
-        target: 'local',
-        otlpEndpoint: '',
-        outfile: telemetryPath,
-      },
-      security: {
-        auth: {
-          selectedType: 'gemini-api-key',
-        },
-      },
-      ui: {
-        useAlternateBuffer: true,
-      },
-      model: {
-        name: DEFAULT_GEMINI_MODEL,
-      },
-      sandbox:
-        env['GEMINI_SANDBOX'] !== 'false' ? env['GEMINI_SANDBOX'] : false,
-      // Don't show the IDE connection dialog when running from VsCode
-      ide: { enabled: false, hasSeenNudge: true },
-      ...overrideSettings, // Allow tests to override/add settings
-    };
-    writeFileSync(
-      join(projectGeminiDir, 'settings.json'),
-      JSON.stringify(settings, null, 2),
-    );
-  }
-
-  createFile(fileName: string, content: string) {
-    const filePath = join(this.testDir!, fileName);
-    writeFileSync(filePath, content);
-    return filePath;
-  }
-
-  mkdir(dir: string) {
-    mkdirSync(join(this.testDir!, dir), { recursive: true });
-  }
-
-  sync() {
-    if (os.platform() === 'win32') return;
-    // ensure file system is done before spawning
-    execSync('sync', { cwd: this.testDir! });
-  }
-
-  /**
-   * The command and args to use to invoke Gemini CLI. Allows us to switch
-   * between using the bundled gemini.js (the default) and using the installed
-   * 'gemini' (used to verify npm bundles).
-   */
-  private _getCommandAndArgs(extraInitialArgs: string[] = []): {
-    command: string;
-    initialArgs: string[];
-  } {
-    const isNpmReleaseTest =
-      env['INTEGRATION_TEST_USE_INSTALLED_GEMINI'] === 'true';
-    const command = isNpmReleaseTest ? 'gemini' : 'node';
-    const initialArgs = isNpmReleaseTest
-      ? extraInitialArgs
-      : [BUNDLE_PATH, ...extraInitialArgs];
-    if (this.fakeResponsesPath) {
-      if (process.env['REGENERATE_MODEL_GOLDENS'] === 'true') {
-        initialArgs.push('--record-responses', this.fakeResponsesPath);
-      } else {
-        initialArgs.push('--fake-responses', this.fakeResponsesPath);
-      }
-    }
-    return { command, initialArgs };
-  }
-
-  run(options: {
-    args?: string | string[];
-    stdin?: string;
-    stdinDoesNotEnd?: boolean;
-    yolo?: boolean;
-    timeout?: number;
-    env?: Record<string, string | undefined>;
-  }): Promise<string> {
-    const yolo = options.yolo !== false;
-    const { command, initialArgs } = this._getCommandAndArgs(
-      yolo ? ['--yolo'] : [],
-    );
-    const commandArgs = [...initialArgs];
-    const execOptions: {
-      cwd: string;
-      encoding: 'utf-8';
-      input?: string;
-    } = {
-      cwd: this.testDir!,
-      encoding: 'utf-8',
-    };
-
-    if (options.args) {
-      if (Array.isArray(options.args)) {
-        commandArgs.push(...options.args);
-      } else {
-        commandArgs.push(options.args);
-      }
-    }
-
-    if (options.stdin) {
-      execOptions.input = options.stdin;
-    }
-
-    const child = spawn(command, commandArgs, {
-      cwd: this.testDir!,
-      stdio: 'pipe',
-      env: {
-        ...process.env,
-        GEMINI_CLI_HOME: this.homeDir!,
-        ...options.env,
-      },
-    });
-    this._spawnedProcesses.push(child);
-
-    let stdout = '';
-    let stderr = '';
-
-    // Handle stdin if provided
-    if (execOptions.input) {
-      child.stdin!.write(execOptions.input);
-    }
-
-    if (!options.stdinDoesNotEnd) {
-      child.stdin!.end();
-    }
-
-    child.stdout!.setEncoding('utf8');
-    child.stdout!.on('data', (data: string) => {
-      stdout += data;
-      if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
-        process.stdout.write(data);
-      }
-    });
-
-    child.stderr!.setEncoding('utf8');
-    child.stderr!.on('data', (data: string) => {
-      stderr += data;
-      if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
-        process.stderr.write(data);
-      }
-    });
-
-    const timeout = options.timeout ?? 120000;
-    const promise = new Promise<string>((resolve, reject) => {
-      const timer = setTimeout(() => {
-        child.kill('SIGKILL');
-        reject(
-          new Error(
-            `Process timed out after ${timeout}ms.\nStdout:\n${stdout}\nStderr:\n${stderr}`,
-          ),
-        );
-      }, timeout);
-
-      child.on('error', (err) => {
-        clearTimeout(timer);
-        reject(err);
-      });
-
-      child.on('close', (code: number) => {
-        clearTimeout(timer);
-        if (code === 0) {
-          // Store the raw stdout for Podman telemetry parsing
-          this._lastRunStdout = stdout;
-
-          // Filter out telemetry output when running with Podman
-          const result = this._filterPodmanTelemetry(stdout);
-
-          // Check if this is a JSON output test - if so, don't include stderr
-          // as it would corrupt the JSON
-          const isJsonOutput =
-            commandArgs.includes('--output-format') &&
-            commandArgs.includes('json');
-
-          // If we have stderr output and it's not a JSON test, include that also
-          const finalResult =
-            stderr && !isJsonOutput
-              ? `${result}\n\nStdErr:\n${stderr}`
-              : result;
-
-          resolve(finalResult);
-        } else {
-          reject(new Error(`Process exited with code ${code}:\n${stderr}`));
-        }
-      });
-    });
-
-    return promise;
-  }
-
-  private _filterPodmanTelemetry(stdout: string): string {
-    if (env['GEMINI_SANDBOX'] !== 'podman') {
-      return stdout;
-    }
-
-    // Remove telemetry JSON objects from output
-    // They are multi-line JSON objects that start with { and contain telemetry fields
-    const lines = stdout.split(os.EOL);
-    const filteredLines = [];
-    let inTelemetryObject = false;
-    let braceDepth = 0;
-
-    for (const line of lines) {
-      if (!inTelemetryObject && line.trim() === '{') {
-        // Check if this might be start of telemetry object
-        inTelemetryObject = true;
-        braceDepth = 1;
-      } else if (inTelemetryObject) {
-        // Count braces to track nesting
-        for (const char of line) {
-          if (char === '{') braceDepth++;
-          else if (char === '}') braceDepth--;
-        }
-
-        // Check if we've closed all braces
-        if (braceDepth === 0) {
-          inTelemetryObject = false;
-          // Skip this line (the closing brace)
-          continue;
-        }
-      } else {
-        // Not in telemetry object, keep the line
-        filteredLines.push(line);
-      }
-    }
-
-    return filteredLines.join('\n');
-  }
-
-  runCommand(
-    args: string[],
-    options: {
-      stdin?: string;
-      timeout?: number;
-      env?: Record<string, string | undefined>;
-    } = {},
-  ): Promise<string> {
-    const { command, initialArgs } = this._getCommandAndArgs();
-    const commandArgs = [...initialArgs, ...args];
-
-    const child = spawn(command, commandArgs, {
-      cwd: this.testDir!,
-      stdio: 'pipe',
-      env: {
-        ...process.env,
-        GEMINI_CLI_HOME: this.homeDir!,
-        ...options.env,
-      },
-    });
-    this._spawnedProcesses.push(child);
-
-    let stdout = '';
-    let stderr = '';
-
-    if (options.stdin) {
-      child.stdin!.write(options.stdin);
-      child.stdin!.end();
-    }
-
-    child.stdout!.setEncoding('utf8');
-    child.stdout!.on('data', (data: string) => {
-      stdout += data;
-      if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
-        process.stdout.write(data);
-      }
-    });
-
-    child.stderr!.setEncoding('utf8');
-    child.stderr!.on('data', (data: string) => {
-      stderr += data;
-      if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
-        process.stderr.write(data);
-      }
-    });
-
-    const timeout = options.timeout ?? 120000;
-    const promise = new Promise<string>((resolve, reject) => {
-      const timer = setTimeout(() => {
-        child.kill('SIGKILL');
-        reject(
-          new Error(
-            `Process timed out after ${timeout}ms.\nStdout:\n${stdout}\nStderr:\n${stderr}`,
-          ),
-        );
-      }, timeout);
-
-      child.on('error', (err) => {
-        clearTimeout(timer);
-        reject(err);
-      });
-
-      child.on('close', (code: number) => {
-        clearTimeout(timer);
-        if (code === 0) {
-          this._lastRunStdout = stdout;
-          const result = this._filterPodmanTelemetry(stdout);
-
-          // Check if this is a JSON output test - if so, don't include stderr
-          // as it would corrupt the JSON
-          const isJsonOutput =
-            commandArgs.includes('--output-format') &&
-            commandArgs.includes('json');
-
-          const finalResult =
-            stderr && !isJsonOutput
-              ? `${result}\n\nStdErr:\n${stderr}`
-              : result;
-          resolve(finalResult);
-        } else {
-          reject(new Error(`Process exited with code ${code}:\n${stderr}`));
-        }
-      });
-    });
-
-    return promise;
-  }
-
-  readFile(fileName: string) {
-    const filePath = join(this.testDir!, fileName);
-    const content = readFileSync(filePath, 'utf-8');
-    if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
-      console.log(`--- FILE: ${filePath} ---`);
-      console.log(content);
-      console.log(`--- END FILE: ${filePath} ---`);
-    }
-    return content;
-  }
-
-  async cleanup() {
-    // Kill any interactive runs that are still active
-    for (const run of this._interactiveRuns) {
-      try {
-        await run.kill();
-      } catch (error) {
-        if (env['VERBOSE'] === 'true') {
-          console.warn('Failed to kill interactive run during cleanup:', error);
-        }
-      }
-    }
-    this._interactiveRuns = [];
-
-    // Kill any other spawned processes that are still running
-    for (const child of this._spawnedProcesses) {
-      if (child.exitCode === null && child.signalCode === null) {
-        try {
-          child.kill('SIGKILL');
-        } catch (error) {
-          if (env['VERBOSE'] === 'true') {
-            console.warn(
-              'Failed to kill spawned process during cleanup:',
-              error,
-            );
-          }
-        }
-      }
-    }
-    this._spawnedProcesses = [];
-
-    if (
-      process.env['REGENERATE_MODEL_GOLDENS'] === 'true' &&
-      this.fakeResponsesPath
-    ) {
-      fs.copyFileSync(this.fakeResponsesPath, this.originalFakeResponsesPath!);
-    }
-    // Clean up test directory and home directory
-    if (this.testDir && !env['KEEP_OUTPUT']) {
-      try {
-        fs.rmSync(this.testDir, { recursive: true, force: true });
-      } catch (error) {
-        // Ignore cleanup errors
-        if (env['VERBOSE'] === 'true') {
-          console.warn('Cleanup warning:', (error as Error).message);
-        }
-      }
-    }
-    if (this.homeDir && !env['KEEP_OUTPUT']) {
-      try {
-        fs.rmSync(this.homeDir, { recursive: true, force: true });
-      } catch (error) {
-        // Ignore cleanup errors
-        if (env['VERBOSE'] === 'true') {
-          console.warn('Cleanup warning:', (error as Error).message);
-        }
-      }
-    }
-  }
-
-  async waitForTelemetryReady() {
-    // Telemetry is always written to the test directory
-    const logFilePath = join(this.homeDir!, 'telemetry.log');
-
-    if (!logFilePath) return;
-
-    // Wait for telemetry file to exist and have content
-    await poll(
-      () => {
-        if (!fs.existsSync(logFilePath)) return false;
-        try {
-          const content = readFileSync(logFilePath, 'utf-8');
-          // Check if file has meaningful content (at least one complete JSON object)
-          return content.includes('"scopeMetrics"');
-        } catch {
-          return false;
-        }
-      },
-      2000, // 2 seconds max - reduced since telemetry should flush on exit now
-      100, // check every 100ms
-    );
-  }
-
-  async waitForTelemetryEvent(eventName: string, timeout?: number) {
-    if (!timeout) {
-      timeout = getDefaultTimeout();
-    }
-
-    await this.waitForTelemetryReady();
-
-    return poll(
-      () => {
-        const logs = this._readAndParseTelemetryLog();
-        return logs.some(
-          (logData) =>
-            logData.attributes &&
-            logData.attributes['event.name'] === `gemini_cli.${eventName}`,
-        );
-      },
-      timeout,
-      100,
-    );
-  }
-
-  async waitForToolCall(
-    toolName: string,
-    timeout?: number,
-    matchArgs?: (args: string) => boolean,
-  ) {
-    // Use environment-specific timeout
-    if (!timeout) {
-      timeout = getDefaultTimeout();
-    }
-
-    // Wait for telemetry to be ready before polling for tool calls
-    await this.waitForTelemetryReady();
-
-    return poll(
-      () => {
-        const toolLogs = this.readToolLogs();
-        return toolLogs.some(
-          (log) =>
-            log.toolRequest.name === toolName &&
-            (matchArgs?.call(this, log.toolRequest.args) ?? true),
-        );
-      },
-      timeout,
-      100,
-    );
-  }
-
-  async expectToolCallSuccess(
-    toolNames: string[],
-    timeout?: number,
-    matchArgs?: (args: string) => boolean,
-  ) {
-    // Use environment-specific timeout
-    if (!timeout) {
-      timeout = getDefaultTimeout();
-    }
-
-    // Wait for telemetry to be ready before polling for tool calls
-    await this.waitForTelemetryReady();
-
-    const success = await poll(
-      () => {
-        const toolLogs = this.readToolLogs();
-        return toolNames.some((name) =>
-          toolLogs.some(
-            (log) =>
-              log.toolRequest.name === name &&
-              log.toolRequest.success &&
-              (matchArgs?.call(this, log.toolRequest.args) ?? true),
-          ),
-        );
-      },
-      timeout,
-      100,
-    );
-
-    expect(
-      success,
-      `Expected to find successful toolCalls for ${JSON.stringify(toolNames)}`,
-    ).toBe(true);
-  }
-
-  async waitForAnyToolCall(toolNames: string[], timeout?: number) {
-    if (!timeout) {
-      timeout = getDefaultTimeout();
-    }
-
-    // Wait for telemetry to be ready before polling for tool calls
-    await this.waitForTelemetryReady();
-
-    return poll(
-      () => {
-        const toolLogs = this.readToolLogs();
-        return toolNames.some((name) =>
-          toolLogs.some((log) => log.toolRequest.name === name),
-        );
-      },
-      timeout,
-      100,
-    );
-  }
-
-  _parseToolLogsFromStdout(stdout: string) {
-    const logs: {
-      timestamp: number;
-      toolRequest: {
-        name: string;
-        args: string;
-        success: boolean;
-        duration_ms: number;
-      };
-    }[] = [];
-
-    // The console output from Podman is JavaScript object notation, not JSON
-    // Look for tool call events in the output
-    // Updated regex to handle tool names with hyphens and underscores
-    const toolCallPattern =
-      /body:\s*'Tool call:\s*([\w-]+)\..*?Success:\s*(\w+)\..*?Duration:\s*(\d+)ms\.'/g;
-    const matches = [...stdout.matchAll(toolCallPattern)];
-
-    for (const match of matches) {
-      const toolName = match[1];
-      const success = match[2] === 'true';
-      const duration = parseInt(match[3], 10);
-
-      // Try to find function_args nearby
-      const matchIndex = match.index || 0;
-      const contextStart = Math.max(0, matchIndex - 500);
-      const contextEnd = Math.min(stdout.length, matchIndex + 500);
-      const context = stdout.substring(contextStart, contextEnd);
-
-      // Look for function_args in the context
-      let args = '{}';
-      const argsMatch = context.match(/function_args:\s*'([^']+)'/);
-      if (argsMatch) {
-        args = argsMatch[1];
-      }
-
-      // Also try to find function_name to double-check
-      // Updated regex to handle tool names with hyphens and underscores
-      const nameMatch = context.match(/function_name:\s*'([\w-]+)'/);
-      const actualToolName = nameMatch ? nameMatch[1] : toolName;
-
-      logs.push({
-        timestamp: Date.now(),
-        toolRequest: {
-          name: actualToolName,
-          args: args,
-          success: success,
-          duration_ms: duration,
-        },
-      });
-    }
-
-    // If no matches found with the simple pattern, try the JSON parsing approach
-    // in case the format changes
-    if (logs.length === 0) {
-      const lines = stdout.split(os.EOL);
-      let currentObject = '';
-      let inObject = false;
-      let braceDepth = 0;
-
-      for (const line of lines) {
-        if (!inObject && line.trim() === '{') {
-          inObject = true;
-          braceDepth = 1;
-          currentObject = line + '\n';
-        } else if (inObject) {
-          currentObject += line + '\n';
-
-          // Count braces
-          for (const char of line) {
-            if (char === '{') braceDepth++;
-            else if (char === '}') braceDepth--;
-          }
-
-          // If we've closed all braces, try to parse the object
-          if (braceDepth === 0) {
-            inObject = false;
-            try {
-              const obj = JSON.parse(currentObject);
-
-              // Check for tool call in different formats
-              if (
-                obj.body &&
-                obj.body.includes('Tool call:') &&
-                obj.attributes
-              ) {
-                const bodyMatch = obj.body.match(/Tool call: (\w+)\./);
-                if (bodyMatch) {
-                  logs.push({
-                    timestamp: obj.timestamp || Date.now(),
-                    toolRequest: {
-                      name: bodyMatch[1],
-                      args: obj.attributes.function_args || '{}',
-                      success: obj.attributes.success !== false,
-                      duration_ms: obj.attributes.duration_ms || 0,
-                    },
-                  });
-                }
-              } else if (
-                obj.attributes &&
-                obj.attributes['event.name'] === 'gemini_cli.tool_call'
-              ) {
-                logs.push({
-                  timestamp: obj.attributes['event.timestamp'],
-                  toolRequest: {
-                    name: obj.attributes.function_name,
-                    args: obj.attributes.function_args,
-                    success: obj.attributes.success,
-                    duration_ms: obj.attributes.duration_ms,
-                  },
-                });
-              }
-            } catch {
-              // Not valid JSON
-            }
-            currentObject = '';
-          }
-        }
-      }
-    }
-
-    return logs;
-  }
-
-  private _readAndParseTelemetryLog(): ParsedLog[] {
-    // Telemetry is always written to the test directory
-    const logFilePath = join(this.homeDir!, 'telemetry.log');
-
-    if (!logFilePath || !fs.existsSync(logFilePath)) {
-      return [];
-    }
-
-    const content = readFileSync(logFilePath, 'utf-8');
-
-    // Split the content into individual JSON objects
-    // They are separated by "}\n{"
-    const jsonObjects = content
-      .split(/}\n{/)
-      .map((obj, index, array) => {
-        // Add back the braces we removed during split
-        if (index > 0) obj = '{' + obj;
-        if (index < array.length - 1) obj = obj + '}';
-        return obj.trim();
-      })
-      .filter((obj) => obj);
-
-    const logs: ParsedLog[] = [];
-
-    for (const jsonStr of jsonObjects) {
-      try {
-        const logData = JSON.parse(jsonStr);
-        logs.push(logData);
-      } catch (e) {
-        // Skip objects that aren't valid JSON
-        if (env['VERBOSE'] === 'true') {
-          console.error('Failed to parse telemetry object:', e);
-        }
-      }
-    }
-
-    return logs;
-  }
-
-  readToolLogs() {
-    // For Podman, first check if telemetry file exists and has content
-    // If not, fall back to parsing from stdout
-    if (env['GEMINI_SANDBOX'] === 'podman') {
-      // Try reading from file first
-      const logFilePath = join(this.homeDir!, 'telemetry.log');
-
-      if (fs.existsSync(logFilePath)) {
-        try {
-          const content = readFileSync(logFilePath, 'utf-8');
-          if (content && content.includes('"event.name"')) {
-            // File has content, use normal file parsing
-            // Continue to the normal file parsing logic below
-          } else if (this._lastRunStdout) {
-            // File exists but is empty or doesn't have events, parse from stdout
-            return this._parseToolLogsFromStdout(this._lastRunStdout);
-          }
-        } catch {
-          // Error reading file, fall back to stdout
-          if (this._lastRunStdout) {
-            return this._parseToolLogsFromStdout(this._lastRunStdout);
-          }
-        }
-      } else if (this._lastRunStdout) {
-        // No file exists, parse from stdout
-        return this._parseToolLogsFromStdout(this._lastRunStdout);
-      }
-    }
-
-    const parsedLogs = this._readAndParseTelemetryLog();
-    const logs: {
-      toolRequest: {
-        name: string;
-        args: string;
-        success: boolean;
-        duration_ms: number;
-      };
-    }[] = [];
-
-    for (const logData of parsedLogs) {
-      // Look for tool call logs
-      if (
-        logData.attributes &&
-        logData.attributes['event.name'] === 'gemini_cli.tool_call'
-      ) {
-        const toolName = logData.attributes.function_name!;
-        logs.push({
-          toolRequest: {
-            name: toolName,
-            args: logData.attributes.function_args ?? '{}',
-            success: logData.attributes.success ?? false,
-            duration_ms: logData.attributes.duration_ms ?? 0,
-          },
-        });
-      }
-    }
-
-    return logs;
-  }
-
-  readAllApiRequest(): ParsedLog[] {
-    const logs = this._readAndParseTelemetryLog();
-    const apiRequests = logs.filter(
-      (logData) =>
-        logData.attributes &&
-        logData.attributes['event.name'] === `gemini_cli.api_request`,
-    );
-    return apiRequests;
-  }
-
-  readLastApiRequest(): ParsedLog | null {
-    const logs = this._readAndParseTelemetryLog();
-    const apiRequests = logs.filter(
-      (logData) =>
-        logData.attributes &&
-        logData.attributes['event.name'] === `gemini_cli.api_request`,
-    );
-    return apiRequests.pop() || null;
-  }
-
-  async waitForMetric(metricName: string, timeout?: number) {
-    await this.waitForTelemetryReady();
-
-    const fullName = metricName.startsWith('gemini_cli.')
-      ? metricName
-      : `gemini_cli.${metricName}`;
-
-    return poll(
-      () => {
-        const logs = this._readAndParseTelemetryLog();
-        for (const logData of logs) {
-          if (logData.scopeMetrics) {
-            for (const scopeMetric of logData.scopeMetrics) {
-              for (const metric of scopeMetric.metrics) {
-                if (metric.descriptor.name === fullName) {
-                  return true;
-                }
-              }
-            }
-          }
-        }
-        return false;
-      },
-      timeout ?? getDefaultTimeout(),
-      100,
-    );
-  }
-
-  readMetric(metricName: string): Record<string, unknown> | null {
-    const logs = this._readAndParseTelemetryLog();
-    for (const logData of logs) {
-      if (logData.scopeMetrics) {
-        for (const scopeMetric of logData.scopeMetrics) {
-          for (const metric of scopeMetric.metrics) {
-            if (metric.descriptor.name === `gemini_cli.${metricName}`) {
-              return metric;
-            }
-          }
-        }
-      }
-    }
-    return null;
-  }
-
-  async runInteractive(options?: {
-    args?: string | string[];
-    yolo?: boolean;
-    env?: Record<string, string | undefined>;
-  }): Promise<InteractiveRun> {
-    const yolo = options?.yolo !== false;
-    const { command, initialArgs } = this._getCommandAndArgs(
-      yolo ? ['--yolo'] : [],
-    );
-    const commandArgs = [...initialArgs];
-
-    const envVars = {
-      ...process.env,
-      GEMINI_CLI_HOME: this.homeDir!,
-      ...options?.env,
-    };
-
-    const ptyOptions: pty.IPtyForkOptions = {
-      name: 'xterm-color',
-      cols: 80,
-      rows: 80,
-      cwd: this.testDir!,
-      env: Object.fromEntries(
-        Object.entries(envVars).filter(([, v]) => v !== undefined),
-      ) as { [key: string]: string },
-    };
-
-    const executable = command === 'node' ? process.execPath : command;
-    const ptyProcess = pty.spawn(executable, commandArgs, ptyOptions);
-
-    const run = new InteractiveRun(ptyProcess);
-    this._interactiveRuns.push(run);
-    // Wait for the app to be ready
-    await run.expectText('  Type your message or @path/to/file', 30000);
-    return run;
-  }
-
-  readHookLogs() {
-    const parsedLogs = this._readAndParseTelemetryLog();
-    const logs: {
-      hookCall: {
-        hook_event_name: string;
-        hook_name: string;
-        hook_input: Record<string, unknown>;
-        hook_output: Record<string, unknown>;
-        exit_code: number;
-        stdout: string;
-        stderr: string;
-        duration_ms: number;
-        success: boolean;
-        error: string;
-      };
-    }[] = [];
-
-    for (const logData of parsedLogs) {
-      // Look for tool call logs
-      if (
-        logData.attributes &&
-        logData.attributes['event.name'] === 'gemini_cli.hook_call'
-      ) {
-        logs.push({
-          hookCall: {
-            hook_event_name: logData.attributes.hook_event_name ?? '',
-            hook_name: logData.attributes.hook_name ?? '',
-            hook_input: logData.attributes.hook_input ?? {},
-            hook_output: logData.attributes.hook_output ?? {},
-            exit_code: logData.attributes.exit_code ?? 0,
-            stdout: logData.attributes.stdout ?? '',
-            stderr: logData.attributes.stderr ?? '',
-            duration_ms: logData.attributes.duration_ms ?? 0,
-            success: logData.attributes.success ?? false,
-            error: logData.attributes.error ?? '',
-          },
-        });
-      }
-    }
-
-    return logs;
-  }
-
-  async pollCommand(
-    commandFn: () => Promise<void>,
-    predicateFn: () => boolean,
-    timeout: number = 30000,
-    interval: number = 1000,
-  ) {
-    const startTime = Date.now();
-    while (Date.now() - startTime < timeout) {
-      await commandFn();
-      // Give it a moment to process
-      await sleep(500);
-      if (predicateFn()) {
-        return;
-      }
-      await sleep(interval);
-    }
-    throw new Error(`pollCommand timed out after ${timeout}ms`);
-  }
-}
+export * from '@google/gemini-cli-test-utils';
diff --git a/package-lock.json b/package-lock.json
index b9f71c339f..7d036d9b96 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -522,7 +522,7 @@
       "version": "2.0.1",
       "resolved": "https://registry.npmjs.org/@bundled-es-modules/cookie/-/cookie-2.0.1.tgz",
       "integrity": "sha512-8o+5fRPLNbjbdGRRmJj3h6Hh1AQJf2dk3qQ/5ZFb+PXkRNiSoMGGUKlsgLfrxneb72axVJyIYji64E2+nNfYyw==",
-      "dev": true,
+      "devOptional": true,
       "license": "ISC",
       "dependencies": {
         "cookie": "^0.7.2"
@@ -532,7 +532,7 @@
       "version": "1.0.1",
       "resolved": "https://registry.npmjs.org/@bundled-es-modules/statuses/-/statuses-1.0.1.tgz",
       "integrity": "sha512-yn7BklA5acgcBr+7w064fGV+SGIFySjCKpqjcWgBAIfrAkY+4GQTJJHQMeT3V/sgz23VTEVV8TtOmkvJAhFVfg==",
-      "dev": true,
+      "devOptional": true,
       "license": "ISC",
       "dependencies": {
         "statuses": "^2.0.1"
@@ -542,7 +542,7 @@
       "version": "0.1.6",
       "resolved": "https://registry.npmjs.org/@bundled-es-modules/tough-cookie/-/tough-cookie-0.1.6.tgz",
       "integrity": "sha512-dvMHbL464C0zI+Yqxbz6kZ5TOEp7GLW+pry/RWndAR8MJQAXZ2rPmIs8tziTZjeIyhSNZgZbCePtfSbdWqStJw==",
-      "dev": true,
+      "devOptional": true,
       "license": "ISC",
       "dependencies": {
         "@types/tough-cookie": "^4.0.5",
@@ -553,7 +553,7 @@
       "version": "4.1.4",
       "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-4.1.4.tgz",
       "integrity": "sha512-Loo5UUvLD9ScZ6jh8beX1T6sO1w2/MpCRpEP7V280GKMVUQ0Jzar2U3UJPsrdbziLEMMhu3Ujnq//rhiFuIeag==",
-      "dev": true,
+      "devOptional": true,
       "license": "BSD-3-Clause",
       "dependencies": {
         "psl": "^1.1.33",
@@ -592,7 +592,6 @@
       "cpu": [
         "ppc64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -609,7 +608,6 @@
       "cpu": [
         "arm"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -626,7 +624,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -643,7 +640,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -660,7 +656,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -677,7 +672,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -694,7 +688,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -711,7 +704,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -728,7 +720,6 @@
       "cpu": [
         "arm"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -745,7 +736,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -762,7 +752,6 @@
       "cpu": [
         "ia32"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -779,7 +768,6 @@
       "cpu": [
         "loong64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -796,7 +784,6 @@
       "cpu": [
         "mips64el"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -813,7 +800,6 @@
       "cpu": [
         "ppc64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -830,7 +816,6 @@
       "cpu": [
         "riscv64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -847,7 +832,6 @@
       "cpu": [
         "s390x"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -864,7 +848,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -881,7 +864,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -898,7 +880,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -915,7 +896,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -932,7 +912,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -949,7 +928,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -966,7 +944,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -983,7 +960,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1000,7 +976,6 @@
       "cpu": [
         "ia32"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1017,7 +992,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1639,7 +1613,7 @@
       "version": "5.1.14",
       "resolved": "https://registry.npmjs.org/@inquirer/confirm/-/confirm-5.1.14.tgz",
       "integrity": "sha512-5yR4IBfe0kXe59r1YCTG8WXkUbl7Z35HK87Sw+WUyGD8wNUx7JvY7laahzeytyE1oLn74bQnL7hstctQxisQ8Q==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT",
       "dependencies": {
         "@inquirer/core": "^10.1.15",
@@ -1661,7 +1635,7 @@
       "version": "10.1.15",
       "resolved": "https://registry.npmjs.org/@inquirer/core/-/core-10.1.15.tgz",
       "integrity": "sha512-8xrp836RZvKkpNbVvgWUlxjT4CraKk2q+I3Ksy+seI2zkcE+y6wNs1BVhgcv8VyImFecUhdQrYLdW32pAjwBdA==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT",
       "dependencies": {
         "@inquirer/figures": "^1.0.13",
@@ -1689,7 +1663,7 @@
       "version": "4.3.2",
       "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-4.3.2.tgz",
       "integrity": "sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT",
       "dependencies": {
         "type-fest": "^0.21.3"
@@ -1705,7 +1679,7 @@
       "version": "0.21.3",
       "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.21.3.tgz",
       "integrity": "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==",
-      "dev": true,
+      "devOptional": true,
       "license": "(MIT OR CC0-1.0)",
       "engines": {
         "node": ">=10"
@@ -1718,7 +1692,7 @@
       "version": "1.0.13",
       "resolved": "https://registry.npmjs.org/@inquirer/figures/-/figures-1.0.13.tgz",
       "integrity": "sha512-lGPVU3yO9ZNqA7vTYz26jny41lE7yoQansmqdMLBEfqaGsmdg7V3W9mK9Pvb5IL4EVZ9GnSDGMO/cJXud5dMaw==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT",
       "engines": {
         "node": ">=18"
@@ -1728,7 +1702,7 @@
       "version": "3.0.8",
       "resolved": "https://registry.npmjs.org/@inquirer/type/-/type-3.0.8.tgz",
       "integrity": "sha512-lg9Whz8onIHRthWaN1Q9EGLa/0LFJjyM8mEUbL1eTi6yMGvBf8gvyDLtxSXztQsxMvhxxNpJYrwa1YHdq+w4Jw==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT",
       "engines": {
         "node": ">=18"
@@ -1863,7 +1837,6 @@
       "version": "1.5.5",
       "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz",
       "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==",
-      "dev": true,
       "license": "MIT"
     },
     "node_modules/@jridgewell/trace-mapping": {
@@ -2032,7 +2005,6 @@
       "resolved": "https://registry.npmjs.org/@lydell/node-pty/-/node-pty-1.1.0.tgz",
       "integrity": "sha512-VDD8LtlMTOrPKWMXUAcB9+LTktzuunqrMwkYR1DMRBkS6LQrCt+0/Ws1o2rMml/n3guePpS7cxhHF7Nm5K4iMw==",
       "license": "MIT",
-      "optional": true,
       "optionalDependencies": {
         "@lydell/node-pty-darwin-arm64": "1.1.0",
         "@lydell/node-pty-darwin-x64": "1.1.0",
@@ -2420,7 +2392,7 @@
       "version": "0.39.5",
       "resolved": "https://registry.npmjs.org/@mswjs/interceptors/-/interceptors-0.39.5.tgz",
       "integrity": "sha512-B9nHSJYtsv79uo7QdkZ/b/WoKm20IkVSmTc/WCKarmDtFwM0dRx2ouEniqwNkzCSLn3fydzKmnMzjtfdOWt3VQ==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT",
       "dependencies": {
         "@open-draft/deferred-promise": "^2.2.0",
@@ -2655,14 +2627,14 @@
       "version": "2.2.0",
       "resolved": "https://registry.npmjs.org/@open-draft/deferred-promise/-/deferred-promise-2.2.0.tgz",
       "integrity": "sha512-CecwLWx3rhxVQF6V4bAgPS5t+So2sTbPgAzafKkVizyi7tlwpcFpdFqq+wqF2OwNBmqFuu6tOyouTuxgpMfzmA==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT"
     },
     "node_modules/@open-draft/logger": {
       "version": "0.3.0",
       "resolved": "https://registry.npmjs.org/@open-draft/logger/-/logger-0.3.0.tgz",
       "integrity": "sha512-X2g45fzhxH238HKO4xbSr7+wBS8Fvw6ixhTDuvLd5mqh6bJJCFAPwU9mPDxbcrRtfxv4u5IHCEH77BmxvXmmxQ==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT",
       "dependencies": {
         "is-node-process": "^1.2.0",
@@ -2673,7 +2645,7 @@
       "version": "2.1.0",
       "resolved": "https://registry.npmjs.org/@open-draft/until/-/until-2.1.0.tgz",
       "integrity": "sha512-U69T3ItWHvLwGg5eJ0n3I62nWuE6ilHlmz7zM0npLBRvPRd7e6NYmg54vvRtP5mZG7kZqZCFVdsTWo7BPtBujg==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT"
     },
     "node_modules/@opentelemetry/api": {
@@ -3337,7 +3309,6 @@
       "cpu": [
         "arm"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3351,7 +3322,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3365,7 +3335,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3379,7 +3348,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3393,7 +3361,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3407,7 +3374,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3421,7 +3387,6 @@
       "cpu": [
         "arm"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3435,7 +3400,6 @@
       "cpu": [
         "arm"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3449,7 +3413,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3463,7 +3426,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3477,7 +3439,6 @@
       "cpu": [
         "loong64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3491,7 +3452,6 @@
       "cpu": [
         "ppc64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3505,7 +3465,6 @@
       "cpu": [
         "riscv64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3519,7 +3478,6 @@
       "cpu": [
         "riscv64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3533,7 +3491,6 @@
       "cpu": [
         "s390x"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3547,7 +3504,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3561,7 +3517,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3575,7 +3530,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3589,7 +3543,6 @@
       "cpu": [
         "arm64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3603,7 +3556,6 @@
       "cpu": [
         "ia32"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3617,7 +3569,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -3631,7 +3582,6 @@
       "cpu": [
         "x64"
       ],
-      "dev": true,
       "license": "MIT",
       "optional": true,
       "os": [
@@ -4088,7 +4038,6 @@
       "version": "5.2.3",
       "resolved": "https://registry.npmjs.org/@types/chai/-/chai-5.2.3.tgz",
       "integrity": "sha512-Mw558oeA9fFbv65/y4mHtXDs9bPnFMZAL/jxdPFUpOHHIXX91mcgEHbS5Lahr+pwZFR8A7GQleRWeI6cGFC2UA==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "@types/deep-eql": "*",
@@ -4122,7 +4071,7 @@
       "version": "0.6.0",
       "resolved": "https://registry.npmjs.org/@types/cookie/-/cookie-0.6.0.tgz",
       "integrity": "sha512-4Kh9a6B2bQciAhf7FSuMRRkUWecJgJu9nPnx3yzpsfXX/c50REIqpHY4C82bXP90qrLtXtkDxTZosYO3UpOwlA==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT"
     },
     "node_modules/@types/cookiejar": {
@@ -4146,7 +4095,6 @@
       "version": "4.0.2",
       "resolved": "https://registry.npmjs.org/@types/deep-eql/-/deep-eql-4.0.2.tgz",
       "integrity": "sha512-c9h9dVVMigMPc4bwTvC5dxqtqJZwQPePsWjPlpSOnojbor6pGqdk541lfA7AqFQr5pB1BRdq0juY9db81BwyFw==",
-      "dev": true,
       "license": "MIT"
     },
     "node_modules/@types/diff": {
@@ -4170,7 +4118,6 @@
       "version": "1.0.8",
       "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz",
       "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==",
-      "dev": true,
       "license": "MIT"
     },
     "node_modules/@types/express": {
@@ -4526,7 +4473,7 @@
       "version": "2.0.6",
       "resolved": "https://registry.npmjs.org/@types/statuses/-/statuses-2.0.6.tgz",
       "integrity": "sha512-xMAgYwceFhRA2zY+XbEA7mxYbA093wdiW8Vu6gZPGWy9cmOyU9XesH1tNcEWsKFd5Vzrqx5T3D38PWx1FIIXkA==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT"
     },
     "node_modules/@types/superagent": {
@@ -5140,7 +5087,6 @@
       "version": "3.2.4",
       "resolved": "https://registry.npmjs.org/@vitest/expect/-/expect-3.2.4.tgz",
       "integrity": "sha512-Io0yyORnB6sikFlt8QW5K7slY4OjqNX9jmJQ02QDda8lyM6B5oNgVWoSoKPac8/kgnCUzuHQKrSLtu/uOqqrig==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "@types/chai": "^5.2.2",
@@ -5157,7 +5103,6 @@
       "version": "3.2.4",
       "resolved": "https://registry.npmjs.org/@vitest/mocker/-/mocker-3.2.4.tgz",
       "integrity": "sha512-46ryTE9RZO/rfDd7pEqFl7etuyzekzEhUbTW3BvmeO/BcCMEgq59BKhek3dXDWgAj4oMK6OZi+vRr1wPW6qjEQ==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "@vitest/spy": "3.2.4",
@@ -5184,7 +5129,6 @@
       "version": "3.2.4",
       "resolved": "https://registry.npmjs.org/@vitest/pretty-format/-/pretty-format-3.2.4.tgz",
       "integrity": "sha512-IVNZik8IVRJRTr9fxlitMKeJeXFFFN0JaB9PHPGQ8NKQbGpfjlTx9zO4RefN8gp7eqjNy8nyK3NZmBzOPeIxtA==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "tinyrainbow": "^2.0.0"
@@ -5197,7 +5141,6 @@
       "version": "3.2.4",
       "resolved": "https://registry.npmjs.org/@vitest/runner/-/runner-3.2.4.tgz",
       "integrity": "sha512-oukfKT9Mk41LreEW09vt45f8wx7DordoWUZMYdY/cyAk7w5TWkTRCNZYF7sX7n2wB7jyGAl74OxgwhPgKaqDMQ==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "@vitest/utils": "3.2.4",
@@ -5212,7 +5155,6 @@
       "version": "3.2.4",
       "resolved": "https://registry.npmjs.org/@vitest/snapshot/-/snapshot-3.2.4.tgz",
       "integrity": "sha512-dEYtS7qQP2CjU27QBC5oUOxLE/v5eLkGqPE0ZKEIDGMs4vKWe7IjgLOeauHsR0D5YuuycGRO5oSRXnwnmA78fQ==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "@vitest/pretty-format": "3.2.4",
@@ -5227,7 +5169,6 @@
       "version": "3.2.4",
       "resolved": "https://registry.npmjs.org/@vitest/spy/-/spy-3.2.4.tgz",
       "integrity": "sha512-vAfasCOe6AIK70iP5UD11Ac4siNUNJ9i/9PZ3NKx07sG6sUxeag1LWdNrMWeKKYBLlzuK+Gn65Yd5nyL6ds+nw==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "tinyspy": "^4.0.3"
@@ -5240,7 +5181,6 @@
       "version": "3.2.4",
       "resolved": "https://registry.npmjs.org/@vitest/utils/-/utils-3.2.4.tgz",
       "integrity": "sha512-fB2V0JFrQSMsCo9HiSq3Ezpdv4iYaXRG1Sx8edX3MwxfyNn83mKiGzOcH+Fkxt4MHxr3y42fQi1oeAInqgX2QA==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "@vitest/pretty-format": "3.2.4",
@@ -6318,7 +6258,6 @@
       "version": "2.0.1",
       "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-2.0.1.tgz",
       "integrity": "sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==",
-      "dev": true,
       "license": "MIT",
       "engines": {
         "node": ">=12"
@@ -6714,7 +6653,6 @@
       "version": "6.7.14",
       "resolved": "https://registry.npmjs.org/cac/-/cac-6.7.14.tgz",
       "integrity": "sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==",
-      "dev": true,
       "license": "MIT",
       "engines": {
         "node": ">=8"
@@ -6830,7 +6768,6 @@
       "version": "5.3.3",
       "resolved": "https://registry.npmjs.org/chai/-/chai-5.3.3.tgz",
       "integrity": "sha512-4zNhdJD/iOjSH0A05ea+Ke6MU5mmpQcbQsSOkgdaUMJ9zTlDTD/GYlwohmIE2u0gaxHYiVHEn1Fw9mZ/ktJWgw==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "assertion-error": "^2.0.1",
@@ -6881,7 +6818,6 @@
       "version": "2.1.1",
       "resolved": "https://registry.npmjs.org/check-error/-/check-error-2.1.1.tgz",
       "integrity": "sha512-OAlb+T7V4Op9OwdkjmguYRqncdlx5JiofwOAUkmTF+jNdHwzTaTs4sRAGpzLF3oOz5xAyDGrPgeIDFQmDOTiJw==",
-      "dev": true,
       "license": "MIT",
       "engines": {
         "node": ">= 16"
@@ -7101,7 +7037,7 @@
       "version": "4.1.0",
       "resolved": "https://registry.npmjs.org/cli-width/-/cli-width-4.1.0.tgz",
       "integrity": "sha512-ouuZd4/dm2Sw5Gmqy6bGyNNNe1qt9RpmxveLSO7KcgsTnU7RXfsw+/bukWGo1abgBiMAic068rclZsO4IWmmxQ==",
-      "dev": true,
+      "devOptional": true,
       "license": "ISC",
       "engines": {
         "node": ">= 12"
@@ -7805,7 +7741,6 @@
       "version": "5.0.2",
       "resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-5.0.2.tgz",
       "integrity": "sha512-h5k/5U50IJJFpzfL6nO9jaaumfjO/f2NjK/oYB2Djzm4p9L+3T9qWpZqZ2hAbLPuuYq9wrU08WQyBTL5GbPk5Q==",
-      "dev": true,
       "license": "MIT",
       "engines": {
         "node": ">=6"
@@ -8587,7 +8522,6 @@
       "version": "1.7.0",
       "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.7.0.tgz",
       "integrity": "sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==",
-      "dev": true,
       "license": "MIT"
     },
     "node_modules/es-object-atoms": {
@@ -8662,7 +8596,6 @@
       "version": "0.25.6",
       "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.6.tgz",
       "integrity": "sha512-GVuzuUwtdsghE3ocJ9Bs8PNoF13HNQ5TXbEi2AhvVb8xU1Iwt9Fos9FEamfoee+u/TOsn7GUWc04lz46n2bbTg==",
-      "dev": true,
       "hasInstallScript": true,
       "license": "MIT",
       "bin": {
@@ -9111,7 +9044,6 @@
       "version": "3.0.3",
       "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-3.0.3.tgz",
       "integrity": "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "@types/estree": "^1.0.0"
@@ -9279,7 +9211,6 @@
       "version": "1.2.2",
       "resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.2.2.tgz",
       "integrity": "sha512-JhFGDVJ7tmDJItKhYgJCGLOWjuK9vPxiXoUFLwLDc99NlmklilbiQJwoctZtt13+xMw91MCk/REan6MWHqDjyA==",
-      "dev": true,
       "license": "Apache-2.0",
       "engines": {
         "node": ">=12.0.0"
@@ -9898,7 +9829,6 @@
       "version": "2.3.3",
       "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
       "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
-      "dev": true,
       "hasInstallScript": true,
       "license": "MIT",
       "optional": true,
@@ -10097,7 +10027,7 @@
       "version": "4.10.1",
       "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.10.1.tgz",
       "integrity": "sha512-auHyJ4AgMz7vgS8Hp3N6HXSmlMdUyhSUrfBF16w153rxtLIEOE+HGqaBppczZvnHLqQJfiHotCYpNhl0lUROFQ==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT",
       "dependencies": {
         "resolve-pkg-maps": "^1.0.0"
@@ -10498,7 +10428,7 @@
       "version": "16.11.0",
       "resolved": "https://registry.npmjs.org/graphql/-/graphql-16.11.0.tgz",
       "integrity": "sha512-mS1lbMsxgQj6hge1XZ6p7GPhbrtFwUFYi3wRzXAC/FmYnyXMTvvI3td3rjmQ2u8ewXueaSvRPWaEcgVVOT9Jnw==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT",
       "engines": {
         "node": "^12.22.0 || ^14.16.0 || ^16.0.0 || >=17.0.0"
@@ -10620,7 +10550,7 @@
       "version": "4.0.3",
       "resolved": "https://registry.npmjs.org/headers-polyfill/-/headers-polyfill-4.0.3.tgz",
       "integrity": "sha512-IScLbePpkvO846sIwOtOTDjutRMWdXdJmXdMvk6gCBHxFO8d+QKOQedyZSxFTTFYRSmlgSTDtXqqq4pcenBXLQ==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT"
     },
     "node_modules/highlight.js": {
@@ -11411,7 +11341,7 @@
       "version": "1.2.0",
       "resolved": "https://registry.npmjs.org/is-node-process/-/is-node-process-1.2.0.tgz",
       "integrity": "sha512-Vg4o6/fqPxIjtxgUH5QLJhwZ7gW5diGCVlXpuUfELC62CuxM1iHcRe51f2W1FDy04Ai4KJkagKjx3XaqyfRKXw==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT"
     },
     "node_modules/is-number": {
@@ -11824,7 +11754,6 @@
       "version": "9.0.1",
       "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-9.0.1.tgz",
       "integrity": "sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==",
-      "dev": true,
       "license": "MIT"
     },
     "node_modules/js-yaml": {
@@ -12564,7 +12493,6 @@
       "version": "3.2.1",
       "resolved": "https://registry.npmjs.org/loupe/-/loupe-3.2.1.tgz",
       "integrity": "sha512-CdzqowRJCeLU72bHvWqwRBBlLcMEtIvGrlvef74kMnV2AolS9Y8xUv1I0U/MNAWMhBlKIoyuEgoJ0t/bbwHbLQ==",
-      "dev": true,
       "license": "MIT"
     },
     "node_modules/lowercase-keys": {
@@ -12604,7 +12532,6 @@
       "version": "0.30.21",
       "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.21.tgz",
       "integrity": "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "@jridgewell/sourcemap-codec": "^1.5.5"
@@ -12931,7 +12858,7 @@
       "version": "2.10.4",
       "resolved": "https://registry.npmjs.org/msw/-/msw-2.10.4.tgz",
       "integrity": "sha512-6R1or/qyele7q3RyPwNuvc0IxO8L8/Aim6Sz5ncXEgcWUNxSKE+udriTOWHtpMwmfkLYlacA2y7TIx4cL5lgHA==",
-      "dev": true,
+      "devOptional": true,
       "hasInstallScript": true,
       "license": "MIT",
       "dependencies": {
@@ -12976,14 +12903,14 @@
       "version": "6.3.0",
       "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-6.3.0.tgz",
       "integrity": "sha512-Yhpw4T9C6hPpgPeA28us07OJeqZ5EzQTkbfwuhsUg0c237RomFoETJgmp2sa3F/41gfLE6G5cqcYwznmeEeOlQ==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT"
     },
     "node_modules/msw/node_modules/type-fest": {
       "version": "4.41.0",
       "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.41.0.tgz",
       "integrity": "sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA==",
-      "dev": true,
+      "devOptional": true,
       "license": "(MIT OR CC0-1.0)",
       "engines": {
         "node": ">=16"
@@ -13023,7 +12950,7 @@
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/mute-stream/-/mute-stream-2.0.0.tgz",
       "integrity": "sha512-WWdIxpyjEn+FhQJQQv9aQAYlHoNVdzIzUySNV1gHUPDSdZJ3yZn7pAAbQcV7B56Mvu881q9FZV+0Vx2xC44VWA==",
-      "dev": true,
+      "devOptional": true,
       "license": "ISC",
       "engines": {
         "node": "^18.17.0 || >=20.5.0"
@@ -13053,7 +12980,6 @@
       "version": "3.3.11",
       "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz",
       "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==",
-      "dev": true,
       "funding": [
         {
           "type": "github",
@@ -13807,7 +13733,7 @@
       "version": "1.4.3",
       "resolved": "https://registry.npmjs.org/outvariant/-/outvariant-1.4.3.tgz",
       "integrity": "sha512-+Sl2UErvtsoajRDKCE5/dBz4DIvHXQQnAxtQTF04OJxY0+DyZXSo5P5Bb7XYWOh81syohlYL24hbDwxedPUJCA==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT"
     },
     "node_modules/own-keys": {
@@ -14154,14 +14080,12 @@
       "version": "2.0.3",
       "resolved": "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz",
       "integrity": "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==",
-      "dev": true,
       "license": "MIT"
     },
     "node_modules/pathval": {
       "version": "2.0.1",
       "resolved": "https://registry.npmjs.org/pathval/-/pathval-2.0.1.tgz",
       "integrity": "sha512-//nshmD55c46FuFw26xV/xFAaB5HF9Xdap7HJBBnrKdAd6/GxDBaNA1870O79+9ueg61cZLSVc+OaFlfmObYVQ==",
-      "dev": true,
       "license": "MIT",
       "engines": {
         "node": ">= 14.16"
@@ -14267,7 +14191,6 @@
       "version": "8.5.6",
       "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz",
       "integrity": "sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==",
-      "dev": true,
       "funding": [
         {
           "type": "opencollective",
@@ -14495,7 +14418,7 @@
       "version": "1.15.0",
       "resolved": "https://registry.npmjs.org/psl/-/psl-1.15.0.tgz",
       "integrity": "sha512-JZd3gMVBAVQkSs6HdNZo9Sdo0LNcQeMNP3CozBJb3JYC/QUYZTnKxP+f8oWRX4rHP5EurWxqAHTSwUCjlNKa1w==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT",
       "dependencies": {
         "punycode": "^2.3.1"
@@ -14529,7 +14452,7 @@
       "version": "2.3.1",
       "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
       "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT",
       "engines": {
         "node": ">=6"
@@ -14564,7 +14487,7 @@
       "version": "2.2.0",
       "resolved": "https://registry.npmjs.org/querystringify/-/querystringify-2.2.0.tgz",
       "integrity": "sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT"
     },
     "node_modules/queue-microtask": {
@@ -15070,7 +14993,7 @@
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz",
       "integrity": "sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT"
     },
     "node_modules/resolve": {
@@ -15127,7 +15050,7 @@
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
       "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT",
       "funding": {
         "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
@@ -15305,7 +15228,6 @@
       "version": "4.53.2",
       "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.53.2.tgz",
       "integrity": "sha512-MHngMYwGJVi6Fmnk6ISmnk7JAHRNF0UkuucA0CUW3N3a4KnONPEZz+vUanQP/ZC/iY1Qkf3bwPWzyY84wEks1g==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "@types/estree": "1.0.8"
@@ -15808,7 +15730,6 @@
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/siginfo/-/siginfo-2.0.0.tgz",
       "integrity": "sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==",
-      "dev": true,
       "license": "ISC"
     },
     "node_modules/signal-exit": {
@@ -15966,7 +15887,6 @@
       "version": "1.2.1",
       "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
       "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==",
-      "dev": true,
       "license": "BSD-3-Clause",
       "engines": {
         "node": ">=0.10.0"
@@ -16045,7 +15965,6 @@
       "version": "0.0.2",
       "resolved": "https://registry.npmjs.org/stackback/-/stackback-0.0.2.tgz",
       "integrity": "sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==",
-      "dev": true,
       "license": "MIT"
     },
     "node_modules/statuses": {
@@ -16061,7 +15980,6 @@
       "version": "3.10.0",
       "resolved": "https://registry.npmjs.org/std-env/-/std-env-3.10.0.tgz",
       "integrity": "sha512-5GS12FdOZNliM5mAOxFRg7Ir0pWz8MdpYm6AY6VPkGpbA7ZzmbzNcBJQ0GPvvyWgcY7QAhCgf9Uy89I03faLkg==",
-      "dev": true,
       "license": "MIT"
     },
     "node_modules/stop-iteration-iterator": {
@@ -16109,7 +16027,7 @@
       "version": "0.5.1",
       "resolved": "https://registry.npmjs.org/strict-event-emitter/-/strict-event-emitter-0.5.1.tgz",
       "integrity": "sha512-vMgjE/GGEPEFnhFub6pa4FmJBRBVOLpIII2hvCZ8Kzb7K0hlHo7mQv6xYrBvCL2LtAIBwFUK8wvuJgTVSQ5MFQ==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT"
     },
     "node_modules/string_decoder": {
@@ -16382,7 +16300,6 @@
       "version": "3.1.0",
       "resolved": "https://registry.npmjs.org/strip-literal/-/strip-literal-3.1.0.tgz",
       "integrity": "sha512-8r3mkIM/2+PpjHoOtiAW8Rg3jJLHaV7xPwG+YRGrv6FP0wwk/toTpATxWYOW0BKdWwl82VT2tFYi5DlROa0Mxg==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "js-tokens": "^9.0.1"
@@ -16938,7 +16855,6 @@
       "version": "2.9.0",
       "resolved": "https://registry.npmjs.org/tinybench/-/tinybench-2.9.0.tgz",
       "integrity": "sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==",
-      "dev": true,
       "license": "MIT"
     },
     "node_modules/tinycolor2": {
@@ -16951,14 +16867,12 @@
       "version": "0.3.2",
       "resolved": "https://registry.npmjs.org/tinyexec/-/tinyexec-0.3.2.tgz",
       "integrity": "sha512-KQQR9yN7R5+OSwaK0XQoj22pwHoTlgYqmUscPYoknOoWCWfj/5/ABTMRi69FrKU5ffPVh5QcFikpWJI/P1ocHA==",
-      "dev": true,
       "license": "MIT"
     },
     "node_modules/tinyglobby": {
       "version": "0.2.15",
       "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
       "integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "fdir": "^6.5.0",
@@ -16975,7 +16889,6 @@
       "version": "6.5.0",
       "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz",
       "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==",
-      "dev": true,
       "license": "MIT",
       "engines": {
         "node": ">=12.0.0"
@@ -16993,7 +16906,6 @@
       "version": "4.0.3",
       "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
       "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
-      "dev": true,
       "license": "MIT",
       "engines": {
         "node": ">=12"
@@ -17016,7 +16928,6 @@
       "version": "1.1.1",
       "resolved": "https://registry.npmjs.org/tinypool/-/tinypool-1.1.1.tgz",
       "integrity": "sha512-Zba82s87IFq9A9XmjiX5uZA/ARWDrB03OHlq+Vw1fSdt0I+4/Kutwy8BP4Y/y/aORMo61FQ0vIb5j44vSo5Pkg==",
-      "dev": true,
       "license": "MIT",
       "engines": {
         "node": "^18.0.0 || >=20.0.0"
@@ -17026,7 +16937,6 @@
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/tinyrainbow/-/tinyrainbow-2.0.0.tgz",
       "integrity": "sha512-op4nsTR47R6p0vMUUoYl/a+ljLFVtlfaXkLQmqfLR1qHma1h/ysYk4hEXZ880bf2CYgTskvTa/e196Vd5dDQXw==",
-      "dev": true,
       "license": "MIT",
       "engines": {
         "node": ">=14.0.0"
@@ -17036,7 +16946,6 @@
       "version": "4.0.4",
       "resolved": "https://registry.npmjs.org/tinyspy/-/tinyspy-4.0.4.tgz",
       "integrity": "sha512-azl+t0z7pw/z958Gy9svOTuzqIk6xq+NSheJzn5MMWtWTFywIacg2wUlzKFGtt3cthx0r2SxMK0yzJOR0IES7Q==",
-      "dev": true,
       "license": "MIT",
       "engines": {
         "node": ">=14.0.0"
@@ -17227,7 +17136,7 @@
       "version": "4.20.3",
       "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.20.3.tgz",
       "integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT",
       "dependencies": {
         "esbuild": "~0.25.0",
@@ -17410,7 +17319,7 @@
       "version": "5.8.3",
       "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz",
       "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
-      "dev": true,
+      "devOptional": true,
       "license": "Apache-2.0",
       "bin": {
         "tsc": "bin/tsc",
@@ -17514,7 +17423,7 @@
       "version": "0.2.0",
       "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.2.0.tgz",
       "integrity": "sha512-CJ1QgKmNg3CwvAv/kOFmtnEN05f0D/cn9QntgNOQlQF9dgvVTHj3t+8JPdjqawCHk7V/KA+fbUqzZ9XWhcqPUg==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT",
       "engines": {
         "node": ">= 4.0.0"
@@ -17550,7 +17459,7 @@
       "version": "1.5.10",
       "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.10.tgz",
       "integrity": "sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT",
       "dependencies": {
         "querystringify": "^2.1.1",
@@ -17628,7 +17537,6 @@
       "version": "7.2.2",
       "resolved": "https://registry.npmjs.org/vite/-/vite-7.2.2.tgz",
       "integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "esbuild": "^0.25.0",
@@ -17703,7 +17611,6 @@
       "version": "3.2.4",
       "resolved": "https://registry.npmjs.org/vite-node/-/vite-node-3.2.4.tgz",
       "integrity": "sha512-EbKSKh+bh1E1IFxeO0pg1n4dvoOTt0UDiXMd/qn++r98+jPO1xtJilvXldeuQ8giIB5IkpjCgMleHMNEsGH6pg==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "cac": "^6.7.14",
@@ -17726,7 +17633,6 @@
       "version": "6.5.0",
       "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz",
       "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==",
-      "dev": true,
       "license": "MIT",
       "engines": {
         "node": ">=12.0.0"
@@ -17744,7 +17650,6 @@
       "version": "4.0.3",
       "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
       "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
-      "dev": true,
       "license": "MIT",
       "engines": {
         "node": ">=12"
@@ -17757,7 +17662,6 @@
       "version": "3.2.4",
       "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz",
       "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "@types/chai": "^5.2.2",
@@ -17830,7 +17734,6 @@
       "version": "4.0.3",
       "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
       "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
-      "dev": true,
       "license": "MIT",
       "engines": {
         "node": ">=12"
@@ -17993,7 +17896,6 @@
       "version": "2.3.0",
       "resolved": "https://registry.npmjs.org/why-is-node-running/-/why-is-node-running-2.3.0.tgz",
       "integrity": "sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "siginfo": "^2.0.0",
@@ -18263,7 +18165,7 @@
       "version": "2.8.1",
       "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.1.tgz",
       "integrity": "sha512-lcYcMxX2PO9XMGvAJkJ3OsNMw+/7FKes7/hgerGUYWIoWu5j/+YQqcZr5JnPZWzOsEBgMbSbiSTn/dv/69Mkpw==",
-      "dev": true,
+      "devOptional": true,
       "license": "ISC",
       "bin": {
         "yaml": "bin.mjs"
@@ -18388,7 +18290,7 @@
       "version": "2.1.2",
       "resolved": "https://registry.npmjs.org/yoctocolors-cjs/-/yoctocolors-cjs-2.1.2.tgz",
       "integrity": "sha512-cYVsTjKl8b+FrnidjibDWskAv7UKOfcwaVZdp/it9n1s9fU3IkgDbhdIRKCW4JDsAlECJY0ytoVPT3sK6kideA==",
-      "dev": true,
+      "devOptional": true,
       "license": "MIT",
       "engines": {
         "node": ">=18"
@@ -19054,6 +18956,12 @@
       "name": "@google/gemini-cli-test-utils",
       "version": "0.25.0-nightly.20260107.59a18e710",
       "license": "Apache-2.0",
+      "dependencies": {
+        "@google/gemini-cli-core": "file:../core",
+        "@lydell/node-pty": "1.1.0",
+        "strip-ansi": "^7.1.2",
+        "vitest": "^3.2.4"
+      },
       "devDependencies": {
         "typescript": "^5.3.3"
       },
diff --git a/package.json b/package.json
index f5c10deaf5..b69c37d69b 100644
--- a/package.json
+++ b/package.json
@@ -41,6 +41,8 @@
     "test": "npm run test --workspaces --if-present",
     "test:ci": "npm run test:ci --workspaces --if-present && npm run test:scripts",
     "test:scripts": "vitest run --config ./scripts/tests/vitest.config.ts",
+    "test:always_passing_evals": "vitest run --config evals/vitest.config.ts",
+    "test:all_evals": "cross-env RUN_EVALS=1 vitest run --config evals/vitest.config.ts",
     "test:e2e": "cross-env VERBOSE=true KEEP_OUTPUT=true npm run test:integration:sandbox:none",
     "test:integration:all": "npm run test:integration:sandbox:none && npm run test:integration:sandbox:docker && npm run test:integration:sandbox:podman",
     "test:integration:sandbox:none": "cross-env GEMINI_SANDBOX=false vitest run --root ./integration-tests",
diff --git a/packages/test-utils/package.json b/packages/test-utils/package.json
index dddb6c01f2..a05464d3e5 100644
--- a/packages/test-utils/package.json
+++ b/packages/test-utils/package.json
@@ -9,6 +9,12 @@
     "build": "node ../../scripts/build_package.js",
     "typecheck": "tsc --noEmit"
   },
+  "dependencies": {
+    "@google/gemini-cli-core": "file:../core",
+    "@lydell/node-pty": "1.1.0",
+    "strip-ansi": "^7.1.2",
+    "vitest": "^3.2.4"
+  },
   "devDependencies": {
     "typescript": "^5.3.3"
   },
diff --git a/packages/test-utils/src/index.ts b/packages/test-utils/src/index.ts
index b8af8aa7d6..c1f2f09d3e 100644
--- a/packages/test-utils/src/index.ts
+++ b/packages/test-utils/src/index.ts
@@ -5,3 +5,4 @@
  */
 
 export * from './file-system-test-helpers.js';
+export * from './test-rig.js';
diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts
new file mode 100644
index 0000000000..8b55637715
--- /dev/null
+++ b/packages/test-utils/src/test-rig.ts
@@ -0,0 +1,1227 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { expect } from 'vitest';
+import { execSync, spawn, type ChildProcess } from 'node:child_process';
+import { mkdirSync, writeFileSync, readFileSync } from 'node:fs';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { env } from 'node:process';
+import { setTimeout as sleep } from 'node:timers/promises';
+import { DEFAULT_GEMINI_MODEL, GEMINI_DIR } from '@google/gemini-cli-core';
+import fs from 'node:fs';
+import * as pty from '@lydell/node-pty';
+import stripAnsi from 'strip-ansi';
+import * as os from 'node:os';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const BUNDLE_PATH = join(__dirname, '..', '..', '..', 'bundle/gemini.js');
+
+/**
+ * Picks the default wait budget, in milliseconds, for the current environment.
+ *
+ * `CI` takes precedence over `GEMINI_SANDBOX`; any non-empty value of either
+ * variable counts as set.
+ */
+export function getDefaultTimeout() {
+  const CI_TIMEOUT_MS = 60000; // 1 minute in CI
+  const SANDBOX_TIMEOUT_MS = 30000; // 30s in containers
+  const LOCAL_TIMEOUT_MS = 15000; // 15s locally
+
+  if (env['CI']) {
+    return CI_TIMEOUT_MS;
+  }
+  return env['GEMINI_SANDBOX'] ? SANDBOX_TIMEOUT_MS : LOCAL_TIMEOUT_MS;
+}
+
+/**
+ * Re-evaluates `predicate` until it returns true or the time budget runs out.
+ *
+ * @param predicate Synchronous check evaluated once per attempt.
+ * @param timeout Total time budget in milliseconds.
+ * @param interval Pause between attempts in milliseconds.
+ * @returns `true` as soon as the predicate passes, `false` on timeout.
+ */
+export async function poll(
+  predicate: () => boolean,
+  timeout: number,
+  interval: number,
+): Promise<boolean> {
+  const isVerbose = () => env['VERBOSE'] === 'true';
+  const startedAt = Date.now();
+  let tries = 0;
+
+  while (Date.now() - startedAt < timeout) {
+    tries += 1;
+    const passed = predicate();
+
+    // Only report every fifth attempt to keep verbose logs readable.
+    if (isVerbose() && tries % 5 === 0) {
+      const status = passed ? 'success' : 'waiting...';
+      console.log(`Poll attempt ${tries}: ${status}`);
+    }
+
+    if (passed) {
+      return true;
+    }
+    await sleep(interval);
+  }
+
+  if (isVerbose()) {
+    console.log(`Poll timed out after ${tries} attempts`);
+  }
+  return false;
+}
+
+/**
+ * Turns a test name into a lowercase slug: every character outside `a-z0-9`
+ * becomes `-`, and consecutive dashes are collapsed into a single one.
+ */
+export function sanitizeTestName(name: string) {
+  const lowered = name.toLowerCase();
+  const dashed = lowered.replace(/[^a-z0-9]/g, '-');
+  return dashed.replace(/-+/g, '-');
+}
+
+/**
+ * Builds the failure message for a missing tool call.
+ *
+ * @param expectedTools One tool name, or several alternatives (joined by "or").
+ * @param foundTools Names of the tool calls that were actually observed.
+ * @param result Raw CLI output; its first 200 characters are quoted.
+ */
+export function createToolCallErrorMessage(
+  expectedTools: string | string[],
+  foundTools: string[],
+  result: string,
+) {
+  let expectedStr: string;
+  if (Array.isArray(expectedTools)) {
+    expectedStr = expectedTools.join(' or ');
+  } else {
+    expectedStr = expectedTools;
+  }
+
+  const foundStr = foundTools.length > 0 ? foundTools.join(', ') : 'none';
+  const preview = result ? result.substring(0, 200) + '...' : 'no output';
+
+  return [
+    `Expected to find ${expectedStr} tool call(s). `,
+    `Found: ${foundStr}. `,
+    `Output preview: ${preview}`,
+  ].join('');
+}
+
+/**
+ * Dumps diagnostics for a failed test to stderr: the size and both ends of the
+ * output, any caller-supplied context entries, and the names of all tool calls
+ * the rig recorded.
+ *
+ * @returns The tool logs read from the rig, so callers can inspect them.
+ */
+export function printDebugInfo(
+  rig: TestRig,
+  result: string,
+  context: Record<string, unknown> = {},
+) {
+  const PREVIEW_CHARS = 500;
+
+  console.error('Test failed - Debug info:');
+  console.error('Result length:', result.length);
+  console.error(
+    'Result (first 500 chars):',
+    result.substring(0, PREVIEW_CHARS),
+  );
+  console.error('Result (last 500 chars):', result.slice(-PREVIEW_CHARS));
+
+  // Print any additional context provided
+  for (const [key, value] of Object.entries(context)) {
+    console.error(`${key}:`, value);
+  }
+
+  // Check what tools were actually called
+  const allTools = rig.readToolLogs();
+  const toolNames = allTools.map(({ toolRequest }) => toolRequest.name);
+  console.error('All tool calls found:', toolNames);
+
+  return allTools;
+}
+
+/**
+ * Validates model output and warns (without failing) about missing content.
+ *
+ * @param result Raw model output.
+ * @param expectedContent Substrings (matched case-insensitively) and/or
+ *   regular expressions expected in `result`; `null` skips the content check.
+ * @param testName Label used in the verbose success message.
+ * @returns `false` when some expected content is missing, `true` otherwise.
+ * @throws Error when `result` is empty or whitespace-only.
+ */
+export function validateModelOutput(
+  result: string,
+  expectedContent: string | (string | RegExp)[] | null = null,
+  testName = '',
+) {
+  // First, check if there's any output at all (this should fail the test if missing)
+  if (!result || result.trim().length === 0) {
+    throw new Error('Expected LLM to return some output');
+  }
+
+  // Nothing specific to look for: non-empty output is enough.
+  if (!expectedContent) {
+    return true;
+  }
+
+  const contents = Array.isArray(expectedContent)
+    ? expectedContent
+    : [expectedContent];
+  const loweredResult = result.toLowerCase();
+
+  const missingContent = contents.filter((content) => {
+    if (typeof content === 'string') {
+      return !loweredResult.includes(content.toLowerCase());
+    }
+    if (content instanceof RegExp) {
+      // Global/sticky regexes carry `lastIndex` across `test()` calls, so a
+      // reused pattern could start mid-string and miss a real match. Reset it
+      // before and after so the check is stateless.
+      content.lastIndex = 0;
+      const matched = content.test(result);
+      content.lastIndex = 0;
+      return !matched;
+    }
+    return false;
+  });
+
+  if (missingContent.length > 0) {
+    console.warn(
+      `Warning: LLM did not include expected content in response: ${missingContent.join(
+        ', ',
+      )}.`,
+      'This is not ideal but not a test failure.',
+    );
+    console.warn(
+      'The tool was called successfully, which is the main requirement.',
+    );
+    console.warn('Expected content:', expectedContent);
+    console.warn('Actual output:', result);
+    return false;
+  }
+
+  if (env['VERBOSE'] === 'true') {
+    console.log(`${testName}: Model output validated successfully.`);
+  }
+  return true;
+}
+
+/**
+ * Shape of one JSON object parsed from the telemetry log file.
+ *
+ * Both top-level fields are optional; readers in this file check for
+ * `attributes` or `scopeMetrics` before using them.
+ */
+export interface ParsedLog {
+  // Event attributes; present on event entries such as tool calls.
+  attributes?: {
+    // Event identifier, e.g. 'gemini_cli.tool_call' or 'gemini_cli.api_request'.
+    'event.name'?: string;
+    // Tool-call fields: tool name, its arguments as a string, outcome, duration.
+    function_name?: string;
+    function_args?: string;
+    success?: boolean;
+    duration_ms?: number;
+    request_text?: string;
+    // NOTE(review): hook_* and the fields below are not read in the visible
+    // part of this file; presumably populated for hook events - confirm.
+    hook_event_name?: string;
+    hook_name?: string;
+    hook_input?: Record<string, unknown>;
+    hook_output?: Record<string, unknown>;
+    exit_code?: number;
+    stdout?: string;
+    stderr?: string;
+    error?: string;
+  };
+  // Metric export entries; metrics are looked up by `descriptor.name`.
+  scopeMetrics?: {
+    metrics: {
+      descriptor: {
+        name: string;
+      };
+    }[];
+  }[];
+}
+
+/**
+ * Wraps a pseudo-terminal process running the CLI interactively: accumulates
+ * everything the process prints and offers helpers to type input and to wait
+ * for text or for process exit.
+ */
+export class InteractiveRun {
+  ptyProcess: pty.IPty;
+  // Raw terminal output received so far (ANSI sequences included).
+  public output = '';
+
+  constructor(ptyProcess: pty.IPty) {
+    this.ptyProcess = ptyProcess;
+    const shouldEcho = () =>
+      env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true';
+    ptyProcess.onData((chunk) => {
+      this.output += chunk;
+      if (shouldEcho()) {
+        process.stdout.write(chunk);
+      }
+    });
+  }
+
+  /**
+   * Waits for `text` to show up (case-insensitively, ANSI codes stripped) in
+   * the output, then asserts that it is present.
+   *
+   * @param timeout Milliseconds to wait; falls back to getDefaultTimeout().
+   */
+  async expectText(text: string, timeout?: number) {
+    const needle = text.toLowerCase();
+    const visibleOutput = () => stripAnsi(this.output).toLowerCase();
+
+    await poll(
+      () => visibleOutput().includes(needle),
+      timeout || getDefaultTimeout(),
+      200,
+    );
+    expect(visibleOutput()).toContain(needle);
+  }
+
+  // This types slowly to make sure command is correct, but only work for short
+  // commands that are not multi-line, use sendKeys to type long prompts
+  async type(text: string) {
+    let typedSoFar = '';
+    for (const char of text) {
+      if (char === '\r') {
+        // wait >30ms before `enter` to avoid fast return conversion
+        // from bufferFastReturn() in KeypressContent.tsx
+        await sleep(50);
+      }
+
+      this.ptyProcess.write(char);
+      typedSoFar += char;
+
+      // Wait for the typed sequence so far to be echoed back.
+      const echoed = await poll(
+        () => stripAnsi(this.output).includes(typedSoFar),
+        5000, // 5s timeout per character (generous for CI)
+        10, // check frequently
+      );
+      if (echoed) {
+        continue;
+      }
+
+      const strippedOutput = stripAnsi(this.output);
+      throw new Error(
+        `Timed out waiting for typed text to appear in output: "${typedSoFar}".\nStripped output:\n${strippedOutput}`,
+      );
+    }
+  }
+
+  // Types an entire string at once, necessary for some things like commands
+  // but may run into paste detection issues for larger strings.
+  async sendText(text: string) {
+    this.ptyProcess.write(text);
+    await sleep(5);
+  }
+
+  // Simulates typing a string one character at a time to avoid paste detection.
+  async sendKeys(text: string) {
+    const KEY_DELAY_MS = 5;
+    for (const key of text) {
+      this.ptyProcess.write(key);
+      await sleep(KEY_DELAY_MS);
+    }
+  }
+
+  // Terminates the underlying pty process.
+  async kill() {
+    this.ptyProcess.kill();
+  }
+
+  /**
+   * Resolves with the exit code once the process exits; rejects if that does
+   * not happen within 60 seconds.
+   */
+  expectExit(): Promise<number> {
+    return new Promise((resolve, reject) => {
+      const onTimeout = () => {
+        reject(
+          new Error(`Test timed out: process did not exit within a minute.`),
+        );
+      };
+      const timer = setTimeout(onTimeout, 60000);
+
+      this.ptyProcess.onExit(({ exitCode }) => {
+        clearTimeout(timer);
+        resolve(exitCode);
+      });
+    });
+  }
+}
+
+export class TestRig {
+  testDir: string | null = null;
+  homeDir: string | null = null;
+  testName?: string;
+  _lastRunStdout?: string;
+  // Path to the copied fake responses file for this test.
+  fakeResponsesPath?: string;
+  // Original fake responses file path for rewriting goldens in record mode.
+  originalFakeResponsesPath?: string;
+  private _interactiveRuns: InteractiveRun[] = [];
+  private _spawnedProcesses: ChildProcess[] = [];
+
+  /**
+   * Prepares the working and home directories for a test and writes the
+   * project settings file.
+   *
+   * Directories live under `INTEGRATION_TEST_FILE_DIR` when set, otherwise
+   * under `<tmpdir>/gemini-cli-tests`, and are named after the sanitized test
+   * name. When `fakeResponsesPath` is given it is copied into the test
+   * directory, unless `REGENERATE_MODEL_GOLDENS` is `'true'`.
+   */
+  setup(
+    testName: string,
+    options: {
+      settings?: Record<string, unknown>;
+      fakeResponsesPath?: string;
+    } = {},
+  ) {
+    this.testName = testName;
+
+    const slug = sanitizeTestName(testName);
+    const baseDir =
+      env['INTEGRATION_TEST_FILE_DIR'] || join(os.tmpdir(), 'gemini-cli-tests');
+    const testDir = join(baseDir, slug);
+    const homeDir = join(baseDir, slug + '-home');
+    this.testDir = testDir;
+    this.homeDir = homeDir;
+    for (const dir of [testDir, homeDir]) {
+      mkdirSync(dir, { recursive: true });
+    }
+
+    const goldenPath = options.fakeResponsesPath;
+    if (goldenPath) {
+      const localCopy = join(testDir, 'fake-responses.json');
+      this.fakeResponsesPath = localCopy;
+      this.originalFakeResponsesPath = goldenPath;
+      const isRecording = process.env['REGENERATE_MODEL_GOLDENS'] === 'true';
+      if (!isRecording) {
+        fs.copyFileSync(goldenPath, localCopy);
+      }
+    }
+
+    // Create a settings file to point the CLI to the local collector
+    this._createSettingsFile(options.settings);
+  }
+
+  /**
+   * Writes `<testDir>/<GEMINI_DIR>/settings.json` with the rig's defaults.
+   * Top-level keys in `overrideSettings` replace the defaults wholesale.
+   */
+  private _createSettingsFile(overrideSettings?: Record<string, unknown>) {
+    const projectGeminiDir = join(this.testDir!, GEMINI_DIR);
+    mkdirSync(projectGeminiDir, { recursive: true });
+
+    // Telemetry always goes to the home directory.
+    const telemetryPath = join(this.homeDir!, 'telemetry.log');
+
+    // The string 'false' disables the sandbox; any other value is passed on.
+    const sandboxEnv = env['GEMINI_SANDBOX'];
+    const sandbox = sandboxEnv === 'false' ? false : sandboxEnv;
+
+    const defaults = {
+      general: {
+        // Nightly releases sometimes becomes out of sync with local code and
+        // triggers auto-update, which causes tests to fail.
+        disableAutoUpdate: true,
+        previewFeatures: false,
+      },
+      telemetry: {
+        enabled: true,
+        target: 'local',
+        otlpEndpoint: '',
+        outfile: telemetryPath,
+      },
+      security: {
+        auth: {
+          selectedType: 'gemini-api-key',
+        },
+      },
+      ui: {
+        useAlternateBuffer: true,
+      },
+      model: {
+        name: DEFAULT_GEMINI_MODEL,
+      },
+      sandbox,
+      // Don't show the IDE connection dialog when running from VsCode
+      ide: { enabled: false, hasSeenNudge: true },
+    };
+
+    // Allow tests to override/add settings
+    const settings = { ...defaults, ...overrideSettings };
+    const settingsPath = join(projectGeminiDir, 'settings.json');
+    writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
+  }
+
+  /**
+   * Writes `content` to `fileName` inside the test directory, creating any
+   * missing parent directories first so nested paths work without a prior
+   * `mkdir` call.
+   *
+   * @returns The absolute path of the written file.
+   */
+  createFile(fileName: string, content: string) {
+    const filePath = join(this.testDir!, fileName);
+    // No-op when the directory already exists.
+    mkdirSync(dirname(filePath), { recursive: true });
+    writeFileSync(filePath, content);
+    return filePath;
+  }
+
+  // Creates `dir` (and any missing parents) inside the test directory.
+  mkdir(dir: string) {
+    const target = join(this.testDir!, dir);
+    mkdirSync(target, { recursive: true });
+  }
+
+  // Runs the `sync` command so pending file writes are flushed before a
+  // process is spawned. Skipped on Windows.
+  sync() {
+    const isWindows = os.platform() === 'win32';
+    if (!isWindows) {
+      execSync('sync', { cwd: this.testDir! });
+    }
+  }
+
+  /**
+   * The command and args to use to invoke Gemini CLI. Allows us to switch
+   * between using the bundled gemini.js (the default) and using the installed
+   * 'gemini' (used to verify npm bundles).
+   *
+   * When a fake-responses file is configured, `--record-responses` (if
+   * `REGENERATE_MODEL_GOLDENS` is `'true'`) or `--fake-responses` is appended.
+   *
+   * @param extraInitialArgs Arguments placed right after the entry point.
+   *   The array is copied, never mutated.
+   * @returns The executable and a freshly allocated argument list.
+   */
+  private _getCommandAndArgs(extraInitialArgs: string[] = []): {
+    command: string;
+    initialArgs: string[];
+  } {
+    const isNpmReleaseTest =
+      env['INTEGRATION_TEST_USE_INSTALLED_GEMINI'] === 'true';
+    const command = isNpmReleaseTest ? 'gemini' : 'node';
+    // Always build a new array: reusing `extraInitialArgs` directly would let
+    // the pushes below leak into the caller's array.
+    const initialArgs = isNpmReleaseTest
+      ? [...extraInitialArgs]
+      : [BUNDLE_PATH, ...extraInitialArgs];
+    if (this.fakeResponsesPath) {
+      const flag =
+        process.env['REGENERATE_MODEL_GOLDENS'] === 'true'
+          ? '--record-responses'
+          : '--fake-responses';
+      initialArgs.push(flag, this.fakeResponsesPath);
+    }
+    return { command, initialArgs };
+  }
+
+  /**
+   * Spawns the CLI in the test directory and collects its output.
+   *
+   * @param options.args Extra CLI argument(s) appended after the initial args.
+   * @param options.stdin Text written to the child's stdin.
+   * @param options.stdinDoesNotEnd Leave stdin open instead of closing it.
+   * @param options.yolo Pass `--yolo` unless explicitly set to `false`.
+   * @param options.timeout Kill the child after this many ms (default 120000).
+   * @param options.env Extra environment variables; they override inherited
+   *   ones and `GEMINI_CLI_HOME`.
+   * @returns Resolves on exit code 0 with stdout (Podman telemetry filtered
+   *   out), followed by a `StdErr:` section when stderr is non-empty and the
+   *   args do not request JSON output. Rejects on spawn error, non-zero exit
+   *   or timeout.
+   */
+  run(options: {
+    args?: string | string[];
+    stdin?: string;
+    stdinDoesNotEnd?: boolean;
+    yolo?: boolean;
+    timeout?: number;
+    env?: Record<string, string | undefined>;
+  }): Promise<string> {
+    const yolo = options.yolo !== false;
+    const { command, initialArgs } = this._getCommandAndArgs(
+      yolo ? ['--yolo'] : [],
+    );
+    const commandArgs = [...initialArgs];
+
+    if (options.args) {
+      if (Array.isArray(options.args)) {
+        commandArgs.push(...options.args);
+      } else {
+        commandArgs.push(options.args);
+      }
+    }
+
+    const child = spawn(command, commandArgs, {
+      cwd: this.testDir!,
+      stdio: 'pipe',
+      env: {
+        ...process.env,
+        GEMINI_CLI_HOME: this.homeDir!,
+        ...options.env,
+      },
+    });
+    // Tracked so cleanup() can kill it if it is still running.
+    this._spawnedProcesses.push(child);
+
+    let stdout = '';
+    let stderr = '';
+
+    // Handle stdin if provided
+    if (options.stdin) {
+      child.stdin!.write(options.stdin);
+    }
+
+    if (!options.stdinDoesNotEnd) {
+      child.stdin!.end();
+    }
+
+    child.stdout!.setEncoding('utf8');
+    child.stdout!.on('data', (data: string) => {
+      stdout += data;
+      if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
+        process.stdout.write(data);
+      }
+    });
+
+    child.stderr!.setEncoding('utf8');
+    child.stderr!.on('data', (data: string) => {
+      stderr += data;
+      if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
+        process.stderr.write(data);
+      }
+    });
+
+    const timeout = options.timeout ?? 120000;
+    const promise = new Promise<string>((resolve, reject) => {
+      const timer = setTimeout(() => {
+        child.kill('SIGKILL');
+        reject(
+          new Error(
+            `Process timed out after ${timeout}ms.\nStdout:\n${stdout}\nStderr:\n${stderr}`,
+          ),
+        );
+      }, timeout);
+
+      child.on('error', (err) => {
+        clearTimeout(timer);
+        reject(err);
+      });
+
+      child.on('close', (code: number) => {
+        clearTimeout(timer);
+        if (code === 0) {
+          // Store the raw stdout for Podman telemetry parsing
+          this._lastRunStdout = stdout;
+
+          // Filter out telemetry output when running with Podman
+          const result = this._filterPodmanTelemetry(stdout);
+
+          // Check if this is a JSON output test - if so, don't include stderr
+          // as it would corrupt the JSON
+          const isJsonOutput =
+            commandArgs.includes('--output-format') &&
+            commandArgs.includes('json');
+
+          // If we have stderr output and it's not a JSON test, include that also
+          const finalResult =
+            stderr && !isJsonOutput
+              ? `${result}\n\nStdErr:\n${stderr}`
+              : result;
+
+          resolve(finalResult);
+        } else {
+          reject(new Error(`Process exited with code ${code}:\n${stderr}`));
+        }
+      });
+    });
+
+    return promise;
+  }
+
+  /**
+   * Strips multi-line telemetry objects from stdout when the sandbox is
+   * Podman; for any other sandbox setting the input is returned untouched.
+   *
+   * An object starts at a line that is exactly `{` (ignoring surrounding
+   * whitespace) and ends on the line where the brace count returns to zero.
+   * Input is split on `os.EOL` and the kept lines are joined with `\n`.
+   */
+  private _filterPodmanTelemetry(stdout: string): string {
+    if (env['GEMINI_SANDBOX'] !== 'podman') {
+      return stdout;
+    }
+
+    const kept: string[] = [];
+    let depth = 0;
+    let skipping = false;
+
+    for (const line of stdout.split(os.EOL)) {
+      if (skipping) {
+        // Inside an object: track nesting and drop the line either way.
+        for (const ch of line) {
+          if (ch === '{') {
+            depth += 1;
+          } else if (ch === '}') {
+            depth -= 1;
+          }
+        }
+        if (depth === 0) {
+          skipping = false;
+        }
+        continue;
+      }
+
+      if (line.trim() === '{') {
+        // A lone opening brace is assumed to start a telemetry object.
+        skipping = true;
+        depth = 1;
+        continue;
+      }
+
+      kept.push(line);
+    }
+
+    return kept.join('\n');
+  }
+
+  /**
+   * Spawns the CLI with exactly `args` (no `--yolo` is added) in the test
+   * directory and collects its output.
+   *
+   * @param args CLI arguments appended after the initial args.
+   * @param options.stdin Text written to the child's stdin; stdin is closed
+   *   only when this is provided.
+   * @param options.timeout Kill the child after this many ms (default 120000).
+   * @param options.env Extra environment variables; they override inherited
+   *   ones and `GEMINI_CLI_HOME`.
+   * @returns Resolves on exit code 0 with stdout (Podman telemetry filtered
+   *   out), followed by a `StdErr:` section when stderr is non-empty and the
+   *   args do not request JSON output. Rejects on spawn error, non-zero exit
+   *   or timeout.
+   */
+  runCommand(
+    args: string[],
+    options: {
+      stdin?: string;
+      timeout?: number;
+      env?: Record<string, string | undefined>;
+    } = {},
+  ): Promise<string> {
+    const { command, initialArgs } = this._getCommandAndArgs();
+    const commandArgs = [...initialArgs, ...args];
+
+    const child = spawn(command, commandArgs, {
+      cwd: this.testDir!,
+      stdio: 'pipe',
+      env: {
+        ...process.env,
+        GEMINI_CLI_HOME: this.homeDir!,
+        ...options.env,
+      },
+    });
+    // Tracked so cleanup() can kill it if it is still running.
+    this._spawnedProcesses.push(child);
+
+    let stdout = '';
+    let stderr = '';
+
+    // Note: without `options.stdin` the child's stdin pipe is left open.
+    if (options.stdin) {
+      child.stdin!.write(options.stdin);
+      child.stdin!.end();
+    }
+
+    child.stdout!.setEncoding('utf8');
+    child.stdout!.on('data', (data: string) => {
+      stdout += data;
+      if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
+        process.stdout.write(data);
+      }
+    });
+
+    child.stderr!.setEncoding('utf8');
+    child.stderr!.on('data', (data: string) => {
+      stderr += data;
+      if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
+        process.stderr.write(data);
+      }
+    });
+
+    const timeout = options.timeout ?? 120000;
+    const promise = new Promise<string>((resolve, reject) => {
+      // Hard-kill the child and fail with everything captured so far.
+      const timer = setTimeout(() => {
+        child.kill('SIGKILL');
+        reject(
+          new Error(
+            `Process timed out after ${timeout}ms.\nStdout:\n${stdout}\nStderr:\n${stderr}`,
+          ),
+        );
+      }, timeout);
+
+      child.on('error', (err) => {
+        clearTimeout(timer);
+        reject(err);
+      });
+
+      child.on('close', (code: number) => {
+        clearTimeout(timer);
+        if (code === 0) {
+          // Keep the raw stdout; readToolLogs() may parse it under Podman.
+          this._lastRunStdout = stdout;
+          const result = this._filterPodmanTelemetry(stdout);
+
+          // Check if this is a JSON output test - if so, don't include stderr
+          // as it would corrupt the JSON
+          const isJsonOutput =
+            commandArgs.includes('--output-format') &&
+            commandArgs.includes('json');
+
+          const finalResult =
+            stderr && !isJsonOutput
+              ? `${result}\n\nStdErr:\n${stderr}`
+              : result;
+          resolve(finalResult);
+        } else {
+          reject(new Error(`Process exited with code ${code}:\n${stderr}`));
+        }
+      });
+    });
+
+    return promise;
+  }
+
+  /**
+   * Reads a UTF-8 file from the test directory and returns its content,
+   * echoing it to the console when `KEEP_OUTPUT` or `VERBOSE` is `'true'`.
+   */
+  readFile(fileName: string) {
+    const filePath = join(this.testDir!, fileName);
+    const content = readFileSync(filePath, 'utf-8');
+
+    const shouldEcho =
+      env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true';
+    if (shouldEcho) {
+      for (const line of [
+        `--- FILE: ${filePath} ---`,
+        content,
+        `--- END FILE: ${filePath} ---`,
+      ]) {
+        console.log(line);
+      }
+    }
+    return content;
+  }
+
+  /**
+   * Tears down the rig: kills interactive runs and still-running spawned
+   * processes, copies recorded responses back to the golden file when
+   * `REGENERATE_MODEL_GOLDENS` is `'true'`, and removes the test and home
+   * directories unless `KEEP_OUTPUT` is set.
+   */
+  async cleanup() {
+    const verbose = env['VERBOSE'] === 'true';
+
+    // Kill any interactive runs that are still active
+    for (const run of this._interactiveRuns) {
+      try {
+        await run.kill();
+      } catch (error) {
+        if (verbose) {
+          console.warn('Failed to kill interactive run during cleanup:', error);
+        }
+      }
+    }
+    this._interactiveRuns = [];
+
+    // Kill any other spawned processes that are still running
+    for (const child of this._spawnedProcesses) {
+      const hasExited = child.exitCode !== null || child.signalCode !== null;
+      if (hasExited) {
+        continue;
+      }
+      try {
+        child.kill('SIGKILL');
+      } catch (error) {
+        if (verbose) {
+          console.warn('Failed to kill spawned process during cleanup:', error);
+        }
+      }
+    }
+    this._spawnedProcesses = [];
+
+    // In record mode, publish the freshly recorded responses as the golden.
+    const isRecording = process.env['REGENERATE_MODEL_GOLDENS'] === 'true';
+    if (isRecording && this.fakeResponsesPath) {
+      fs.copyFileSync(this.fakeResponsesPath, this.originalFakeResponsesPath!);
+    }
+
+    // Clean up test directory and home directory
+    for (const dir of [this.testDir, this.homeDir]) {
+      if (!dir || env['KEEP_OUTPUT']) {
+        continue;
+      }
+      try {
+        fs.rmSync(dir, { recursive: true, force: true });
+      } catch (error) {
+        // Ignore cleanup errors
+        if (verbose) {
+          console.warn('Cleanup warning:', (error as Error).message);
+        }
+      }
+    }
+  }
+
+  /**
+   * Waits up to two seconds for the telemetry log in the home directory to
+   * exist and to contain a `"scopeMetrics"` entry. Returns silently on
+   * timeout; callers poll for their own condition afterwards.
+   */
+  async waitForTelemetryReady() {
+    // Telemetry is always written to the home directory
+    const logFilePath = join(this.homeDir!, 'telemetry.log');
+
+    // Wait for telemetry file to exist and have content
+    await poll(
+      () => {
+        if (!fs.existsSync(logFilePath)) return false;
+        try {
+          const content = readFileSync(logFilePath, 'utf-8');
+          // Check if file has meaningful content (a metrics export was written)
+          return content.includes('"scopeMetrics"');
+        } catch {
+          // The file may vanish or be unreadable mid-write; try again.
+          return false;
+        }
+      },
+      2000, // 2 seconds max - reduced since telemetry should flush on exit now
+      100, // check every 100ms
+    );
+  }
+
+  /**
+   * Polls the telemetry log until an entry whose `event.name` equals
+   * `gemini_cli.<eventName>` appears.
+   *
+   * @param timeout Milliseconds to wait; falls back to getDefaultTimeout().
+   * @returns `true` if the event was seen, `false` on timeout.
+   */
+  async waitForTelemetryEvent(eventName: string, timeout?: number) {
+    const budget = timeout || getDefaultTimeout();
+
+    await this.waitForTelemetryReady();
+
+    const hasEvent = () =>
+      this._readAndParseTelemetryLog().some(
+        (logData) =>
+          logData.attributes &&
+          logData.attributes['event.name'] === `gemini_cli.${eventName}`,
+      );
+    return poll(hasEvent, budget, 100);
+  }
+
+  /**
+   * Polls the tool logs until a call to `toolName` shows up.
+   *
+   * @param timeout Milliseconds to wait; falls back to getDefaultTimeout().
+   * @param matchArgs Optional filter on the call's argument string.
+   * @returns `true` if a matching call was seen, `false` on timeout.
+   */
+  async waitForToolCall(
+    toolName: string,
+    timeout?: number,
+    matchArgs?: (args: string) => boolean,
+  ) {
+    // Use environment-specific timeout
+    const budget = timeout || getDefaultTimeout();
+
+    // Wait for telemetry to be ready before polling for tool calls
+    await this.waitForTelemetryReady();
+
+    const sawCall = () =>
+      this.readToolLogs().some(
+        ({ toolRequest }) =>
+          toolRequest.name === toolName &&
+          (matchArgs?.call(this, toolRequest.args) ?? true),
+      );
+    return poll(sawCall, budget, 100);
+  }
+
+  /**
+   * Asserts that a successful call to at least one of `toolNames` is logged
+   * before the timeout.
+   *
+   * @param timeout Milliseconds to wait; falls back to getDefaultTimeout().
+   * @param matchArgs Optional filter on the call's argument string.
+   */
+  async expectToolCallSuccess(
+    toolNames: string[],
+    timeout?: number,
+    matchArgs?: (args: string) => boolean,
+  ) {
+    // Use environment-specific timeout
+    const budget = timeout || getDefaultTimeout();
+
+    // Wait for telemetry to be ready before polling for tool calls
+    await this.waitForTelemetryReady();
+
+    const anySucceeded = () => {
+      const toolLogs = this.readToolLogs();
+      return toolNames.some((name) =>
+        toolLogs.some(
+          ({ toolRequest }) =>
+            toolRequest.name === name &&
+            toolRequest.success &&
+            (matchArgs?.call(this, toolRequest.args) ?? true),
+        ),
+      );
+    };
+    const success = await poll(anySucceeded, budget, 100);
+
+    const failureMessage = `Expected to find successful toolCalls for ${JSON.stringify(toolNames)}`;
+    expect(success, failureMessage).toBe(true);
+  }
+
+  /**
+   * Polls the tool logs until a call to any of `toolNames` shows up.
+   *
+   * @param timeout Milliseconds to wait; falls back to getDefaultTimeout().
+   * @returns `true` if one of the tools was called, `false` on timeout.
+   */
+  async waitForAnyToolCall(toolNames: string[], timeout?: number) {
+    const budget = timeout || getDefaultTimeout();
+
+    // Wait for telemetry to be ready before polling for tool calls
+    await this.waitForTelemetryReady();
+
+    const sawAny = () => {
+      const toolLogs = this.readToolLogs();
+      return toolNames.some((name) =>
+        toolLogs.some(({ toolRequest }) => toolRequest.name === name),
+      );
+    };
+    return poll(sawAny, budget, 100);
+  }
+
+  /**
+   * Extracts tool-call records from raw CLI stdout (used when telemetry was
+   * printed to the console instead of the log file).
+   *
+   * First scans for `body: 'Tool call: <name>. ... Success: <bool>. ...
+   * Duration: <n>ms.'` entries, taking `function_args` / `function_name` from
+   * the 500 characters on either side of each match. If nothing matches, it
+   * falls back to parsing brace-delimited multi-line blocks as JSON.
+   *
+   * @param stdout Raw stdout of a CLI run.
+   * @returns One record per detected tool call; empty when none are found.
+   */
+  _parseToolLogsFromStdout(stdout: string) {
+    const logs: {
+      timestamp: number;
+      toolRequest: {
+        name: string;
+        args: string;
+        success: boolean;
+        duration_ms: number;
+      };
+    }[] = [];
+
+    // The console output from Podman is JavaScript object notation, not JSON
+    // Look for tool call events in the output
+    // Tool names may contain hyphens and underscores
+    const toolCallPattern =
+      /body:\s*'Tool call:\s*([\w-]+)\..*?Success:\s*(\w+)\..*?Duration:\s*(\d+)ms\.'/g;
+    const matches = [...stdout.matchAll(toolCallPattern)];
+
+    for (const match of matches) {
+      const toolName = match[1];
+      const success = match[2] === 'true';
+      const duration = parseInt(match[3], 10);
+
+      // Try to find function_args nearby
+      const matchIndex = match.index || 0;
+      const contextStart = Math.max(0, matchIndex - 500);
+      const contextEnd = Math.min(stdout.length, matchIndex + 500);
+      const context = stdout.substring(contextStart, contextEnd);
+
+      // Look for function_args in the context
+      let args = '{}';
+      const argsMatch = context.match(/function_args:\s*'([^']+)'/);
+      if (argsMatch) {
+        args = argsMatch[1];
+      }
+
+      // Also try to find function_name to double-check
+      const nameMatch = context.match(/function_name:\s*'([\w-]+)'/);
+      const actualToolName = nameMatch ? nameMatch[1] : toolName;
+
+      logs.push({
+        timestamp: Date.now(),
+        toolRequest: {
+          name: actualToolName,
+          args: args,
+          success: success,
+          duration_ms: duration,
+        },
+      });
+    }
+
+    // If no matches found with the simple pattern, try the JSON parsing approach
+    // in case the format changes
+    if (logs.length === 0) {
+      const lines = stdout.split(os.EOL);
+      let currentObject = '';
+      let inObject = false;
+      let braceDepth = 0;
+
+      for (const line of lines) {
+        if (!inObject && line.trim() === '{') {
+          inObject = true;
+          braceDepth = 1;
+          currentObject = line + '\n';
+        } else if (inObject) {
+          currentObject += line + '\n';
+
+          // Count braces
+          for (const char of line) {
+            if (char === '{') braceDepth++;
+            else if (char === '}') braceDepth--;
+          }
+
+          // If we've closed all braces, try to parse the object
+          if (braceDepth === 0) {
+            inObject = false;
+            try {
+              const obj = JSON.parse(currentObject);
+
+              // Check for tool call in different formats
+              if (
+                obj.body &&
+                obj.body.includes('Tool call:') &&
+                obj.attributes
+              ) {
+                // Same name charset as the patterns above, so hyphenated tool
+                // names are not dropped in this fallback path.
+                const bodyMatch = obj.body.match(/Tool call: ([\w-]+)\./);
+                if (bodyMatch) {
+                  logs.push({
+                    timestamp: obj.timestamp || Date.now(),
+                    toolRequest: {
+                      name: bodyMatch[1],
+                      args: obj.attributes.function_args || '{}',
+                      success: obj.attributes.success !== false,
+                      duration_ms: obj.attributes.duration_ms || 0,
+                    },
+                  });
+                }
+              } else if (
+                obj.attributes &&
+                obj.attributes['event.name'] === 'gemini_cli.tool_call'
+              ) {
+                logs.push({
+                  timestamp: obj.attributes['event.timestamp'],
+                  toolRequest: {
+                    name: obj.attributes.function_name,
+                    args: obj.attributes.function_args,
+                    success: obj.attributes.success,
+                    duration_ms: obj.attributes.duration_ms,
+                  },
+                });
+              }
+            } catch {
+              // Not valid JSON
+            }
+            currentObject = '';
+          }
+        }
+      }
+    }
+
+    return logs;
+  }
+
+  /**
+   * Reads the telemetry log from the home directory and parses it into
+   * individual entries.
+   *
+   * The file holds JSON objects written back to back, so it is split at every
+   * `}` / newline / `{` boundary and each piece is parsed on its own. Pieces
+   * that are not valid JSON are skipped.
+   *
+   * @returns The parsed entries, or an empty array if the file does not exist.
+   */
+  private _readAndParseTelemetryLog(): ParsedLog[] {
+    // Telemetry is always written to the home directory
+    const logFilePath = join(this.homeDir!, 'telemetry.log');
+
+    if (!fs.existsSync(logFilePath)) {
+      return [];
+    }
+
+    const content = readFileSync(logFilePath, 'utf-8');
+
+    // Split the content into individual JSON objects
+    // They are separated by "}\n{" (also accept CRLF line endings)
+    const jsonObjects = content
+      .split(/}\r?\n{/)
+      .map((obj, index, array) => {
+        // Add back the braces we removed during split
+        if (index > 0) obj = '{' + obj;
+        if (index < array.length - 1) obj = obj + '}';
+        return obj.trim();
+      })
+      .filter((obj) => obj);
+
+    const logs: ParsedLog[] = [];
+
+    for (const jsonStr of jsonObjects) {
+      try {
+        const logData = JSON.parse(jsonStr);
+        logs.push(logData);
+      } catch (e) {
+        // Skip objects that aren't valid JSON
+        if (env['VERBOSE'] === 'true') {
+          console.error('Failed to parse telemetry object:', e);
+        }
+      }
+    }
+
+    return logs;
+  }
+
+  /**
+   * Returns every tool call recorded for this rig.
+   *
+   * Normally the calls come from `gemini_cli.tool_call` entries in the
+   * telemetry log. Under the Podman sandbox, if that file is missing,
+   * unreadable or contains no events, the stdout of the last run is parsed
+   * instead (when there is one).
+   */
+  readToolLogs() {
+    if (env['GEMINI_SANDBOX'] === 'podman') {
+      const logFilePath = join(this.homeDir!, 'telemetry.log');
+
+      let fileHasEvents = false;
+      if (fs.existsSync(logFilePath)) {
+        try {
+          const content = readFileSync(logFilePath, 'utf-8');
+          fileHasEvents = Boolean(content) && content.includes('"event.name"');
+        } catch {
+          // Error reading file, fall back to stdout
+          fileHasEvents = false;
+        }
+      }
+
+      if (!fileHasEvents && this._lastRunStdout) {
+        return this._parseToolLogsFromStdout(this._lastRunStdout);
+      }
+    }
+
+    const toolCalls = this._readAndParseTelemetryLog().filter(
+      (logData) =>
+        logData.attributes &&
+        logData.attributes['event.name'] === 'gemini_cli.tool_call',
+    );
+
+    return toolCalls.map((logData) => {
+      const attributes = logData.attributes!;
+      const toolRequest: {
+        name: string;
+        args: string;
+        success: boolean;
+        duration_ms: number;
+      } = {
+        name: attributes.function_name!,
+        args: attributes.function_args ?? '{}',
+        success: attributes.success ?? false,
+        duration_ms: attributes.duration_ms ?? 0,
+      };
+      return { toolRequest };
+    });
+  }
+
+  /**
+   * Returns every parsed telemetry entry whose `event.name` attribute is
+   * `gemini_cli.api_request`, in the order they appear in the log.
+   */
+  readAllApiRequest(): ParsedLog[] {
+    return this._readAndParseTelemetryLog().filter(
+      (entry) => entry.attributes?.['event.name'] === 'gemini_cli.api_request',
+    );
+  }
+
+  /**
+   * Returns the last parsed telemetry entry whose `event.name` attribute is
+   * `gemini_cli.api_request`, or `null` when the log contains none.
+   */
+  readLastApiRequest(): ParsedLog | null {
+    const matches = this._readAndParseTelemetryLog().filter(
+      (entry) => entry.attributes?.['event.name'] === 'gemini_cli.api_request',
+    );
+    if (matches.length === 0) {
+      return null;
+    }
+    return matches[matches.length - 1];
+  }
+
+  /**
+   * Waits for telemetry to be ready, then polls the telemetry log every
+   * 100 ms until a metric with the given name shows up.
+   *
+   * @param metricName Metric name, with or without the `gemini_cli.` prefix.
+   * @param timeout Value handed to `poll` as its timeout; defaults to
+   *   `getDefaultTimeout()` when omitted.
+   * @returns Whatever `poll` resolves to for this predicate.
+   */
+  async waitForMetric(metricName: string, timeout?: number) {
+    await this.waitForTelemetryReady();
+
+    const prefix = 'gemini_cli.';
+    let fullName = metricName;
+    if (!metricName.startsWith(prefix)) {
+      fullName = `gemini_cli.${metricName}`;
+    }
+
+    // True as soon as any scope of any log entry carries the metric.
+    const metricIsPresent = () =>
+      this._readAndParseTelemetryLog().some(
+        (entry) =>
+          entry.scopeMetrics?.some((scope) =>
+            scope.metrics.some((m) => m.descriptor.name === fullName),
+          ) ?? false,
+      );
+
+    return poll(metricIsPresent, timeout ?? getDefaultTimeout(), 100);
+  }
+
+  /**
+   * Returns the first metric in the telemetry log with the given name, or
+   * `null` when no such metric has been recorded.
+   *
+   * @param metricName Metric name, with or without the `gemini_cli.` prefix.
+   *   Both forms are accepted, matching `waitForMetric`, so a name that
+   *   worked there can be passed here unchanged.
+   */
+  readMetric(metricName: string): Record<string, unknown> | null {
+    // Normalize so an already-prefixed name is not prefixed a second time.
+    const fullName = metricName.startsWith('gemini_cli.')
+      ? metricName
+      : `gemini_cli.${metricName}`;
+
+    const logs = this._readAndParseTelemetryLog();
+    for (const logData of logs) {
+      if (logData.scopeMetrics) {
+        for (const scopeMetric of logData.scopeMetrics) {
+          for (const metric of scopeMetric.metrics) {
+            if (metric.descriptor.name === fullName) {
+              return metric;
+            }
+          }
+        }
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Spawns the CLI inside a pseudo-terminal and resolves once its input
+   * prompt text has been seen.
+   *
+   * @param options.args Extra command-line arguments appended after the
+   *   rig's initial arguments; a single string is passed as one argument.
+   * @param options.yolo Adds `--yolo` unless explicitly set to `false`.
+   * @param options.env Extra environment variables layered over
+   *   `process.env`; entries whose value is `undefined` are dropped.
+   * @returns The `InteractiveRun` wrapping the spawned process. It is also
+   *   recorded in `_interactiveRuns`.
+   */
+  async runInteractive(options?: {
+    args?: string | string[];
+    yolo?: boolean;
+    env?: Record<string, string | undefined>;
+  }): Promise<InteractiveRun> {
+    const yolo = options?.yolo !== false;
+    const { command, initialArgs } = this._getCommandAndArgs(
+      yolo ? ['--yolo'] : [],
+    );
+    const commandArgs = [...initialArgs];
+
+    // Forward caller-supplied arguments; previously `options.args` was
+    // accepted by the signature but never reached the spawned process.
+    const extraArgs = options?.args;
+    if (Array.isArray(extraArgs)) {
+      commandArgs.push(...extraArgs);
+    } else if (extraArgs) {
+      commandArgs.push(extraArgs);
+    }
+
+    const envVars = {
+      ...process.env,
+      GEMINI_CLI_HOME: this.homeDir!,
+      ...options?.env,
+    };
+
+    const ptyOptions: pty.IPtyForkOptions = {
+      name: 'xterm-color',
+      cols: 80,
+      rows: 80,
+      cwd: this.testDir!,
+      // The pty environment must map to strings only, so drop unset entries.
+      env: Object.fromEntries(
+        Object.entries(envVars).filter(([, v]) => v !== undefined),
+      ) as { [key: string]: string },
+    };
+
+    // Run under the same Node binary as the test process when the command
+    // is plain `node`.
+    const executable = command === 'node' ? process.execPath : command;
+    const ptyProcess = pty.spawn(executable, commandArgs, ptyOptions);
+
+    const run = new InteractiveRun(ptyProcess);
+    this._interactiveRuns.push(run);
+    // Wait for the app to be ready
+    await run.expectText('  Type your message or @path/to/file', 30000);
+    return run;
+  }
+
+  /**
+   * Returns the hook invocations recorded by telemetry, one entry per
+   * `gemini_cli.hook_call` event found in the parsed telemetry log.
+   *
+   * Missing attributes default to an empty string, an empty object, `0` or
+   * `false`, depending on the field.
+   */
+  readHookLogs() {
+    const logs: {
+      hookCall: {
+        hook_event_name: string;
+        hook_name: string;
+        hook_input: Record<string, unknown>;
+        hook_output: Record<string, unknown>;
+        exit_code: number;
+        stdout: string;
+        stderr: string;
+        duration_ms: number;
+        success: boolean;
+        error: string;
+      };
+    }[] = [];
+
+    for (const entry of this._readAndParseTelemetryLog()) {
+      const attrs = entry.attributes;
+      // Skip everything that is not a hook call event.
+      if (!attrs || attrs['event.name'] !== 'gemini_cli.hook_call') {
+        continue;
+      }
+
+      logs.push({
+        hookCall: {
+          hook_event_name: attrs.hook_event_name ?? '',
+          hook_name: attrs.hook_name ?? '',
+          hook_input: attrs.hook_input ?? {},
+          hook_output: attrs.hook_output ?? {},
+          exit_code: attrs.exit_code ?? 0,
+          stdout: attrs.stdout ?? '',
+          stderr: attrs.stderr ?? '',
+          duration_ms: attrs.duration_ms ?? 0,
+          success: attrs.success ?? false,
+          error: attrs.error ?? '',
+        },
+      });
+    }
+
+    return logs;
+  }
+
+  /**
+   * Repeatedly runs a command until a predicate holds or time runs out.
+   *
+   * Each round awaits `commandFn`, pauses 500 ms, then checks `predicateFn`;
+   * if it is still false, the loop pauses for `interval` before retrying.
+   *
+   * @param commandFn Async action to run at the start of every round.
+   * @param predicateFn Synchronous check; a truthy result ends the polling.
+   * @param timeout Time budget in milliseconds, checked before each round.
+   * @param interval Pause in milliseconds between rounds.
+   * @throws Error when the time budget is used up without the predicate
+   *   having held.
+   */
+  async pollCommand(
+    commandFn: () => Promise<void>,
+    predicateFn: () => boolean,
+    timeout: number = 30000,
+    interval: number = 1000,
+  ) {
+    const deadline = Date.now() + timeout;
+
+    while (Date.now() < deadline) {
+      await commandFn();
+      // Let the command's effects settle before checking.
+      await sleep(500);
+
+      const satisfied = predicateFn();
+      if (satisfied) {
+        return;
+      }
+
+      await sleep(interval);
+    }
+
+    throw new Error(`pollCommand timed out after ${timeout}ms`);
+  }
+}