diff --git a/.github/workflows/chained_e2e.yml b/.github/workflows/chained_e2e.yml
index 494163966e..487225d452 100644
--- a/.github/workflows/chained_e2e.yml
+++ b/.github/workflows/chained_e2e.yml
@@ -277,6 +277,37 @@ jobs:
         shell: 'pwsh'
         run: 'npm run test:integration:sandbox:none'
 
+  evals:
+    name: 'Evals (ALWAYS_PASSING)'
+    needs:
+      - 'merge_queue_skipper'
+      - 'parse_run_context'
+    runs-on: 'gemini-cli-ubuntu-16-core'
+    if: |
+      always() && (needs.merge_queue_skipper.result != 'success' || needs.merge_queue_skipper.outputs.skip != 'true')
+    steps:
+      - name: 'Checkout'
+        uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v5
+        with:
+          ref: '${{ needs.parse_run_context.outputs.sha }}'
+          repository: '${{ needs.parse_run_context.outputs.repository }}'
+
+      - name: 'Set up Node.js 20.x'
+        uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4
+        with:
+          node-version: '20.x'
+
+      - name: 'Install dependencies'
+        run: 'npm ci'
+
+      - name: 'Build project'
+        run: 'npm run build'
+
+      - name: 'Run Evals (Required to pass)'
+        env:
+          GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
+        run: 'npm run test:always_passing_evals'
+
   e2e:
     name: 'E2E'
     if: |
@@ -284,13 +315,15 @@ jobs:
     needs:
       - 'e2e_linux'
       - 'e2e_mac'
+      - 'evals'
       - 'merge_queue_skipper'
     runs-on: 'gemini-cli-ubuntu-16-core'
     steps:
       - name: 'Check E2E test results'
         run: |
           if [[ ${{ needs.e2e_linux.result }} != 'success' || \
-            ${{ needs.e2e_mac.result }} != 'success' ]]; then
+            ${{ needs.e2e_mac.result }} != 'success' || \
+            ${{ needs.evals.result }} != 'success' ]]; then
             echo "One or more E2E jobs failed."
exit 1 fi diff --git a/.github/workflows/evals-nightly.yml b/.github/workflows/evals-nightly.yml new file mode 100644 index 0000000000..6d44de7c12 --- /dev/null +++ b/.github/workflows/evals-nightly.yml @@ -0,0 +1,41 @@ +name: 'Evals: Nightly' + +on: + schedule: + - cron: '0 1 * * *' # Runs at 1 AM every day + workflow_dispatch: + inputs: + run_all: + description: 'Run all evaluations (including usually passing)' + type: 'boolean' + default: true + +permissions: + contents: 'read' + checks: 'write' + +jobs: + evals: + name: 'Evals (USUALLY_PASSING) nightly run' + runs-on: 'gemini-cli-ubuntu-16-core' + steps: + - name: 'Checkout' + uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5 + + - name: 'Set up Node.js' + uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4 + with: + node-version-file: '.nvmrc' + cache: 'npm' + + - name: 'Install dependencies' + run: 'npm ci' + + - name: 'Build project' + run: 'npm run build' + + - name: 'Run Evals' + env: + GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}' + RUN_EVALS: "${{ github.event.inputs.run_all != 'false' }}" + run: 'npm run test:all_evals' diff --git a/.gitignore b/.gitignore index bfb2b5e576..5128952039 100644 --- a/.gitignore +++ b/.gitignore @@ -59,3 +59,4 @@ patch_output.log .genkit .gemini-clipboard/ .eslintcache +evals/logs/ diff --git a/eslint.config.js b/eslint.config.js index c2d0d3b69b..0f20eeab42 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -35,6 +35,8 @@ export default tseslint.config( 'package/bundle/**', '.integration-tests/**', 'dist/**', + 'evals/**', + 'packages/test-utils/**', ], }, eslint.configs.recommended, diff --git a/evals/README.md b/evals/README.md new file mode 100644 index 0000000000..a339af842f --- /dev/null +++ b/evals/README.md @@ -0,0 +1,102 @@ +# Behavioral Evals + +Behavioral evaluations (evals) are tests designed to validate the agent's +behavior in response to specific prompts. 
They serve as a critical feedback loop +for changes to system prompts, tool definitions, and other model-steering +mechanisms. + +## Why Behavioral Evals? + +Unlike traditional **integration tests** which verify that the system functions +correctly (e.g., "does the file writer actually write to disk?"), behavioral +evals verify that the model _chooses_ to take the correct action (e.g., "does +the model decide to write to disk when asked to save code?"). + +They are also distinct from broad **industry benchmarks** (like SWE-bench). +While benchmarks measure general capabilities across complex challenges, our +behavioral evals focus on specific, granular behaviors relevant to the Gemini +CLI's features. + +### Key Characteristics + +- **Feedback Loop**: They help us understand how changes to prompts or tools + affect the model's decision-making. + - _Did a change to the system prompt make the model less likely to use tool + X?_ + - _Did a new tool definition confuse the model?_ +- **Regression Testing**: They prevent regressions in model steering. +- **Non-Determinism**: Unlike unit tests, LLM behavior can be non-deterministic. + We distinguish between behaviors that should be robust (`ALWAYS_PASSES`) and + those that are generally reliable but might occasionally vary + (`USUALLY_PASSES`). + +## Creating an Evaluation + +Evaluations are located in the `evals` directory. Each evaluation is a Vitest +test file that uses the `evalTest` function from `evals/test-helper.ts`. + +### `evalTest` + +The `evalTest` function is a helper that runs a single evaluation case. It takes +two arguments: + +1. `policy`: The consistency expectation for this test (`'ALWAYS_PASSES'` or + `'USUALLY_PASSES'`). +2. `evalCase`: An object defining the test case. + +#### Policies + +- `ALWAYS_PASSES`: Tests expected to pass 100% of the time. These are typically + trivial and test basic functionality. These run in every CI. 
+- `USUALLY_PASSES`: Tests expected to pass most of the time but may have some + flakiness due to non-deterministic behaviors. These are run nightly and used + to track the health of the product from build to build. + +#### `EvalCase` Properties + +- `name`: The name of the evaluation case. +- `prompt`: The prompt to send to the model. +- `params`: An optional object with parameters to pass to the test rig (e.g., + settings). +- `assert`: An async function that takes the test rig and the result of the run + and asserts that the result is correct. +- `log`: An optional boolean that, if set to `true`, will log the tool calls to + a file in the `evals/logs` directory. + +### Example + +```typescript +import { describe, expect } from 'vitest'; +import { evalTest } from './test-helper.js'; + +describe('my_feature', () => { + evalTest('ALWAYS_PASSES', { + name: 'should do something', + prompt: 'do it', + assert: async (rig, result) => { + // assertions + }, + }); +}); +``` + +## Running Evaluations + +### Always Passing Evals + +To run the evaluations that are expected to always pass (CI safe): + +```bash +npm run test:always_passing_evals +``` + +### All Evals + +To run all evaluations, including those that may be flaky ("usually passes"): + +```bash +npm run test:all_evals +``` + +This command sets the `RUN_EVALS` environment variable to `1`, which enables the +`USUALLY_PASSES` tests. 
diff --git a/evals/save_memory.eval.ts b/evals/save_memory.eval.ts new file mode 100644 index 0000000000..a64f21798a --- /dev/null +++ b/evals/save_memory.eval.ts @@ -0,0 +1,31 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import { evalTest } from './test-helper.js'; +import { validateModelOutput } from '../integration-tests/test-helper.js'; + +describe('save_memory', () => { + evalTest('ALWAYS_PASSES', { + name: 'should be able to save to memory', + log: true, + params: { + settings: { tools: { core: ['save_memory'] } }, + }, + prompt: `remember that my favorite color is blue. + + what is my favorite color? tell me that and surround it with $ symbol`, + assert: async (rig, result) => { + const foundToolCall = await rig.waitForToolCall('save_memory'); + expect( + foundToolCall, + 'Expected to find a save_memory tool call', + ).toBeTruthy(); + + validateModelOutput(result, 'blue', 'Save memory test'); + }, + }); +}); diff --git a/evals/test-helper.ts b/evals/test-helper.ts new file mode 100644 index 0000000000..f394521d1e --- /dev/null +++ b/evals/test-helper.ts @@ -0,0 +1,70 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { it } from 'vitest'; +import fs from 'node:fs'; +import { TestRig } from '@google/gemini-cli-test-utils'; + +export * from '@google/gemini-cli-test-utils'; + +// Indicates the consistency expectation for this test. +// - ALWAYS_PASSES - Means that the test is expected to pass 100% of the time. These +// These tests are typically trivial and test basic functionality with unambiguous +// prompts. For example: "call save_memory to remember foo" should be fairly reliable. +// These are the first line of defense against regressions in key behaviors and run in +// every CI. You can run these locally with 'npm run test:always_passing_evals'. 
+//
+// - USUALLY_PASSES - Means that the test is expected to pass most of the time but
+// may have some flakiness as a result of relying on non-deterministic prompted
+// behaviors and/or ambiguous prompts or complex tasks.
+// For example: "Please do build changes until the very end" --> ambiguous whether
+// the agent should add to memory without more explicit system prompt or user
+// instructions. There are many more of these tests and they may pass less consistently.
+// The pass/fail trendline of this set of tests can be used as a general measure
+// of product quality. You can run these locally with 'npm run test:all_evals'.
+// This may take a really long time and is not recommended.
+export type EvalPolicy = 'ALWAYS_PASSES' | 'USUALLY_PASSES';
+
+/**
+ * Reports whether `USUALLY_PASSES` evals are enabled through `RUN_EVALS`.
+ *
+ * Environment variables are always strings, so a plain truthiness check would
+ * treat `RUN_EVALS=false` (or `0`) as "enabled". Unset, empty, `0` and `false`
+ * (case-insensitive) mean disabled; any other value enables the evals.
+ */
+function usuallyPassingEvalsEnabled(): boolean {
+  const flag = (process.env['RUN_EVALS'] ?? '').trim().toLowerCase();
+  return flag !== '' && flag !== '0' && flag !== 'false';
+}
+
+/**
+ * Registers one behavioral eval as a Vitest test.
+ *
+ * The registered test creates a fresh `TestRig`, sets it up with
+ * `evalCase.params`, runs it with `evalCase.prompt` as its args, and passes the
+ * rig and the run result to `evalCase.assert`. The rig is cleaned up afterwards
+ * whether or not the eval succeeded. `USUALLY_PASSES` evals are registered as
+ * skipped unless `RUN_EVALS` enables them.
+ *
+ * @param policy Consistency expectation; decides whether the eval is skipped.
+ * @param evalCase Name, prompt, rig parameters and assertion of the eval.
+ */
+export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
+  const fn = async () => {
+    const rig = new TestRig();
+    try {
+      await rig.setup(evalCase.name, evalCase.params);
+      const result = await rig.run({ args: evalCase.prompt });
+      await evalCase.assert(rig, result);
+    } finally {
+      if (evalCase.log) {
+        // Logging is diagnostic only: a failure here must neither replace the
+        // eval's own error nor prevent the rig from being cleaned up.
+        try {
+          await logToFile(
+            evalCase.name,
+            JSON.stringify(rig.readToolLogs(), null, 2),
+          );
+        } catch (error: unknown) {
+          console.warn(
+            `Failed to write tool-call log for "${evalCase.name}":`,
+            error,
+          );
+        }
+      }
+      await rig.cleanup();
+    }
+  };
+
+  if (policy === 'USUALLY_PASSES' && !usuallyPassingEvalsEnabled()) {
+    it.skip(evalCase.name, fn);
+  } else {
+    it(evalCase.name, fn);
+  }
+}
+
+/** Definition of a single eval passed to `evalTest`. */
+export interface EvalCase {
+  /** Test name; also used to derive the log file name when `log` is set. */
+  name: string;
+  /** Optional options forwarded to `TestRig.setup` (e.g. `settings`). */
+  params?: Record<string, unknown>;
+  /** Passed to `TestRig.run` as `args`. */
+  prompt: string;
+  /** Receives the rig and the run result; should throw or reject on failure. */
+  assert: (rig: TestRig, result: string) => Promise<void>;
+  /** When true, the rig's tool logs are written under `evals/logs`. */
+  log?: boolean;
+}
+
+/**
+ * Writes `content` to `evals/logs/<sanitized name>.log`, creating the
+ * directory first if needed. The path is relative to the current working
+ * directory.
+ */
+async function logToFile(name: string, content: string) {
+  const logDir = 'evals/logs';
+  await fs.promises.mkdir(logDir, { recursive: true });
+  // Every non-alphanumeric character becomes '_' so the name is a safe file name.
+  const sanitizedName = name.replace(/[^a-z0-9]/gi, '_').toLowerCase();
+  const logFile = `${logDir}/${sanitizedName}.log`;
+  await fs.promises.writeFile(logFile, content);
+}
diff --git a/evals/vitest.config.ts b/evals/vitest.config.ts
new file mode 100644
index 0000000000..8476b638ff
--- /dev/null
+++ b/evals/vitest.config.ts
@@ -0,0 +1,15 @@
+/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + testTimeout: 300000, // 5 minutes + reporters: ['default'], + include: ['**/*.eval.ts'], + }, +}); diff --git a/integration-tests/save_memory.test.ts b/integration-tests/save_memory.test.ts deleted file mode 100644 index 38b4d060fa..0000000000 --- a/integration-tests/save_memory.test.ts +++ /dev/null @@ -1,54 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { describe, it, expect, beforeEach, afterEach } from 'vitest'; -import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; - -describe('save_memory', () => { - let rig: TestRig; - - beforeEach(() => { - rig = new TestRig(); - }); - - afterEach(async () => await rig.cleanup()); - - it('should be able to save to memory', async () => { - await rig.setup('should be able to save to memory', { - settings: { tools: { core: ['save_memory'] } }, - }); - - const prompt = `remember that my favorite color is blue. - - what is my favorite color? 
tell me that and surround it with $ symbol`; - const result = await rig.run({ args: prompt }); - - const foundToolCall = await rig.waitForToolCall('save_memory'); - - // Add debugging information - if (!foundToolCall || !result.toLowerCase().includes('blue')) { - const allTools = printDebugInfo(rig, result, { - 'Found tool call': foundToolCall, - 'Contains blue': result.toLowerCase().includes('blue'), - }); - - console.error( - 'Memory tool calls:', - allTools - .filter((t) => t.toolRequest.name === 'save_memory') - .map((t) => t.toolRequest.args), - ); - } - - expect( - foundToolCall, - 'Expected to find a save_memory tool call', - ).toBeTruthy(); - - // Validate model output - will throw if no output, warn if missing expected content - validateModelOutput(result, 'blue', 'Save memory test'); - }); -}); diff --git a/integration-tests/test-helper.ts b/integration-tests/test-helper.ts index 9a2a6cefca..a13f260c4b 100644 --- a/integration-tests/test-helper.ts +++ b/integration-tests/test-helper.ts @@ -4,1225 +4,4 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { expect } from 'vitest'; -import { execSync, spawn, type ChildProcess } from 'node:child_process'; -import { mkdirSync, writeFileSync, readFileSync } from 'node:fs'; -import { join, dirname } from 'node:path'; -import { fileURLToPath } from 'node:url'; -import { env } from 'node:process'; -import { setTimeout as sleep } from 'node:timers/promises'; -import { DEFAULT_GEMINI_MODEL } from '../packages/core/src/config/models.js'; -import fs from 'node:fs'; -import * as pty from '@lydell/node-pty'; -import stripAnsi from 'strip-ansi'; -import * as os from 'node:os'; -import { GEMINI_DIR } from '../packages/core/src/utils/paths.js'; - -const __dirname = dirname(fileURLToPath(import.meta.url)); -const BUNDLE_PATH = join(__dirname, '..', 'bundle/gemini.js'); - -// Get timeout based on environment -function getDefaultTimeout() { - if (env['CI']) return 60000; // 1 minute in CI - if (env['GEMINI_SANDBOX']) return 
30000; // 30s in containers - return 15000; // 15s locally -} - -export async function poll( - predicate: () => boolean, - timeout: number, - interval: number, -): Promise { - const startTime = Date.now(); - let attempts = 0; - while (Date.now() - startTime < timeout) { - attempts++; - const result = predicate(); - if (env['VERBOSE'] === 'true' && attempts % 5 === 0) { - console.log( - `Poll attempt ${attempts}: ${result ? 'success' : 'waiting...'}`, - ); - } - if (result) { - return true; - } - await sleep(interval); - } - if (env['VERBOSE'] === 'true') { - console.log(`Poll timed out after ${attempts} attempts`); - } - return false; -} - -function sanitizeTestName(name: string) { - return name - .toLowerCase() - .replace(/[^a-z0-9]/g, '-') - .replace(/-+/g, '-'); -} - -// Helper to create detailed error messages -export function createToolCallErrorMessage( - expectedTools: string | string[], - foundTools: string[], - result: string, -) { - const expectedStr = Array.isArray(expectedTools) - ? expectedTools.join(' or ') - : expectedTools; - return ( - `Expected to find ${expectedStr} tool call(s). ` + - `Found: ${foundTools.length > 0 ? foundTools.join(', ') : 'none'}. ` + - `Output preview: ${result ? result.substring(0, 200) + '...' 
: 'no output'}` - ); -} - -// Helper to print debug information when tests fail -export function printDebugInfo( - rig: TestRig, - result: string, - context: Record = {}, -) { - console.error('Test failed - Debug info:'); - console.error('Result length:', result.length); - console.error('Result (first 500 chars):', result.substring(0, 500)); - console.error( - 'Result (last 500 chars):', - result.substring(result.length - 500), - ); - - // Print any additional context provided - Object.entries(context).forEach(([key, value]) => { - console.error(`${key}:`, value); - }); - - // Check what tools were actually called - const allTools = rig.readToolLogs(); - console.error( - 'All tool calls found:', - allTools.map((t) => t.toolRequest.name), - ); - - return allTools; -} - -// Helper to validate model output and warn about unexpected content -export function validateModelOutput( - result: string, - expectedContent: string | (string | RegExp)[] | null = null, - testName = '', -) { - // First, check if there's any output at all (this should fail the test if missing) - if (!result || result.trim().length === 0) { - throw new Error('Expected LLM to return some output'); - } - - // If expectedContent is provided, check for it and warn if missing - if (expectedContent) { - const contents = Array.isArray(expectedContent) - ? 
expectedContent - : [expectedContent]; - const missingContent = contents.filter((content) => { - if (typeof content === 'string') { - return !result.toLowerCase().includes(content.toLowerCase()); - } else if (content instanceof RegExp) { - return !content.test(result); - } - return false; - }); - - if (missingContent.length > 0) { - console.warn( - `Warning: LLM did not include expected content in response: ${missingContent.join( - ', ', - )}.`, - 'This is not ideal but not a test failure.', - ); - console.warn( - 'The tool was called successfully, which is the main requirement.', - ); - console.warn('Expected content:', expectedContent); - console.warn('Actual output:', result); - return false; - } else if (env['VERBOSE'] === 'true') { - console.log(`${testName}: Model output validated successfully.`); - } - return true; - } - - return true; -} - -interface ParsedLog { - attributes?: { - 'event.name'?: string; - function_name?: string; - function_args?: string; - success?: boolean; - duration_ms?: number; - request_text?: string; - hook_event_name?: string; - hook_name?: string; - hook_input?: Record; - hook_output?: Record; - exit_code?: number; - stdout?: string; - stderr?: string; - error?: string; - }; - scopeMetrics?: { - metrics: { - descriptor: { - name: string; - }; - }[]; - }[]; -} - -export class InteractiveRun { - ptyProcess: pty.IPty; - public output = ''; - - constructor(ptyProcess: pty.IPty) { - this.ptyProcess = ptyProcess; - ptyProcess.onData((data) => { - this.output += data; - if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') { - process.stdout.write(data); - } - }); - } - - async expectText(text: string, timeout?: number) { - if (!timeout) { - timeout = getDefaultTimeout(); - } - await poll( - () => stripAnsi(this.output).toLowerCase().includes(text.toLowerCase()), - timeout, - 200, - ); - expect(stripAnsi(this.output).toLowerCase()).toContain(text.toLowerCase()); - } - - // This types slowly to make sure command is correct, but 
only work for short - // commands that are not multi-line, use sendKeys to type long prompts - async type(text: string) { - let typedSoFar = ''; - for (const char of text) { - if (char === '\r') { - // wait >30ms before `enter` to avoid fast return conversion - // from bufferFastReturn() in KeypressContent.tsx - await sleep(50); - } - - this.ptyProcess.write(char); - typedSoFar += char; - - // Wait for the typed sequence so far to be echoed back. - const found = await poll( - () => stripAnsi(this.output).includes(typedSoFar), - 5000, // 5s timeout per character (generous for CI) - 10, // check frequently - ); - - if (!found) { - throw new Error( - `Timed out waiting for typed text to appear in output: "${typedSoFar}".\nStripped output:\n${stripAnsi( - this.output, - )}`, - ); - } - } - } - - // Types an entire string at once, necessary for some things like commands - // but may run into paste detection issues for larger strings. - async sendText(text: string) { - this.ptyProcess.write(text); - await sleep(5); - } - - // Simulates typing a string one character at a time to avoid paste detection. - async sendKeys(text: string) { - const delay = 5; - for (const char of text) { - this.ptyProcess.write(char); - await sleep(delay); - } - } - - async kill() { - this.ptyProcess.kill(); - } - - expectExit(): Promise { - return new Promise((resolve, reject) => { - const timer = setTimeout( - () => - reject( - new Error(`Test timed out: process did not exit within a minute.`), - ), - 60000, - ); - this.ptyProcess.onExit(({ exitCode }) => { - clearTimeout(timer); - resolve(exitCode); - }); - }); - } -} - -export class TestRig { - testDir: string | null = null; - homeDir: string | null = null; - testName?: string; - _lastRunStdout?: string; - // Path to the copied fake responses file for this test. - fakeResponsesPath?: string; - // Original fake responses file path for rewriting goldens in record mode. 
- originalFakeResponsesPath?: string; - private _interactiveRuns: InteractiveRun[] = []; - private _spawnedProcesses: ChildProcess[] = []; - - setup( - testName: string, - options: { - settings?: Record; - fakeResponsesPath?: string; - } = {}, - ) { - this.testName = testName; - const sanitizedName = sanitizeTestName(testName); - const testFileDir = - env['INTEGRATION_TEST_FILE_DIR'] || join(os.tmpdir(), 'gemini-cli-tests'); - this.testDir = join(testFileDir, sanitizedName); - this.homeDir = join(testFileDir, sanitizedName + '-home'); - mkdirSync(this.testDir, { recursive: true }); - mkdirSync(this.homeDir, { recursive: true }); - if (options.fakeResponsesPath) { - this.fakeResponsesPath = join(this.testDir, 'fake-responses.json'); - this.originalFakeResponsesPath = options.fakeResponsesPath; - if (process.env['REGENERATE_MODEL_GOLDENS'] !== 'true') { - fs.copyFileSync(options.fakeResponsesPath, this.fakeResponsesPath); - } - } - - // Create a settings file to point the CLI to the local collector - this._createSettingsFile(options.settings); - } - - private _createSettingsFile(overrideSettings?: Record) { - const projectGeminiDir = join(this.testDir!, GEMINI_DIR); - mkdirSync(projectGeminiDir, { recursive: true }); - - // In sandbox mode, use an absolute path for telemetry inside the container - // The container mounts the test directory at the same path as the host - const telemetryPath = join(this.homeDir!, 'telemetry.log'); // Always use home directory for telemetry - - const settings = { - general: { - // Nightly releases sometimes becomes out of sync with local code and - // triggers auto-update, which causes tests to fail. 
- disableAutoUpdate: true, - previewFeatures: false, - }, - telemetry: { - enabled: true, - target: 'local', - otlpEndpoint: '', - outfile: telemetryPath, - }, - security: { - auth: { - selectedType: 'gemini-api-key', - }, - }, - ui: { - useAlternateBuffer: true, - }, - model: { - name: DEFAULT_GEMINI_MODEL, - }, - sandbox: - env['GEMINI_SANDBOX'] !== 'false' ? env['GEMINI_SANDBOX'] : false, - // Don't show the IDE connection dialog when running from VsCode - ide: { enabled: false, hasSeenNudge: true }, - ...overrideSettings, // Allow tests to override/add settings - }; - writeFileSync( - join(projectGeminiDir, 'settings.json'), - JSON.stringify(settings, null, 2), - ); - } - - createFile(fileName: string, content: string) { - const filePath = join(this.testDir!, fileName); - writeFileSync(filePath, content); - return filePath; - } - - mkdir(dir: string) { - mkdirSync(join(this.testDir!, dir), { recursive: true }); - } - - sync() { - if (os.platform() === 'win32') return; - // ensure file system is done before spawning - execSync('sync', { cwd: this.testDir! }); - } - - /** - * The command and args to use to invoke Gemini CLI. Allows us to switch - * between using the bundled gemini.js (the default) and using the installed - * 'gemini' (used to verify npm bundles). - */ - private _getCommandAndArgs(extraInitialArgs: string[] = []): { - command: string; - initialArgs: string[]; - } { - const isNpmReleaseTest = - env['INTEGRATION_TEST_USE_INSTALLED_GEMINI'] === 'true'; - const command = isNpmReleaseTest ? 'gemini' : 'node'; - const initialArgs = isNpmReleaseTest - ? 
extraInitialArgs - : [BUNDLE_PATH, ...extraInitialArgs]; - if (this.fakeResponsesPath) { - if (process.env['REGENERATE_MODEL_GOLDENS'] === 'true') { - initialArgs.push('--record-responses', this.fakeResponsesPath); - } else { - initialArgs.push('--fake-responses', this.fakeResponsesPath); - } - } - return { command, initialArgs }; - } - - run(options: { - args?: string | string[]; - stdin?: string; - stdinDoesNotEnd?: boolean; - yolo?: boolean; - timeout?: number; - env?: Record; - }): Promise { - const yolo = options.yolo !== false; - const { command, initialArgs } = this._getCommandAndArgs( - yolo ? ['--yolo'] : [], - ); - const commandArgs = [...initialArgs]; - const execOptions: { - cwd: string; - encoding: 'utf-8'; - input?: string; - } = { - cwd: this.testDir!, - encoding: 'utf-8', - }; - - if (options.args) { - if (Array.isArray(options.args)) { - commandArgs.push(...options.args); - } else { - commandArgs.push(options.args); - } - } - - if (options.stdin) { - execOptions.input = options.stdin; - } - - const child = spawn(command, commandArgs, { - cwd: this.testDir!, - stdio: 'pipe', - env: { - ...process.env, - GEMINI_CLI_HOME: this.homeDir!, - ...options.env, - }, - }); - this._spawnedProcesses.push(child); - - let stdout = ''; - let stderr = ''; - - // Handle stdin if provided - if (execOptions.input) { - child.stdin!.write(execOptions.input); - } - - if (!options.stdinDoesNotEnd) { - child.stdin!.end(); - } - - child.stdout!.setEncoding('utf8'); - child.stdout!.on('data', (data: string) => { - stdout += data; - if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') { - process.stdout.write(data); - } - }); - - child.stderr!.setEncoding('utf8'); - child.stderr!.on('data', (data: string) => { - stderr += data; - if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') { - process.stderr.write(data); - } - }); - - const timeout = options.timeout ?? 
120000; - const promise = new Promise((resolve, reject) => { - const timer = setTimeout(() => { - child.kill('SIGKILL'); - reject( - new Error( - `Process timed out after ${timeout}ms.\nStdout:\n${stdout}\nStderr:\n${stderr}`, - ), - ); - }, timeout); - - child.on('error', (err) => { - clearTimeout(timer); - reject(err); - }); - - child.on('close', (code: number) => { - clearTimeout(timer); - if (code === 0) { - // Store the raw stdout for Podman telemetry parsing - this._lastRunStdout = stdout; - - // Filter out telemetry output when running with Podman - const result = this._filterPodmanTelemetry(stdout); - - // Check if this is a JSON output test - if so, don't include stderr - // as it would corrupt the JSON - const isJsonOutput = - commandArgs.includes('--output-format') && - commandArgs.includes('json'); - - // If we have stderr output and it's not a JSON test, include that also - const finalResult = - stderr && !isJsonOutput - ? `${result}\n\nStdErr:\n${stderr}` - : result; - - resolve(finalResult); - } else { - reject(new Error(`Process exited with code ${code}:\n${stderr}`)); - } - }); - }); - - return promise; - } - - private _filterPodmanTelemetry(stdout: string): string { - if (env['GEMINI_SANDBOX'] !== 'podman') { - return stdout; - } - - // Remove telemetry JSON objects from output - // They are multi-line JSON objects that start with { and contain telemetry fields - const lines = stdout.split(os.EOL); - const filteredLines = []; - let inTelemetryObject = false; - let braceDepth = 0; - - for (const line of lines) { - if (!inTelemetryObject && line.trim() === '{') { - // Check if this might be start of telemetry object - inTelemetryObject = true; - braceDepth = 1; - } else if (inTelemetryObject) { - // Count braces to track nesting - for (const char of line) { - if (char === '{') braceDepth++; - else if (char === '}') braceDepth--; - } - - // Check if we've closed all braces - if (braceDepth === 0) { - inTelemetryObject = false; - // Skip this line 
(the closing brace) - continue; - } - } else { - // Not in telemetry object, keep the line - filteredLines.push(line); - } - } - - return filteredLines.join('\n'); - } - - runCommand( - args: string[], - options: { - stdin?: string; - timeout?: number; - env?: Record; - } = {}, - ): Promise { - const { command, initialArgs } = this._getCommandAndArgs(); - const commandArgs = [...initialArgs, ...args]; - - const child = spawn(command, commandArgs, { - cwd: this.testDir!, - stdio: 'pipe', - env: { - ...process.env, - GEMINI_CLI_HOME: this.homeDir!, - ...options.env, - }, - }); - this._spawnedProcesses.push(child); - - let stdout = ''; - let stderr = ''; - - if (options.stdin) { - child.stdin!.write(options.stdin); - child.stdin!.end(); - } - - child.stdout!.setEncoding('utf8'); - child.stdout!.on('data', (data: string) => { - stdout += data; - if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') { - process.stdout.write(data); - } - }); - - child.stderr!.setEncoding('utf8'); - child.stderr!.on('data', (data: string) => { - stderr += data; - if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') { - process.stderr.write(data); - } - }); - - const timeout = options.timeout ?? 120000; - const promise = new Promise((resolve, reject) => { - const timer = setTimeout(() => { - child.kill('SIGKILL'); - reject( - new Error( - `Process timed out after ${timeout}ms.\nStdout:\n${stdout}\nStderr:\n${stderr}`, - ), - ); - }, timeout); - - child.on('error', (err) => { - clearTimeout(timer); - reject(err); - }); - - child.on('close', (code: number) => { - clearTimeout(timer); - if (code === 0) { - this._lastRunStdout = stdout; - const result = this._filterPodmanTelemetry(stdout); - - // Check if this is a JSON output test - if so, don't include stderr - // as it would corrupt the JSON - const isJsonOutput = - commandArgs.includes('--output-format') && - commandArgs.includes('json'); - - const finalResult = - stderr && !isJsonOutput - ? 
`${result}\n\nStdErr:\n${stderr}` - : result; - resolve(finalResult); - } else { - reject(new Error(`Process exited with code ${code}:\n${stderr}`)); - } - }); - }); - - return promise; - } - - readFile(fileName: string) { - const filePath = join(this.testDir!, fileName); - const content = readFileSync(filePath, 'utf-8'); - if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') { - console.log(`--- FILE: ${filePath} ---`); - console.log(content); - console.log(`--- END FILE: ${filePath} ---`); - } - return content; - } - - async cleanup() { - // Kill any interactive runs that are still active - for (const run of this._interactiveRuns) { - try { - await run.kill(); - } catch (error) { - if (env['VERBOSE'] === 'true') { - console.warn('Failed to kill interactive run during cleanup:', error); - } - } - } - this._interactiveRuns = []; - - // Kill any other spawned processes that are still running - for (const child of this._spawnedProcesses) { - if (child.exitCode === null && child.signalCode === null) { - try { - child.kill('SIGKILL'); - } catch (error) { - if (env['VERBOSE'] === 'true') { - console.warn( - 'Failed to kill spawned process during cleanup:', - error, - ); - } - } - } - } - this._spawnedProcesses = []; - - if ( - process.env['REGENERATE_MODEL_GOLDENS'] === 'true' && - this.fakeResponsesPath - ) { - fs.copyFileSync(this.fakeResponsesPath, this.originalFakeResponsesPath!); - } - // Clean up test directory and home directory - if (this.testDir && !env['KEEP_OUTPUT']) { - try { - fs.rmSync(this.testDir, { recursive: true, force: true }); - } catch (error) { - // Ignore cleanup errors - if (env['VERBOSE'] === 'true') { - console.warn('Cleanup warning:', (error as Error).message); - } - } - } - if (this.homeDir && !env['KEEP_OUTPUT']) { - try { - fs.rmSync(this.homeDir, { recursive: true, force: true }); - } catch (error) { - // Ignore cleanup errors - if (env['VERBOSE'] === 'true') { - console.warn('Cleanup warning:', (error as Error).message); - } - 
} - } - } - - async waitForTelemetryReady() { - // Telemetry is always written to the test directory - const logFilePath = join(this.homeDir!, 'telemetry.log'); - - if (!logFilePath) return; - - // Wait for telemetry file to exist and have content - await poll( - () => { - if (!fs.existsSync(logFilePath)) return false; - try { - const content = readFileSync(logFilePath, 'utf-8'); - // Check if file has meaningful content (at least one complete JSON object) - return content.includes('"scopeMetrics"'); - } catch { - return false; - } - }, - 2000, // 2 seconds max - reduced since telemetry should flush on exit now - 100, // check every 100ms - ); - } - - async waitForTelemetryEvent(eventName: string, timeout?: number) { - if (!timeout) { - timeout = getDefaultTimeout(); - } - - await this.waitForTelemetryReady(); - - return poll( - () => { - const logs = this._readAndParseTelemetryLog(); - return logs.some( - (logData) => - logData.attributes && - logData.attributes['event.name'] === `gemini_cli.${eventName}`, - ); - }, - timeout, - 100, - ); - } - - async waitForToolCall( - toolName: string, - timeout?: number, - matchArgs?: (args: string) => boolean, - ) { - // Use environment-specific timeout - if (!timeout) { - timeout = getDefaultTimeout(); - } - - // Wait for telemetry to be ready before polling for tool calls - await this.waitForTelemetryReady(); - - return poll( - () => { - const toolLogs = this.readToolLogs(); - return toolLogs.some( - (log) => - log.toolRequest.name === toolName && - (matchArgs?.call(this, log.toolRequest.args) ?? 
true), - ); - }, - timeout, - 100, - ); - } - - async expectToolCallSuccess( - toolNames: string[], - timeout?: number, - matchArgs?: (args: string) => boolean, - ) { - // Use environment-specific timeout - if (!timeout) { - timeout = getDefaultTimeout(); - } - - // Wait for telemetry to be ready before polling for tool calls - await this.waitForTelemetryReady(); - - const success = await poll( - () => { - const toolLogs = this.readToolLogs(); - return toolNames.some((name) => - toolLogs.some( - (log) => - log.toolRequest.name === name && - log.toolRequest.success && - (matchArgs?.call(this, log.toolRequest.args) ?? true), - ), - ); - }, - timeout, - 100, - ); - - expect( - success, - `Expected to find successful toolCalls for ${JSON.stringify(toolNames)}`, - ).toBe(true); - } - - async waitForAnyToolCall(toolNames: string[], timeout?: number) { - if (!timeout) { - timeout = getDefaultTimeout(); - } - - // Wait for telemetry to be ready before polling for tool calls - await this.waitForTelemetryReady(); - - return poll( - () => { - const toolLogs = this.readToolLogs(); - return toolNames.some((name) => - toolLogs.some((log) => log.toolRequest.name === name), - ); - }, - timeout, - 100, - ); - } - - _parseToolLogsFromStdout(stdout: string) { - const logs: { - timestamp: number; - toolRequest: { - name: string; - args: string; - success: boolean; - duration_ms: number; - }; - }[] = []; - - // The console output from Podman is JavaScript object notation, not JSON - // Look for tool call events in the output - // Updated regex to handle tool names with hyphens and underscores - const toolCallPattern = - /body:\s*'Tool call:\s*([\w-]+)\..*?Success:\s*(\w+)\..*?Duration:\s*(\d+)ms\.'/g; - const matches = [...stdout.matchAll(toolCallPattern)]; - - for (const match of matches) { - const toolName = match[1]; - const success = match[2] === 'true'; - const duration = parseInt(match[3], 10); - - // Try to find function_args nearby - const matchIndex = match.index || 0; - const 
contextStart = Math.max(0, matchIndex - 500); - const contextEnd = Math.min(stdout.length, matchIndex + 500); - const context = stdout.substring(contextStart, contextEnd); - - // Look for function_args in the context - let args = '{}'; - const argsMatch = context.match(/function_args:\s*'([^']+)'/); - if (argsMatch) { - args = argsMatch[1]; - } - - // Also try to find function_name to double-check - // Updated regex to handle tool names with hyphens and underscores - const nameMatch = context.match(/function_name:\s*'([\w-]+)'/); - const actualToolName = nameMatch ? nameMatch[1] : toolName; - - logs.push({ - timestamp: Date.now(), - toolRequest: { - name: actualToolName, - args: args, - success: success, - duration_ms: duration, - }, - }); - } - - // If no matches found with the simple pattern, try the JSON parsing approach - // in case the format changes - if (logs.length === 0) { - const lines = stdout.split(os.EOL); - let currentObject = ''; - let inObject = false; - let braceDepth = 0; - - for (const line of lines) { - if (!inObject && line.trim() === '{') { - inObject = true; - braceDepth = 1; - currentObject = line + '\n'; - } else if (inObject) { - currentObject += line + '\n'; - - // Count braces - for (const char of line) { - if (char === '{') braceDepth++; - else if (char === '}') braceDepth--; - } - - // If we've closed all braces, try to parse the object - if (braceDepth === 0) { - inObject = false; - try { - const obj = JSON.parse(currentObject); - - // Check for tool call in different formats - if ( - obj.body && - obj.body.includes('Tool call:') && - obj.attributes - ) { - const bodyMatch = obj.body.match(/Tool call: (\w+)\./); - if (bodyMatch) { - logs.push({ - timestamp: obj.timestamp || Date.now(), - toolRequest: { - name: bodyMatch[1], - args: obj.attributes.function_args || '{}', - success: obj.attributes.success !== false, - duration_ms: obj.attributes.duration_ms || 0, - }, - }); - } - } else if ( - obj.attributes && - 
obj.attributes['event.name'] === 'gemini_cli.tool_call' - ) { - logs.push({ - timestamp: obj.attributes['event.timestamp'], - toolRequest: { - name: obj.attributes.function_name, - args: obj.attributes.function_args, - success: obj.attributes.success, - duration_ms: obj.attributes.duration_ms, - }, - }); - } - } catch { - // Not valid JSON - } - currentObject = ''; - } - } - } - } - - return logs; - } - - private _readAndParseTelemetryLog(): ParsedLog[] { - // Telemetry is always written to the test directory - const logFilePath = join(this.homeDir!, 'telemetry.log'); - - if (!logFilePath || !fs.existsSync(logFilePath)) { - return []; - } - - const content = readFileSync(logFilePath, 'utf-8'); - - // Split the content into individual JSON objects - // They are separated by "}\n{" - const jsonObjects = content - .split(/}\n{/) - .map((obj, index, array) => { - // Add back the braces we removed during split - if (index > 0) obj = '{' + obj; - if (index < array.length - 1) obj = obj + '}'; - return obj.trim(); - }) - .filter((obj) => obj); - - const logs: ParsedLog[] = []; - - for (const jsonStr of jsonObjects) { - try { - const logData = JSON.parse(jsonStr); - logs.push(logData); - } catch (e) { - // Skip objects that aren't valid JSON - if (env['VERBOSE'] === 'true') { - console.error('Failed to parse telemetry object:', e); - } - } - } - - return logs; - } - - readToolLogs() { - // For Podman, first check if telemetry file exists and has content - // If not, fall back to parsing from stdout - if (env['GEMINI_SANDBOX'] === 'podman') { - // Try reading from file first - const logFilePath = join(this.homeDir!, 'telemetry.log'); - - if (fs.existsSync(logFilePath)) { - try { - const content = readFileSync(logFilePath, 'utf-8'); - if (content && content.includes('"event.name"')) { - // File has content, use normal file parsing - // Continue to the normal file parsing logic below - } else if (this._lastRunStdout) { - // File exists but is empty or doesn't have events, 
parse from stdout - return this._parseToolLogsFromStdout(this._lastRunStdout); - } - } catch { - // Error reading file, fall back to stdout - if (this._lastRunStdout) { - return this._parseToolLogsFromStdout(this._lastRunStdout); - } - } - } else if (this._lastRunStdout) { - // No file exists, parse from stdout - return this._parseToolLogsFromStdout(this._lastRunStdout); - } - } - - const parsedLogs = this._readAndParseTelemetryLog(); - const logs: { - toolRequest: { - name: string; - args: string; - success: boolean; - duration_ms: number; - }; - }[] = []; - - for (const logData of parsedLogs) { - // Look for tool call logs - if ( - logData.attributes && - logData.attributes['event.name'] === 'gemini_cli.tool_call' - ) { - const toolName = logData.attributes.function_name!; - logs.push({ - toolRequest: { - name: toolName, - args: logData.attributes.function_args ?? '{}', - success: logData.attributes.success ?? false, - duration_ms: logData.attributes.duration_ms ?? 0, - }, - }); - } - } - - return logs; - } - - readAllApiRequest(): ParsedLog[] { - const logs = this._readAndParseTelemetryLog(); - const apiRequests = logs.filter( - (logData) => - logData.attributes && - logData.attributes['event.name'] === `gemini_cli.api_request`, - ); - return apiRequests; - } - - readLastApiRequest(): ParsedLog | null { - const logs = this._readAndParseTelemetryLog(); - const apiRequests = logs.filter( - (logData) => - logData.attributes && - logData.attributes['event.name'] === `gemini_cli.api_request`, - ); - return apiRequests.pop() || null; - } - - async waitForMetric(metricName: string, timeout?: number) { - await this.waitForTelemetryReady(); - - const fullName = metricName.startsWith('gemini_cli.') - ? 
metricName - : `gemini_cli.${metricName}`; - - return poll( - () => { - const logs = this._readAndParseTelemetryLog(); - for (const logData of logs) { - if (logData.scopeMetrics) { - for (const scopeMetric of logData.scopeMetrics) { - for (const metric of scopeMetric.metrics) { - if (metric.descriptor.name === fullName) { - return true; - } - } - } - } - } - return false; - }, - timeout ?? getDefaultTimeout(), - 100, - ); - } - - readMetric(metricName: string): Record | null { - const logs = this._readAndParseTelemetryLog(); - for (const logData of logs) { - if (logData.scopeMetrics) { - for (const scopeMetric of logData.scopeMetrics) { - for (const metric of scopeMetric.metrics) { - if (metric.descriptor.name === `gemini_cli.${metricName}`) { - return metric; - } - } - } - } - } - return null; - } - - async runInteractive(options?: { - args?: string | string[]; - yolo?: boolean; - env?: Record; - }): Promise { - const yolo = options?.yolo !== false; - const { command, initialArgs } = this._getCommandAndArgs( - yolo ? ['--yolo'] : [], - ); - const commandArgs = [...initialArgs]; - - const envVars = { - ...process.env, - GEMINI_CLI_HOME: this.homeDir!, - ...options?.env, - }; - - const ptyOptions: pty.IPtyForkOptions = { - name: 'xterm-color', - cols: 80, - rows: 80, - cwd: this.testDir!, - env: Object.fromEntries( - Object.entries(envVars).filter(([, v]) => v !== undefined), - ) as { [key: string]: string }, - }; - - const executable = command === 'node' ? 
process.execPath : command; - const ptyProcess = pty.spawn(executable, commandArgs, ptyOptions); - - const run = new InteractiveRun(ptyProcess); - this._interactiveRuns.push(run); - // Wait for the app to be ready - await run.expectText(' Type your message or @path/to/file', 30000); - return run; - } - - readHookLogs() { - const parsedLogs = this._readAndParseTelemetryLog(); - const logs: { - hookCall: { - hook_event_name: string; - hook_name: string; - hook_input: Record; - hook_output: Record; - exit_code: number; - stdout: string; - stderr: string; - duration_ms: number; - success: boolean; - error: string; - }; - }[] = []; - - for (const logData of parsedLogs) { - // Look for tool call logs - if ( - logData.attributes && - logData.attributes['event.name'] === 'gemini_cli.hook_call' - ) { - logs.push({ - hookCall: { - hook_event_name: logData.attributes.hook_event_name ?? '', - hook_name: logData.attributes.hook_name ?? '', - hook_input: logData.attributes.hook_input ?? {}, - hook_output: logData.attributes.hook_output ?? {}, - exit_code: logData.attributes.exit_code ?? 0, - stdout: logData.attributes.stdout ?? '', - stderr: logData.attributes.stderr ?? '', - duration_ms: logData.attributes.duration_ms ?? 0, - success: logData.attributes.success ?? false, - error: logData.attributes.error ?? 
'', - }, - }); - } - } - - return logs; - } - - async pollCommand( - commandFn: () => Promise, - predicateFn: () => boolean, - timeout: number = 30000, - interval: number = 1000, - ) { - const startTime = Date.now(); - while (Date.now() - startTime < timeout) { - await commandFn(); - // Give it a moment to process - await sleep(500); - if (predicateFn()) { - return; - } - await sleep(interval); - } - throw new Error(`pollCommand timed out after ${timeout}ms`); - } -} +export * from '@google/gemini-cli-test-utils'; diff --git a/package-lock.json b/package-lock.json index b9f71c339f..7d036d9b96 100644 --- a/package-lock.json +++ b/package-lock.json @@ -522,7 +522,7 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/@bundled-es-modules/cookie/-/cookie-2.0.1.tgz", "integrity": "sha512-8o+5fRPLNbjbdGRRmJj3h6Hh1AQJf2dk3qQ/5ZFb+PXkRNiSoMGGUKlsgLfrxneb72axVJyIYji64E2+nNfYyw==", - "dev": true, + "devOptional": true, "license": "ISC", "dependencies": { "cookie": "^0.7.2" @@ -532,7 +532,7 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/@bundled-es-modules/statuses/-/statuses-1.0.1.tgz", "integrity": "sha512-yn7BklA5acgcBr+7w064fGV+SGIFySjCKpqjcWgBAIfrAkY+4GQTJJHQMeT3V/sgz23VTEVV8TtOmkvJAhFVfg==", - "dev": true, + "devOptional": true, "license": "ISC", "dependencies": { "statuses": "^2.0.1" @@ -542,7 +542,7 @@ "version": "0.1.6", "resolved": "https://registry.npmjs.org/@bundled-es-modules/tough-cookie/-/tough-cookie-0.1.6.tgz", "integrity": "sha512-dvMHbL464C0zI+Yqxbz6kZ5TOEp7GLW+pry/RWndAR8MJQAXZ2rPmIs8tziTZjeIyhSNZgZbCePtfSbdWqStJw==", - "dev": true, + "devOptional": true, "license": "ISC", "dependencies": { "@types/tough-cookie": "^4.0.5", @@ -553,7 +553,7 @@ "version": "4.1.4", "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-4.1.4.tgz", "integrity": "sha512-Loo5UUvLD9ScZ6jh8beX1T6sO1w2/MpCRpEP7V280GKMVUQ0Jzar2U3UJPsrdbziLEMMhu3Ujnq//rhiFuIeag==", - "dev": true, + "devOptional": true, "license": "BSD-3-Clause", "dependencies": 
{ "psl": "^1.1.33", @@ -592,7 +592,6 @@ "cpu": [ "ppc64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -609,7 +608,6 @@ "cpu": [ "arm" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -626,7 +624,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -643,7 +640,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -660,7 +656,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -677,7 +672,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -694,7 +688,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -711,7 +704,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -728,7 +720,6 @@ "cpu": [ "arm" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -745,7 +736,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -762,7 +752,6 @@ "cpu": [ "ia32" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -779,7 +768,6 @@ "cpu": [ "loong64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -796,7 +784,6 @@ "cpu": [ "mips64el" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -813,7 +800,6 @@ "cpu": [ "ppc64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -830,7 +816,6 @@ "cpu": [ "riscv64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -847,7 +832,6 @@ "cpu": [ "s390x" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -864,7 +848,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -881,7 +864,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -898,7 +880,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -915,7 +896,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ 
-932,7 +912,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -949,7 +928,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -966,7 +944,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -983,7 +960,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1000,7 +976,6 @@ "cpu": [ "ia32" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1017,7 +992,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1639,7 +1613,7 @@ "version": "5.1.14", "resolved": "https://registry.npmjs.org/@inquirer/confirm/-/confirm-5.1.14.tgz", "integrity": "sha512-5yR4IBfe0kXe59r1YCTG8WXkUbl7Z35HK87Sw+WUyGD8wNUx7JvY7laahzeytyE1oLn74bQnL7hstctQxisQ8Q==", - "dev": true, + "devOptional": true, "license": "MIT", "dependencies": { "@inquirer/core": "^10.1.15", @@ -1661,7 +1635,7 @@ "version": "10.1.15", "resolved": "https://registry.npmjs.org/@inquirer/core/-/core-10.1.15.tgz", "integrity": "sha512-8xrp836RZvKkpNbVvgWUlxjT4CraKk2q+I3Ksy+seI2zkcE+y6wNs1BVhgcv8VyImFecUhdQrYLdW32pAjwBdA==", - "dev": true, + "devOptional": true, "license": "MIT", "dependencies": { "@inquirer/figures": "^1.0.13", @@ -1689,7 +1663,7 @@ "version": "4.3.2", "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-4.3.2.tgz", "integrity": "sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ==", - "dev": true, + "devOptional": true, "license": "MIT", "dependencies": { "type-fest": "^0.21.3" @@ -1705,7 +1679,7 @@ "version": "0.21.3", "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.21.3.tgz", "integrity": "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==", - "dev": true, + "devOptional": true, "license": "(MIT OR CC0-1.0)", "engines": { "node": ">=10" @@ -1718,7 +1692,7 @@ "version": "1.0.13", "resolved": 
"https://registry.npmjs.org/@inquirer/figures/-/figures-1.0.13.tgz", "integrity": "sha512-lGPVU3yO9ZNqA7vTYz26jny41lE7yoQansmqdMLBEfqaGsmdg7V3W9mK9Pvb5IL4EVZ9GnSDGMO/cJXud5dMaw==", - "dev": true, + "devOptional": true, "license": "MIT", "engines": { "node": ">=18" @@ -1728,7 +1702,7 @@ "version": "3.0.8", "resolved": "https://registry.npmjs.org/@inquirer/type/-/type-3.0.8.tgz", "integrity": "sha512-lg9Whz8onIHRthWaN1Q9EGLa/0LFJjyM8mEUbL1eTi6yMGvBf8gvyDLtxSXztQsxMvhxxNpJYrwa1YHdq+w4Jw==", - "dev": true, + "devOptional": true, "license": "MIT", "engines": { "node": ">=18" @@ -1863,7 +1837,6 @@ "version": "1.5.5", "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", - "dev": true, "license": "MIT" }, "node_modules/@jridgewell/trace-mapping": { @@ -2032,7 +2005,6 @@ "resolved": "https://registry.npmjs.org/@lydell/node-pty/-/node-pty-1.1.0.tgz", "integrity": "sha512-VDD8LtlMTOrPKWMXUAcB9+LTktzuunqrMwkYR1DMRBkS6LQrCt+0/Ws1o2rMml/n3guePpS7cxhHF7Nm5K4iMw==", "license": "MIT", - "optional": true, "optionalDependencies": { "@lydell/node-pty-darwin-arm64": "1.1.0", "@lydell/node-pty-darwin-x64": "1.1.0", @@ -2420,7 +2392,7 @@ "version": "0.39.5", "resolved": "https://registry.npmjs.org/@mswjs/interceptors/-/interceptors-0.39.5.tgz", "integrity": "sha512-B9nHSJYtsv79uo7QdkZ/b/WoKm20IkVSmTc/WCKarmDtFwM0dRx2ouEniqwNkzCSLn3fydzKmnMzjtfdOWt3VQ==", - "dev": true, + "devOptional": true, "license": "MIT", "dependencies": { "@open-draft/deferred-promise": "^2.2.0", @@ -2655,14 +2627,14 @@ "version": "2.2.0", "resolved": "https://registry.npmjs.org/@open-draft/deferred-promise/-/deferred-promise-2.2.0.tgz", "integrity": "sha512-CecwLWx3rhxVQF6V4bAgPS5t+So2sTbPgAzafKkVizyi7tlwpcFpdFqq+wqF2OwNBmqFuu6tOyouTuxgpMfzmA==", - "dev": true, + "devOptional": true, "license": "MIT" }, "node_modules/@open-draft/logger": { "version": "0.3.0", 
"resolved": "https://registry.npmjs.org/@open-draft/logger/-/logger-0.3.0.tgz", "integrity": "sha512-X2g45fzhxH238HKO4xbSr7+wBS8Fvw6ixhTDuvLd5mqh6bJJCFAPwU9mPDxbcrRtfxv4u5IHCEH77BmxvXmmxQ==", - "dev": true, + "devOptional": true, "license": "MIT", "dependencies": { "is-node-process": "^1.2.0", @@ -2673,7 +2645,7 @@ "version": "2.1.0", "resolved": "https://registry.npmjs.org/@open-draft/until/-/until-2.1.0.tgz", "integrity": "sha512-U69T3ItWHvLwGg5eJ0n3I62nWuE6ilHlmz7zM0npLBRvPRd7e6NYmg54vvRtP5mZG7kZqZCFVdsTWo7BPtBujg==", - "dev": true, + "devOptional": true, "license": "MIT" }, "node_modules/@opentelemetry/api": { @@ -3337,7 +3309,6 @@ "cpu": [ "arm" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3351,7 +3322,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3365,7 +3335,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3379,7 +3348,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3393,7 +3361,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3407,7 +3374,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3421,7 +3387,6 @@ "cpu": [ "arm" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3435,7 +3400,6 @@ "cpu": [ "arm" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3449,7 +3413,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3463,7 +3426,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3477,7 +3439,6 @@ "cpu": [ "loong64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3491,7 +3452,6 @@ "cpu": [ "ppc64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3505,7 +3465,6 @@ "cpu": [ "riscv64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3519,7 +3478,6 @@ "cpu": [ "riscv64" ], - "dev": true, "license": 
"MIT", "optional": true, "os": [ @@ -3533,7 +3491,6 @@ "cpu": [ "s390x" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3547,7 +3504,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3561,7 +3517,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3575,7 +3530,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3589,7 +3543,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3603,7 +3556,6 @@ "cpu": [ "ia32" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3617,7 +3569,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -3631,7 +3582,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4088,7 +4038,6 @@ "version": "5.2.3", "resolved": "https://registry.npmjs.org/@types/chai/-/chai-5.2.3.tgz", "integrity": "sha512-Mw558oeA9fFbv65/y4mHtXDs9bPnFMZAL/jxdPFUpOHHIXX91mcgEHbS5Lahr+pwZFR8A7GQleRWeI6cGFC2UA==", - "dev": true, "license": "MIT", "dependencies": { "@types/deep-eql": "*", @@ -4122,7 +4071,7 @@ "version": "0.6.0", "resolved": "https://registry.npmjs.org/@types/cookie/-/cookie-0.6.0.tgz", "integrity": "sha512-4Kh9a6B2bQciAhf7FSuMRRkUWecJgJu9nPnx3yzpsfXX/c50REIqpHY4C82bXP90qrLtXtkDxTZosYO3UpOwlA==", - "dev": true, + "devOptional": true, "license": "MIT" }, "node_modules/@types/cookiejar": { @@ -4146,7 +4095,6 @@ "version": "4.0.2", "resolved": "https://registry.npmjs.org/@types/deep-eql/-/deep-eql-4.0.2.tgz", "integrity": "sha512-c9h9dVVMigMPc4bwTvC5dxqtqJZwQPePsWjPlpSOnojbor6pGqdk541lfA7AqFQr5pB1BRdq0juY9db81BwyFw==", - "dev": true, "license": "MIT" }, "node_modules/@types/diff": { @@ -4170,7 +4118,6 @@ "version": "1.0.8", "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==", - "dev": true, 
"license": "MIT" }, "node_modules/@types/express": { @@ -4526,7 +4473,7 @@ "version": "2.0.6", "resolved": "https://registry.npmjs.org/@types/statuses/-/statuses-2.0.6.tgz", "integrity": "sha512-xMAgYwceFhRA2zY+XbEA7mxYbA093wdiW8Vu6gZPGWy9cmOyU9XesH1tNcEWsKFd5Vzrqx5T3D38PWx1FIIXkA==", - "dev": true, + "devOptional": true, "license": "MIT" }, "node_modules/@types/superagent": { @@ -5140,7 +5087,6 @@ "version": "3.2.4", "resolved": "https://registry.npmjs.org/@vitest/expect/-/expect-3.2.4.tgz", "integrity": "sha512-Io0yyORnB6sikFlt8QW5K7slY4OjqNX9jmJQ02QDda8lyM6B5oNgVWoSoKPac8/kgnCUzuHQKrSLtu/uOqqrig==", - "dev": true, "license": "MIT", "dependencies": { "@types/chai": "^5.2.2", @@ -5157,7 +5103,6 @@ "version": "3.2.4", "resolved": "https://registry.npmjs.org/@vitest/mocker/-/mocker-3.2.4.tgz", "integrity": "sha512-46ryTE9RZO/rfDd7pEqFl7etuyzekzEhUbTW3BvmeO/BcCMEgq59BKhek3dXDWgAj4oMK6OZi+vRr1wPW6qjEQ==", - "dev": true, "license": "MIT", "dependencies": { "@vitest/spy": "3.2.4", @@ -5184,7 +5129,6 @@ "version": "3.2.4", "resolved": "https://registry.npmjs.org/@vitest/pretty-format/-/pretty-format-3.2.4.tgz", "integrity": "sha512-IVNZik8IVRJRTr9fxlitMKeJeXFFFN0JaB9PHPGQ8NKQbGpfjlTx9zO4RefN8gp7eqjNy8nyK3NZmBzOPeIxtA==", - "dev": true, "license": "MIT", "dependencies": { "tinyrainbow": "^2.0.0" @@ -5197,7 +5141,6 @@ "version": "3.2.4", "resolved": "https://registry.npmjs.org/@vitest/runner/-/runner-3.2.4.tgz", "integrity": "sha512-oukfKT9Mk41LreEW09vt45f8wx7DordoWUZMYdY/cyAk7w5TWkTRCNZYF7sX7n2wB7jyGAl74OxgwhPgKaqDMQ==", - "dev": true, "license": "MIT", "dependencies": { "@vitest/utils": "3.2.4", @@ -5212,7 +5155,6 @@ "version": "3.2.4", "resolved": "https://registry.npmjs.org/@vitest/snapshot/-/snapshot-3.2.4.tgz", "integrity": "sha512-dEYtS7qQP2CjU27QBC5oUOxLE/v5eLkGqPE0ZKEIDGMs4vKWe7IjgLOeauHsR0D5YuuycGRO5oSRXnwnmA78fQ==", - "dev": true, "license": "MIT", "dependencies": { "@vitest/pretty-format": "3.2.4", @@ -5227,7 +5169,6 @@ "version": "3.2.4", "resolved": 
"https://registry.npmjs.org/@vitest/spy/-/spy-3.2.4.tgz", "integrity": "sha512-vAfasCOe6AIK70iP5UD11Ac4siNUNJ9i/9PZ3NKx07sG6sUxeag1LWdNrMWeKKYBLlzuK+Gn65Yd5nyL6ds+nw==", - "dev": true, "license": "MIT", "dependencies": { "tinyspy": "^4.0.3" @@ -5240,7 +5181,6 @@ "version": "3.2.4", "resolved": "https://registry.npmjs.org/@vitest/utils/-/utils-3.2.4.tgz", "integrity": "sha512-fB2V0JFrQSMsCo9HiSq3Ezpdv4iYaXRG1Sx8edX3MwxfyNn83mKiGzOcH+Fkxt4MHxr3y42fQi1oeAInqgX2QA==", - "dev": true, "license": "MIT", "dependencies": { "@vitest/pretty-format": "3.2.4", @@ -6318,7 +6258,6 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-2.0.1.tgz", "integrity": "sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==", - "dev": true, "license": "MIT", "engines": { "node": ">=12" @@ -6714,7 +6653,6 @@ "version": "6.7.14", "resolved": "https://registry.npmjs.org/cac/-/cac-6.7.14.tgz", "integrity": "sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==", - "dev": true, "license": "MIT", "engines": { "node": ">=8" @@ -6830,7 +6768,6 @@ "version": "5.3.3", "resolved": "https://registry.npmjs.org/chai/-/chai-5.3.3.tgz", "integrity": "sha512-4zNhdJD/iOjSH0A05ea+Ke6MU5mmpQcbQsSOkgdaUMJ9zTlDTD/GYlwohmIE2u0gaxHYiVHEn1Fw9mZ/ktJWgw==", - "dev": true, "license": "MIT", "dependencies": { "assertion-error": "^2.0.1", @@ -6881,7 +6818,6 @@ "version": "2.1.1", "resolved": "https://registry.npmjs.org/check-error/-/check-error-2.1.1.tgz", "integrity": "sha512-OAlb+T7V4Op9OwdkjmguYRqncdlx5JiofwOAUkmTF+jNdHwzTaTs4sRAGpzLF3oOz5xAyDGrPgeIDFQmDOTiJw==", - "dev": true, "license": "MIT", "engines": { "node": ">= 16" @@ -7101,7 +7037,7 @@ "version": "4.1.0", "resolved": "https://registry.npmjs.org/cli-width/-/cli-width-4.1.0.tgz", "integrity": "sha512-ouuZd4/dm2Sw5Gmqy6bGyNNNe1qt9RpmxveLSO7KcgsTnU7RXfsw+/bukWGo1abgBiMAic068rclZsO4IWmmxQ==", - "dev": true, + "devOptional": true, 
"license": "ISC", "engines": { "node": ">= 12" @@ -7805,7 +7741,6 @@ "version": "5.0.2", "resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-5.0.2.tgz", "integrity": "sha512-h5k/5U50IJJFpzfL6nO9jaaumfjO/f2NjK/oYB2Djzm4p9L+3T9qWpZqZ2hAbLPuuYq9wrU08WQyBTL5GbPk5Q==", - "dev": true, "license": "MIT", "engines": { "node": ">=6" @@ -8587,7 +8522,6 @@ "version": "1.7.0", "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.7.0.tgz", "integrity": "sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==", - "dev": true, "license": "MIT" }, "node_modules/es-object-atoms": { @@ -8662,7 +8596,6 @@ "version": "0.25.6", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.6.tgz", "integrity": "sha512-GVuzuUwtdsghE3ocJ9Bs8PNoF13HNQ5TXbEi2AhvVb8xU1Iwt9Fos9FEamfoee+u/TOsn7GUWc04lz46n2bbTg==", - "dev": true, "hasInstallScript": true, "license": "MIT", "bin": { @@ -9111,7 +9044,6 @@ "version": "3.0.3", "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-3.0.3.tgz", "integrity": "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==", - "dev": true, "license": "MIT", "dependencies": { "@types/estree": "^1.0.0" @@ -9279,7 +9211,6 @@ "version": "1.2.2", "resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.2.2.tgz", "integrity": "sha512-JhFGDVJ7tmDJItKhYgJCGLOWjuK9vPxiXoUFLwLDc99NlmklilbiQJwoctZtt13+xMw91MCk/REan6MWHqDjyA==", - "dev": true, "license": "Apache-2.0", "engines": { "node": ">=12.0.0" @@ -9898,7 +9829,6 @@ "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", - "dev": true, "hasInstallScript": true, "license": "MIT", "optional": true, @@ -10097,7 +10027,7 @@ "version": "4.10.1", "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.10.1.tgz", "integrity": 
"sha512-auHyJ4AgMz7vgS8Hp3N6HXSmlMdUyhSUrfBF16w153rxtLIEOE+HGqaBppczZvnHLqQJfiHotCYpNhl0lUROFQ==", - "dev": true, + "devOptional": true, "license": "MIT", "dependencies": { "resolve-pkg-maps": "^1.0.0" @@ -10498,7 +10428,7 @@ "version": "16.11.0", "resolved": "https://registry.npmjs.org/graphql/-/graphql-16.11.0.tgz", "integrity": "sha512-mS1lbMsxgQj6hge1XZ6p7GPhbrtFwUFYi3wRzXAC/FmYnyXMTvvI3td3rjmQ2u8ewXueaSvRPWaEcgVVOT9Jnw==", - "dev": true, + "devOptional": true, "license": "MIT", "engines": { "node": "^12.22.0 || ^14.16.0 || ^16.0.0 || >=17.0.0" @@ -10620,7 +10550,7 @@ "version": "4.0.3", "resolved": "https://registry.npmjs.org/headers-polyfill/-/headers-polyfill-4.0.3.tgz", "integrity": "sha512-IScLbePpkvO846sIwOtOTDjutRMWdXdJmXdMvk6gCBHxFO8d+QKOQedyZSxFTTFYRSmlgSTDtXqqq4pcenBXLQ==", - "dev": true, + "devOptional": true, "license": "MIT" }, "node_modules/highlight.js": { @@ -11411,7 +11341,7 @@ "version": "1.2.0", "resolved": "https://registry.npmjs.org/is-node-process/-/is-node-process-1.2.0.tgz", "integrity": "sha512-Vg4o6/fqPxIjtxgUH5QLJhwZ7gW5diGCVlXpuUfELC62CuxM1iHcRe51f2W1FDy04Ai4KJkagKjx3XaqyfRKXw==", - "dev": true, + "devOptional": true, "license": "MIT" }, "node_modules/is-number": { @@ -11824,7 +11754,6 @@ "version": "9.0.1", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-9.0.1.tgz", "integrity": "sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==", - "dev": true, "license": "MIT" }, "node_modules/js-yaml": { @@ -12564,7 +12493,6 @@ "version": "3.2.1", "resolved": "https://registry.npmjs.org/loupe/-/loupe-3.2.1.tgz", "integrity": "sha512-CdzqowRJCeLU72bHvWqwRBBlLcMEtIvGrlvef74kMnV2AolS9Y8xUv1I0U/MNAWMhBlKIoyuEgoJ0t/bbwHbLQ==", - "dev": true, "license": "MIT" }, "node_modules/lowercase-keys": { @@ -12604,7 +12532,6 @@ "version": "0.30.21", "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.21.tgz", "integrity": 
"sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==", - "dev": true, "license": "MIT", "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.5" @@ -12931,7 +12858,7 @@ "version": "2.10.4", "resolved": "https://registry.npmjs.org/msw/-/msw-2.10.4.tgz", "integrity": "sha512-6R1or/qyele7q3RyPwNuvc0IxO8L8/Aim6Sz5ncXEgcWUNxSKE+udriTOWHtpMwmfkLYlacA2y7TIx4cL5lgHA==", - "dev": true, + "devOptional": true, "hasInstallScript": true, "license": "MIT", "dependencies": { @@ -12976,14 +12903,14 @@ "version": "6.3.0", "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-6.3.0.tgz", "integrity": "sha512-Yhpw4T9C6hPpgPeA28us07OJeqZ5EzQTkbfwuhsUg0c237RomFoETJgmp2sa3F/41gfLE6G5cqcYwznmeEeOlQ==", - "dev": true, + "devOptional": true, "license": "MIT" }, "node_modules/msw/node_modules/type-fest": { "version": "4.41.0", "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.41.0.tgz", "integrity": "sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA==", - "dev": true, + "devOptional": true, "license": "(MIT OR CC0-1.0)", "engines": { "node": ">=16" @@ -13023,7 +12950,7 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/mute-stream/-/mute-stream-2.0.0.tgz", "integrity": "sha512-WWdIxpyjEn+FhQJQQv9aQAYlHoNVdzIzUySNV1gHUPDSdZJ3yZn7pAAbQcV7B56Mvu881q9FZV+0Vx2xC44VWA==", - "dev": true, + "devOptional": true, "license": "ISC", "engines": { "node": "^18.17.0 || >=20.5.0" @@ -13053,7 +12980,6 @@ "version": "3.3.11", "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==", - "dev": true, "funding": [ { "type": "github", @@ -13807,7 +13733,7 @@ "version": "1.4.3", "resolved": "https://registry.npmjs.org/outvariant/-/outvariant-1.4.3.tgz", "integrity": "sha512-+Sl2UErvtsoajRDKCE5/dBz4DIvHXQQnAxtQTF04OJxY0+DyZXSo5P5Bb7XYWOh81syohlYL24hbDwxedPUJCA==", 
- "dev": true, + "devOptional": true, "license": "MIT" }, "node_modules/own-keys": { @@ -14154,14 +14080,12 @@ "version": "2.0.3", "resolved": "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz", "integrity": "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==", - "dev": true, "license": "MIT" }, "node_modules/pathval": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/pathval/-/pathval-2.0.1.tgz", "integrity": "sha512-//nshmD55c46FuFw26xV/xFAaB5HF9Xdap7HJBBnrKdAd6/GxDBaNA1870O79+9ueg61cZLSVc+OaFlfmObYVQ==", - "dev": true, "license": "MIT", "engines": { "node": ">= 14.16" @@ -14267,7 +14191,6 @@ "version": "8.5.6", "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz", "integrity": "sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==", - "dev": true, "funding": [ { "type": "opencollective", @@ -14495,7 +14418,7 @@ "version": "1.15.0", "resolved": "https://registry.npmjs.org/psl/-/psl-1.15.0.tgz", "integrity": "sha512-JZd3gMVBAVQkSs6HdNZo9Sdo0LNcQeMNP3CozBJb3JYC/QUYZTnKxP+f8oWRX4rHP5EurWxqAHTSwUCjlNKa1w==", - "dev": true, + "devOptional": true, "license": "MIT", "dependencies": { "punycode": "^2.3.1" @@ -14529,7 +14452,7 @@ "version": "2.3.1", "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", - "dev": true, + "devOptional": true, "license": "MIT", "engines": { "node": ">=6" @@ -14564,7 +14487,7 @@ "version": "2.2.0", "resolved": "https://registry.npmjs.org/querystringify/-/querystringify-2.2.0.tgz", "integrity": "sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ==", - "dev": true, + "devOptional": true, "license": "MIT" }, "node_modules/queue-microtask": { @@ -15070,7 +14993,7 @@ "version": "1.0.0", "resolved": 
"https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz", "integrity": "sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ==", - "dev": true, + "devOptional": true, "license": "MIT" }, "node_modules/resolve": { @@ -15127,7 +15050,7 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", - "dev": true, + "devOptional": true, "license": "MIT", "funding": { "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" @@ -15305,7 +15228,6 @@ "version": "4.53.2", "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.53.2.tgz", "integrity": "sha512-MHngMYwGJVi6Fmnk6ISmnk7JAHRNF0UkuucA0CUW3N3a4KnONPEZz+vUanQP/ZC/iY1Qkf3bwPWzyY84wEks1g==", - "dev": true, "license": "MIT", "dependencies": { "@types/estree": "1.0.8" @@ -15808,7 +15730,6 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/siginfo/-/siginfo-2.0.0.tgz", "integrity": "sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==", - "dev": true, "license": "ISC" }, "node_modules/signal-exit": { @@ -15966,7 +15887,6 @@ "version": "1.2.1", "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", - "dev": true, "license": "BSD-3-Clause", "engines": { "node": ">=0.10.0" @@ -16045,7 +15965,6 @@ "version": "0.0.2", "resolved": "https://registry.npmjs.org/stackback/-/stackback-0.0.2.tgz", "integrity": "sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==", - "dev": true, "license": "MIT" }, "node_modules/statuses": { @@ -16061,7 +15980,6 @@ "version": "3.10.0", "resolved": "https://registry.npmjs.org/std-env/-/std-env-3.10.0.tgz", "integrity": 
"sha512-5GS12FdOZNliM5mAOxFRg7Ir0pWz8MdpYm6AY6VPkGpbA7ZzmbzNcBJQ0GPvvyWgcY7QAhCgf9Uy89I03faLkg==", - "dev": true, "license": "MIT" }, "node_modules/stop-iteration-iterator": { @@ -16109,7 +16027,7 @@ "version": "0.5.1", "resolved": "https://registry.npmjs.org/strict-event-emitter/-/strict-event-emitter-0.5.1.tgz", "integrity": "sha512-vMgjE/GGEPEFnhFub6pa4FmJBRBVOLpIII2hvCZ8Kzb7K0hlHo7mQv6xYrBvCL2LtAIBwFUK8wvuJgTVSQ5MFQ==", - "dev": true, + "devOptional": true, "license": "MIT" }, "node_modules/string_decoder": { @@ -16382,7 +16300,6 @@ "version": "3.1.0", "resolved": "https://registry.npmjs.org/strip-literal/-/strip-literal-3.1.0.tgz", "integrity": "sha512-8r3mkIM/2+PpjHoOtiAW8Rg3jJLHaV7xPwG+YRGrv6FP0wwk/toTpATxWYOW0BKdWwl82VT2tFYi5DlROa0Mxg==", - "dev": true, "license": "MIT", "dependencies": { "js-tokens": "^9.0.1" @@ -16938,7 +16855,6 @@ "version": "2.9.0", "resolved": "https://registry.npmjs.org/tinybench/-/tinybench-2.9.0.tgz", "integrity": "sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==", - "dev": true, "license": "MIT" }, "node_modules/tinycolor2": { @@ -16951,14 +16867,12 @@ "version": "0.3.2", "resolved": "https://registry.npmjs.org/tinyexec/-/tinyexec-0.3.2.tgz", "integrity": "sha512-KQQR9yN7R5+OSwaK0XQoj22pwHoTlgYqmUscPYoknOoWCWfj/5/ABTMRi69FrKU5ffPVh5QcFikpWJI/P1ocHA==", - "dev": true, "license": "MIT" }, "node_modules/tinyglobby": { "version": "0.2.15", "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", "integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==", - "dev": true, "license": "MIT", "dependencies": { "fdir": "^6.5.0", @@ -16975,7 +16889,6 @@ "version": "6.5.0", "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", - "dev": true, "license": "MIT", "engines": { "node": ">=12.0.0" @@ -16993,7 
+16906,6 @@ "version": "4.0.3", "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", - "dev": true, "license": "MIT", "engines": { "node": ">=12" @@ -17016,7 +16928,6 @@ "version": "1.1.1", "resolved": "https://registry.npmjs.org/tinypool/-/tinypool-1.1.1.tgz", "integrity": "sha512-Zba82s87IFq9A9XmjiX5uZA/ARWDrB03OHlq+Vw1fSdt0I+4/Kutwy8BP4Y/y/aORMo61FQ0vIb5j44vSo5Pkg==", - "dev": true, "license": "MIT", "engines": { "node": "^18.0.0 || >=20.0.0" @@ -17026,7 +16937,6 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/tinyrainbow/-/tinyrainbow-2.0.0.tgz", "integrity": "sha512-op4nsTR47R6p0vMUUoYl/a+ljLFVtlfaXkLQmqfLR1qHma1h/ysYk4hEXZ880bf2CYgTskvTa/e196Vd5dDQXw==", - "dev": true, "license": "MIT", "engines": { "node": ">=14.0.0" @@ -17036,7 +16946,6 @@ "version": "4.0.4", "resolved": "https://registry.npmjs.org/tinyspy/-/tinyspy-4.0.4.tgz", "integrity": "sha512-azl+t0z7pw/z958Gy9svOTuzqIk6xq+NSheJzn5MMWtWTFywIacg2wUlzKFGtt3cthx0r2SxMK0yzJOR0IES7Q==", - "dev": true, "license": "MIT", "engines": { "node": ">=14.0.0" @@ -17227,7 +17136,7 @@ "version": "4.20.3", "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.20.3.tgz", "integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==", - "dev": true, + "devOptional": true, "license": "MIT", "dependencies": { "esbuild": "~0.25.0", @@ -17410,7 +17319,7 @@ "version": "5.8.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz", "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", - "dev": true, + "devOptional": true, "license": "Apache-2.0", "bin": { "tsc": "bin/tsc", @@ -17514,7 +17423,7 @@ "version": "0.2.0", "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.2.0.tgz", "integrity": 
"sha512-CJ1QgKmNg3CwvAv/kOFmtnEN05f0D/cn9QntgNOQlQF9dgvVTHj3t+8JPdjqawCHk7V/KA+fbUqzZ9XWhcqPUg==", - "dev": true, + "devOptional": true, "license": "MIT", "engines": { "node": ">= 4.0.0" @@ -17550,7 +17459,7 @@ "version": "1.5.10", "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.10.tgz", "integrity": "sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ==", - "dev": true, + "devOptional": true, "license": "MIT", "dependencies": { "querystringify": "^2.1.1", @@ -17628,7 +17537,6 @@ "version": "7.2.2", "resolved": "https://registry.npmjs.org/vite/-/vite-7.2.2.tgz", "integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==", - "dev": true, "license": "MIT", "dependencies": { "esbuild": "^0.25.0", @@ -17703,7 +17611,6 @@ "version": "3.2.4", "resolved": "https://registry.npmjs.org/vite-node/-/vite-node-3.2.4.tgz", "integrity": "sha512-EbKSKh+bh1E1IFxeO0pg1n4dvoOTt0UDiXMd/qn++r98+jPO1xtJilvXldeuQ8giIB5IkpjCgMleHMNEsGH6pg==", - "dev": true, "license": "MIT", "dependencies": { "cac": "^6.7.14", @@ -17726,7 +17633,6 @@ "version": "6.5.0", "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", - "dev": true, "license": "MIT", "engines": { "node": ">=12.0.0" @@ -17744,7 +17650,6 @@ "version": "4.0.3", "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", - "dev": true, "license": "MIT", "engines": { "node": ">=12" @@ -17757,7 +17662,6 @@ "version": "3.2.4", "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz", "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==", - "dev": true, "license": "MIT", "dependencies": { "@types/chai": "^5.2.2", @@ 
-17830,7 +17734,6 @@ "version": "4.0.3", "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", - "dev": true, "license": "MIT", "engines": { "node": ">=12" @@ -17993,7 +17896,6 @@ "version": "2.3.0", "resolved": "https://registry.npmjs.org/why-is-node-running/-/why-is-node-running-2.3.0.tgz", "integrity": "sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w==", - "dev": true, "license": "MIT", "dependencies": { "siginfo": "^2.0.0", @@ -18263,7 +18165,7 @@ "version": "2.8.1", "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.1.tgz", "integrity": "sha512-lcYcMxX2PO9XMGvAJkJ3OsNMw+/7FKes7/hgerGUYWIoWu5j/+YQqcZr5JnPZWzOsEBgMbSbiSTn/dv/69Mkpw==", - "dev": true, + "devOptional": true, "license": "ISC", "bin": { "yaml": "bin.mjs" @@ -18388,7 +18290,7 @@ "version": "2.1.2", "resolved": "https://registry.npmjs.org/yoctocolors-cjs/-/yoctocolors-cjs-2.1.2.tgz", "integrity": "sha512-cYVsTjKl8b+FrnidjibDWskAv7UKOfcwaVZdp/it9n1s9fU3IkgDbhdIRKCW4JDsAlECJY0ytoVPT3sK6kideA==", - "dev": true, + "devOptional": true, "license": "MIT", "engines": { "node": ">=18" @@ -19054,6 +18956,12 @@ "name": "@google/gemini-cli-test-utils", "version": "0.25.0-nightly.20260107.59a18e710", "license": "Apache-2.0", + "dependencies": { + "@google/gemini-cli-core": "file:../core", + "@lydell/node-pty": "1.1.0", + "strip-ansi": "^7.1.2", + "vitest": "^3.2.4" + }, "devDependencies": { "typescript": "^5.3.3" }, diff --git a/package.json b/package.json index f5c10deaf5..b69c37d69b 100644 --- a/package.json +++ b/package.json @@ -41,6 +41,8 @@ "test": "npm run test --workspaces --if-present", "test:ci": "npm run test:ci --workspaces --if-present && npm run test:scripts", "test:scripts": "vitest run --config ./scripts/tests/vitest.config.ts", + "test:always_passing_evals": "vitest run --config evals/vitest.config.ts", + 
"test:all_evals": "cross-env RUN_EVALS=1 vitest run --config evals/vitest.config.ts", "test:e2e": "cross-env VERBOSE=true KEEP_OUTPUT=true npm run test:integration:sandbox:none", "test:integration:all": "npm run test:integration:sandbox:none && npm run test:integration:sandbox:docker && npm run test:integration:sandbox:podman", "test:integration:sandbox:none": "cross-env GEMINI_SANDBOX=false vitest run --root ./integration-tests", diff --git a/packages/test-utils/package.json b/packages/test-utils/package.json index dddb6c01f2..a05464d3e5 100644 --- a/packages/test-utils/package.json +++ b/packages/test-utils/package.json @@ -9,6 +9,12 @@ "build": "node ../../scripts/build_package.js", "typecheck": "tsc --noEmit" }, + "dependencies": { + "@google/gemini-cli-core": "file:../core", + "@lydell/node-pty": "1.1.0", + "strip-ansi": "^7.1.2", + "vitest": "^3.2.4" + }, "devDependencies": { "typescript": "^5.3.3" }, diff --git a/packages/test-utils/src/index.ts b/packages/test-utils/src/index.ts index b8af8aa7d6..c1f2f09d3e 100644 --- a/packages/test-utils/src/index.ts +++ b/packages/test-utils/src/index.ts @@ -5,3 +5,4 @@ */ export * from './file-system-test-helpers.js'; +export * from './test-rig.js'; diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts new file mode 100644 index 0000000000..8b55637715 --- /dev/null +++ b/packages/test-utils/src/test-rig.ts @@ -0,0 +1,1227 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { expect } from 'vitest'; +import { execSync, spawn, type ChildProcess } from 'node:child_process'; +import { mkdirSync, writeFileSync, readFileSync } from 'node:fs'; +import { join, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { env } from 'node:process'; +import { setTimeout as sleep } from 'node:timers/promises'; +import { DEFAULT_GEMINI_MODEL, GEMINI_DIR } from '@google/gemini-cli-core'; +import fs from 'node:fs'; +import * as 
/**
 * Returns the default time budget, in milliseconds, for waiting on the CLI.
 *
 * The budget depends on where the tests run: CI gets the longest one,
 * sandboxed (container) runs a medium one, and plain local runs the shortest.
 */
export function getDefaultTimeout(): number {
  if (env['CI']) {
    return 60000; // 1 minute in CI
  }
  if (env['GEMINI_SANDBOX']) {
    return 30000; // 30s in containers
  }
  return 15000; // 15s locally
}

/**
 * Repeatedly evaluates `predicate` until it returns true or `timeout` elapses.
 *
 * The predicate is always evaluated at least once (even with a zero timeout)
 * and one final time when the deadline is reached, so a condition that becomes
 * true during the last sleep is not missed.
 *
 * When `VERBOSE=true`, every fifth attempt and the final timeout are logged.
 *
 * @param predicate Synchronous check to evaluate on every attempt.
 * @param timeout Maximum time to keep polling, in milliseconds.
 * @param interval Delay between attempts, in milliseconds.
 * @returns `true` as soon as the predicate holds, `false` if time ran out.
 */
export async function poll(
  predicate: () => boolean,
  timeout: number,
  interval: number,
): Promise<boolean> {
  const verbose = env['VERBOSE'] === 'true';
  const deadline = Date.now() + timeout;
  let attempts = 0;

  for (;;) {
    attempts++;
    const result = predicate();
    if (verbose && attempts % 5 === 0) {
      console.log(
        `Poll attempt ${attempts}: ${result ? 'success' : 'waiting...'}`,
      );
    }
    if (result) {
      return true;
    }

    const remaining = deadline - Date.now();
    // Written as a negated `>` so that a NaN timeout also stops the loop.
    if (!(remaining > 0)) {
      break;
    }
    // Never sleep past the deadline: the overall wait stays within `timeout`.
    await sleep(Math.min(interval, remaining));
  }

  if (verbose) {
    console.log(`Poll timed out after ${attempts} attempts`);
  }
  return false;
}

/**
 * Turns a free-form test name into a lowercase, hyphen-separated slug.
 *
 * Every character outside `a-z0-9` becomes `-`, and runs of hyphens collapse
 * into a single one. Leading and trailing hyphens are kept.
 */
export function sanitizeTestName(name: string): string {
  const lowered = name.toLowerCase();
  const hyphenated = lowered.replace(/[^a-z0-9]/g, '-');
  return hyphenated.replace(/-+/g, '-');
}
/**
 * Validates the text a model produced and warns about missing content.
 *
 * An empty or whitespace-only `result` is a hard failure and throws. Missing
 * expected content is only a soft failure: it is reported through
 * `console.warn` and signalled by a `false` return value, never by throwing.
 *
 * String expectations are matched case-insensitively as substrings. RegExp
 * expectations are tested as given; their `lastIndex` is reset around the test
 * so that global/sticky patterns reused across calls give stable results.
 *
 * @param result The model output to inspect.
 * @param expectedContent A string, or a list of strings/regexes, that should
 *   appear in `result`. `null` (or an empty string) skips the content check.
 * @param testName Label used in the verbose success message.
 * @returns `false` when expected content is missing, `true` otherwise.
 * @throws Error when `result` is empty or contains only whitespace.
 */
export function validateModelOutput(
  result: string,
  expectedContent: string | (string | RegExp)[] | null = null,
  testName = '',
): boolean {
  // First, check if there's any output at all (this fails the test if missing)
  if (!result || result.trim().length === 0) {
    throw new Error('Expected LLM to return some output');
  }

  // Nothing specific was requested, so any non-empty output is acceptable.
  if (!expectedContent) {
    return true;
  }

  const expectations = Array.isArray(expectedContent)
    ? expectedContent
    : [expectedContent];
  const loweredResult = result.toLowerCase();

  const missingContent = expectations.filter((expected) => {
    if (typeof expected === 'string') {
      return !loweredResult.includes(expected.toLowerCase());
    }
    if (expected instanceof RegExp) {
      // `test` on a /g or /y regex resumes from `lastIndex`; start from a
      // clean slate and leave one behind so calls do not affect each other.
      expected.lastIndex = 0;
      const matched = expected.test(result);
      expected.lastIndex = 0;
      return !matched;
    }
    return false;
  });

  if (missingContent.length > 0) {
    console.warn(
      `Warning: LLM did not include expected content in response: ${missingContent.join(
        ', ',
      )}.`,
      'This is not ideal but not a test failure.',
    );
    console.warn(
      'The tool was called successfully, which is the main requirement.',
    );
    console.warn('Expected content:', expectedContent);
    console.warn('Actual output:', result);
    return false;
  }

  if (env['VERBOSE'] === 'true') {
    console.log(`${testName}: Model output validated successfully.`);
  }
  return true;
}
/**
 * Types `text` slowly, one character at a time, waiting for everything typed
 * so far to be echoed back in the (ANSI-stripped) output before continuing.
 *
 * This only works for short, single-line commands; use `sendKeys` to type
 * long prompts.
 *
 * @throws Error when the typed text is not echoed within 5s of a keystroke.
 */
async type(text: string) {
  let typedSoFar = '';
  for (const char of text) {
    if (char === '\r') {
      // wait >30ms before `enter` to avoid fast return conversion
      // from bufferFastReturn() in KeypressContent.tsx
      await sleep(50);
    }

    this.ptyProcess.write(char);
    typedSoFar += char;

    // Wait for the typed sequence so far to be echoed back.
    const echoed = await poll(
      () => stripAnsi(this.output).includes(typedSoFar),
      5000, // 5s timeout per character (generous for CI)
      10, // check frequently
    );
    if (echoed) {
      continue;
    }

    throw new Error(
      `Timed out waiting for typed text to appear in output: "${typedSoFar}".\nStripped output:\n${stripAnsi(
        this.output,
      )}`,
    );
  }
}

/**
 * Writes an entire string to the terminal at once. Necessary for some things
 * like commands, but may run into paste detection issues for larger strings.
 */
async sendText(text: string) {
  this.ptyProcess.write(text);
  await sleep(5);
}

/**
 * Simulates typing a string one character at a time, pausing briefly after
 * each key, to avoid paste detection. Unlike `type`, it does not wait for the
 * characters to be echoed back.
 */
async sendKeys(text: string) {
  const delay = 5;
  for (const key of text) {
    this.ptyProcess.write(key);
    await sleep(delay);
  }
}

/** Kills the underlying pty process. */
async kill() {
  this.ptyProcess.kill();
}

/**
 * Waits for the pty process to exit.
 *
 * @returns A promise resolving with the process exit code, or rejecting if
 *   the process has not exited after 60 seconds.
 */
expectExit(): Promise<number> {
  return new Promise<number>((resolve, reject) => {
    const onTimeout = () =>
      reject(
        new Error(`Test timed out: process did not exit within a minute.`),
      );
    const timer = setTimeout(onTimeout, 60000);

    this.ptyProcess.onExit(({ exitCode }) => {
      clearTimeout(timer);
      resolve(exitCode);
    });
  });
}
/**
 * Prepares an isolated workspace for one test.
 *
 * Creates a test directory and a separate home directory (named after the
 * sanitized test name) under `INTEGRATION_TEST_FILE_DIR`, or under the OS temp
 * directory when that variable is unset, then writes the project settings
 * file.
 *
 * @param testName Human-readable test name; also determines directory names.
 * @param options.settings Extra settings merged over the defaults.
 * @param options.fakeResponsesPath Golden responses file. It is copied into
 *   the test directory unless `REGENERATE_MODEL_GOLDENS=true`, in which case
 *   the copy is skipped here.
 */
setup(
  testName: string,
  options: {
    settings?: Record<string, unknown>;
    fakeResponsesPath?: string;
  } = {},
) {
  this.testName = testName;

  const slug = sanitizeTestName(testName);
  const rootDir =
    env['INTEGRATION_TEST_FILE_DIR'] || join(os.tmpdir(), 'gemini-cli-tests');
  this.testDir = join(rootDir, slug);
  this.homeDir = join(rootDir, slug + '-home');
  for (const dir of [this.testDir, this.homeDir]) {
    mkdirSync(dir, { recursive: true });
  }

  const goldenPath = options.fakeResponsesPath;
  if (goldenPath) {
    this.fakeResponsesPath = join(this.testDir, 'fake-responses.json');
    this.originalFakeResponsesPath = goldenPath;
    if (env['REGENERATE_MODEL_GOLDENS'] !== 'true') {
      fs.copyFileSync(goldenPath, this.fakeResponsesPath);
    }
  }

  // Create a settings file to point the CLI to the local collector
  this._createSettingsFile(options.settings);
}

/**
 * Writes `<testDir>/<GEMINI_DIR>/settings.json` with the defaults the test
 * harness relies on, shallow-merged with `overrideSettings`.
 *
 * Because the merge is a top-level spread, an override for a key such as
 * `general` replaces that whole section rather than merging into it.
 */
private _createSettingsFile(overrideSettings?: Record<string, unknown>) {
  const projectGeminiDir = join(this.testDir!, GEMINI_DIR);
  mkdirSync(projectGeminiDir, { recursive: true });

  // Telemetry is always written to an absolute path inside the home directory.
  const telemetryPath = join(this.homeDir!, 'telemetry.log');

  const sandboxSetting = env['GEMINI_SANDBOX'];
  const settings = {
    general: {
      // Nightly releases sometimes become out of sync with local code and
      // trigger auto-update, which causes tests to fail.
      disableAutoUpdate: true,
      previewFeatures: false,
    },
    telemetry: {
      enabled: true,
      target: 'local',
      otlpEndpoint: '',
      outfile: telemetryPath,
    },
    security: {
      auth: {
        selectedType: 'gemini-api-key',
      },
    },
    ui: {
      useAlternateBuffer: true,
    },
    model: {
      name: DEFAULT_GEMINI_MODEL,
    },
    // The literal string 'false' is normalised to the boolean false.
    sandbox: sandboxSetting !== 'false' ? sandboxSetting : false,
    // Don't show the IDE connection dialog when running from VsCode
    ide: { enabled: false, hasSeenNudge: true },
    ...overrideSettings, // Allow tests to override/add settings
  };

  const settingsPath = join(projectGeminiDir, 'settings.json');
  writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
}

/**
 * Writes `content` to `fileName` inside the test directory.
 *
 * @returns The full path of the written file.
 */
createFile(fileName: string, content: string) {
  const filePath = join(this.testDir!, fileName);
  writeFileSync(filePath, content);
  return filePath;
}

/** Creates `dir` (and any missing parents) inside the test directory. */
mkdir(dir: string) {
  const target = join(this.testDir!, dir);
  mkdirSync(target, { recursive: true });
}

/**
 * Runs the `sync` command so pending file-system writes are done before a
 * process is spawned. Does nothing on Windows.
 */
sync() {
  if (os.platform() === 'win32') {
    return;
  }
  // ensure file system is done before spawning
  execSync('sync', { cwd: this.testDir! });
}
/**
 * Runs the CLI non-interactively in the test directory and resolves with its
 * output.
 *
 * The child is spawned with `GEMINI_CLI_HOME` pointing at the test home
 * directory and is tracked so `cleanup` can kill it if it is still running.
 *
 * @param options.args Extra CLI argument(s) appended after the initial ones.
 * @param options.stdin Text written to the child's stdin.
 * @param options.stdinDoesNotEnd Leave stdin open instead of closing it.
 * @param options.yolo Pass `--yolo`; enabled unless explicitly `false`.
 * @param options.timeout Kill the child after this many ms (default 120000).
 * @param options.env Extra environment variables for the child.
 * @returns Resolves with stdout (telemetry-filtered under Podman). Non-empty
 *   stderr is appended under a `StdErr:` heading unless the run requested
 *   JSON output. Rejects on spawn error, timeout, or a non-zero exit code.
 */
run(options: {
  args?: string | string[];
  stdin?: string;
  stdinDoesNotEnd?: boolean;
  yolo?: boolean;
  timeout?: number;
  env?: Record<string, string | undefined>;
}): Promise<string> {
  const yolo = options.yolo !== false;
  const { command, initialArgs } = this._getCommandAndArgs(
    yolo ? ['--yolo'] : [],
  );

  const commandArgs = [...initialArgs];
  if (Array.isArray(options.args)) {
    commandArgs.push(...options.args);
  } else if (options.args) {
    commandArgs.push(options.args);
  }

  const child = spawn(command, commandArgs, {
    cwd: this.testDir!,
    stdio: 'pipe',
    env: {
      ...process.env,
      GEMINI_CLI_HOME: this.homeDir!,
      ...options.env,
    },
  });
  this._spawnedProcesses.push(child);

  // Handle stdin if provided
  if (options.stdin) {
    child.stdin!.write(options.stdin);
  }
  if (!options.stdinDoesNotEnd) {
    child.stdin!.end();
  }

  const echoOutput = env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true';
  let stdout = '';
  let stderr = '';

  child.stdout!.setEncoding('utf8');
  child.stdout!.on('data', (chunk: string) => {
    stdout += chunk;
    if (echoOutput) {
      process.stdout.write(chunk);
    }
  });

  child.stderr!.setEncoding('utf8');
  child.stderr!.on('data', (chunk: string) => {
    stderr += chunk;
    if (echoOutput) {
      process.stderr.write(chunk);
    }
  });

  const timeout = options.timeout ?? 120000;
  return new Promise<string>((resolve, reject) => {
    const timer = setTimeout(() => {
      child.kill('SIGKILL');
      reject(
        new Error(
          `Process timed out after ${timeout}ms.\nStdout:\n${stdout}\nStderr:\n${stderr}`,
        ),
      );
    }, timeout);

    child.on('error', (err) => {
      clearTimeout(timer);
      reject(err);
    });

    // `code` is null when the child was terminated by a signal.
    child.on('close', (code: number | null) => {
      clearTimeout(timer);
      if (code !== 0) {
        reject(new Error(`Process exited with code ${code}:\n${stderr}`));
        return;
      }

      // Store the raw stdout for Podman telemetry parsing
      this._lastRunStdout = stdout;

      // Filter out telemetry output when running with Podman
      const result = this._filterPodmanTelemetry(stdout);

      // For a JSON output run, stderr is left out because appending it
      // would corrupt the JSON.
      const isJsonOutput =
        commandArgs.includes('--output-format') &&
        commandArgs.includes('json');

      resolve(
        stderr && !isJsonOutput ? `${result}\n\nStdErr:\n${stderr}` : result,
      );
    });
  });
}
/**
 * Runs the CLI with exactly `args` (no `--yolo` is added) in the test
 * directory and resolves with its output.
 *
 * @param args CLI arguments appended after the initial ones.
 * @param options.stdin Text written to the child's stdin, which is then
 *   closed.
 * @param options.timeout Kill the child after this many ms (default 120000).
 * @param options.env Extra environment variables for the child.
 * @returns Resolves with stdout (telemetry-filtered under Podman). Non-empty
 *   stderr is appended under a `StdErr:` heading unless the run requested
 *   JSON output. Rejects on spawn error, timeout, or a non-zero exit code.
 */
runCommand(
  args: string[],
  options: {
    stdin?: string;
    timeout?: number;
    env?: Record<string, string | undefined>;
  } = {},
): Promise<string> {
  const { command, initialArgs } = this._getCommandAndArgs();
  const commandArgs = [...initialArgs, ...args];

  const child = spawn(command, commandArgs, {
    cwd: this.testDir!,
    stdio: 'pipe',
    env: {
      ...process.env,
      GEMINI_CLI_HOME: this.homeDir!,
      ...options.env,
    },
  });
  this._spawnedProcesses.push(child);

  // NOTE(review): stdin is only closed when input was supplied, whereas
  // `run` closes it by default. Confirm that leaving it open is intended.
  if (options.stdin) {
    child.stdin!.write(options.stdin);
    child.stdin!.end();
  }

  const echoOutput = env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true';
  let stdout = '';
  let stderr = '';

  child.stdout!.setEncoding('utf8');
  child.stdout!.on('data', (chunk: string) => {
    stdout += chunk;
    if (echoOutput) {
      process.stdout.write(chunk);
    }
  });

  child.stderr!.setEncoding('utf8');
  child.stderr!.on('data', (chunk: string) => {
    stderr += chunk;
    if (echoOutput) {
      process.stderr.write(chunk);
    }
  });

  const timeout = options.timeout ?? 120000;
  return new Promise<string>((resolve, reject) => {
    const timer = setTimeout(() => {
      child.kill('SIGKILL');
      reject(
        new Error(
          `Process timed out after ${timeout}ms.\nStdout:\n${stdout}\nStderr:\n${stderr}`,
        ),
      );
    }, timeout);

    child.on('error', (err) => {
      clearTimeout(timer);
      reject(err);
    });

    // `code` is null when the child was terminated by a signal.
    child.on('close', (code: number | null) => {
      clearTimeout(timer);
      if (code !== 0) {
        reject(new Error(`Process exited with code ${code}:\n${stderr}`));
        return;
      }

      this._lastRunStdout = stdout;
      const result = this._filterPodmanTelemetry(stdout);

      // For a JSON output run, stderr is left out because appending it
      // would corrupt the JSON.
      const isJsonOutput =
        commandArgs.includes('--output-format') &&
        commandArgs.includes('json');

      resolve(
        stderr && !isJsonOutput ? `${result}\n\nStdErr:\n${stderr}` : result,
      );
    });
  });
}

/**
 * Reads `fileName` from the test directory as UTF-8 text.
 *
 * When `KEEP_OUTPUT` or `VERBOSE` is `'true'`, the content is also logged
 * between `--- FILE ---` markers.
 */
readFile(fileName: string) {
  const filePath = join(this.testDir!, fileName);
  const content = readFileSync(filePath, 'utf-8');

  const echoContent =
    env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true';
  if (echoContent) {
    console.log(`--- FILE: ${filePath} ---`);
    console.log(content);
    console.log(`--- END FILE: ${filePath} ---`);
  }
  return content;
}
/**
 * Waits (up to 2 seconds) for the telemetry log in the home directory to
 * exist and contain a `"scopeMetrics"` entry.
 *
 * The wait is best-effort: the outcome of the poll is not reported, so this
 * resolves whether or not the file became ready.
 */
async waitForTelemetryReady() {
  // Telemetry is always written to the home directory
  const logFilePath = join(this.homeDir!, 'telemetry.log');

  const hasMetrics = () => {
    if (!fs.existsSync(logFilePath)) {
      return false;
    }
    try {
      // Look for the "scopeMetrics" marker as a sign of meaningful content
      return readFileSync(logFilePath, 'utf-8').includes('"scopeMetrics"');
    } catch {
      // The file may disappear or be unreadable mid-write; just retry.
      return false;
    }
  };

  await poll(
    hasMetrics,
    2000, // 2 seconds max - reduced since telemetry should flush on exit now
    100, // check every 100ms
  );
}

/**
 * Waits until the telemetry log contains an event named
 * `gemini_cli.<eventName>`.
 *
 * @param eventName Event name without the `gemini_cli.` prefix.
 * @param timeout Max wait in ms; falsy values use `getDefaultTimeout()`.
 * @returns `true` if the event was seen, `false` on timeout.
 */
async waitForTelemetryEvent(eventName: string, timeout?: number) {
  const budget = timeout || getDefaultTimeout();
  const wantedName = `gemini_cli.${eventName}`;

  await this.waitForTelemetryReady();

  return poll(
    () =>
      this._readAndParseTelemetryLog().some(
        (logData) =>
          logData.attributes && logData.attributes['event.name'] === wantedName,
      ),
    budget,
    100,
  );
}

/**
 * Waits until a call to `toolName` shows up in the tool logs.
 *
 * @param toolName Tool name to look for.
 * @param timeout Max wait in ms; falsy values use `getDefaultTimeout()`.
 * @param matchArgs Optional filter over the logged call arguments; when
 *   omitted, any call to the tool counts.
 * @returns `true` if a matching call was seen, `false` on timeout.
 */
async waitForToolCall(
  toolName: string,
  timeout?: number,
  matchArgs?: (args: string) => boolean,
) {
  // Use environment-specific timeout
  const budget = timeout || getDefaultTimeout();

  // Wait for telemetry to be ready before polling for tool calls
  await this.waitForTelemetryReady();

  const sawCall = () =>
    this.readToolLogs().some(
      (log) =>
        log.toolRequest.name === toolName &&
        (matchArgs?.call(this, log.toolRequest.args) ?? true),
    );
  return poll(sawCall, budget, 100);
}

/**
 * Asserts that at least one of `toolNames` was called successfully.
 *
 * Polls the tool logs until a successful, matching call appears or the
 * timeout elapses, then fails the surrounding test via `expect` if none was
 * found.
 *
 * @param toolNames Acceptable tool names; any one of them satisfies the check.
 * @param timeout Max wait in ms; falsy values use `getDefaultTimeout()`.
 * @param matchArgs Optional filter over the logged call arguments.
 */
async expectToolCallSuccess(
  toolNames: string[],
  timeout?: number,
  matchArgs?: (args: string) => boolean,
) {
  // Use environment-specific timeout
  const budget = timeout || getDefaultTimeout();

  // Wait for telemetry to be ready before polling for tool calls
  await this.waitForTelemetryReady();

  const sawSuccessfulCall = () => {
    const toolLogs = this.readToolLogs();
    return toolNames.some((name) =>
      toolLogs.some(
        (log) =>
          log.toolRequest.name === name &&
          log.toolRequest.success &&
          (matchArgs?.call(this, log.toolRequest.args) ?? true),
      ),
    );
  };
  const success = await poll(sawSuccessfulCall, budget, 100);

  expect(
    success,
    `Expected to find successful toolCalls for ${JSON.stringify(toolNames)}`,
  ).toBe(true);
}

/**
 * Waits until any one of `toolNames` shows up in the tool logs, regardless of
 * whether the call succeeded.
 *
 * @param toolNames Tool names to look for.
 * @param timeout Max wait in ms; falsy values use `getDefaultTimeout()`.
 * @returns `true` if one of the tools was called, `false` on timeout.
 */
async waitForAnyToolCall(toolNames: string[], timeout?: number) {
  const budget = timeout || getDefaultTimeout();

  // Wait for telemetry to be ready before polling for tool calls
  await this.waitForTelemetryReady();

  const sawAnyCall = () => {
    const toolLogs = this.readToolLogs();
    return toolNames.some((name) =>
      toolLogs.some((log) => log.toolRequest.name === name),
    );
  };
  return poll(sawAnyCall, budget, 100);
}
contextStart = Math.max(0, matchIndex - 500); + const contextEnd = Math.min(stdout.length, matchIndex + 500); + const context = stdout.substring(contextStart, contextEnd); + + // Look for function_args in the context + let args = '{}'; + const argsMatch = context.match(/function_args:\s*'([^']+)'/); + if (argsMatch) { + args = argsMatch[1]; + } + + // Also try to find function_name to double-check + // Updated regex to handle tool names with hyphens and underscores + const nameMatch = context.match(/function_name:\s*'([\w-]+)'/); + const actualToolName = nameMatch ? nameMatch[1] : toolName; + + logs.push({ + timestamp: Date.now(), + toolRequest: { + name: actualToolName, + args: args, + success: success, + duration_ms: duration, + }, + }); + } + + // If no matches found with the simple pattern, try the JSON parsing approach + // in case the format changes + if (logs.length === 0) { + const lines = stdout.split(os.EOL); + let currentObject = ''; + let inObject = false; + let braceDepth = 0; + + for (const line of lines) { + if (!inObject && line.trim() === '{') { + inObject = true; + braceDepth = 1; + currentObject = line + '\n'; + } else if (inObject) { + currentObject += line + '\n'; + + // Count braces + for (const char of line) { + if (char === '{') braceDepth++; + else if (char === '}') braceDepth--; + } + + // If we've closed all braces, try to parse the object + if (braceDepth === 0) { + inObject = false; + try { + const obj = JSON.parse(currentObject); + + // Check for tool call in different formats + if ( + obj.body && + obj.body.includes('Tool call:') && + obj.attributes + ) { + const bodyMatch = obj.body.match(/Tool call: (\w+)\./); + if (bodyMatch) { + logs.push({ + timestamp: obj.timestamp || Date.now(), + toolRequest: { + name: bodyMatch[1], + args: obj.attributes.function_args || '{}', + success: obj.attributes.success !== false, + duration_ms: obj.attributes.duration_ms || 0, + }, + }); + } + } else if ( + obj.attributes && + 
obj.attributes['event.name'] === 'gemini_cli.tool_call' + ) { + logs.push({ + timestamp: obj.attributes['event.timestamp'], + toolRequest: { + name: obj.attributes.function_name, + args: obj.attributes.function_args, + success: obj.attributes.success, + duration_ms: obj.attributes.duration_ms, + }, + }); + } + } catch { + // Not valid JSON + } + currentObject = ''; + } + } + } + } + + return logs; + } + + private _readAndParseTelemetryLog(): ParsedLog[] { + // Telemetry is always written to the test directory + const logFilePath = join(this.homeDir!, 'telemetry.log'); + + if (!logFilePath || !fs.existsSync(logFilePath)) { + return []; + } + + const content = readFileSync(logFilePath, 'utf-8'); + + // Split the content into individual JSON objects + // They are separated by "}\n{" + const jsonObjects = content + .split(/}\n{/) + .map((obj, index, array) => { + // Add back the braces we removed during split + if (index > 0) obj = '{' + obj; + if (index < array.length - 1) obj = obj + '}'; + return obj.trim(); + }) + .filter((obj) => obj); + + const logs: ParsedLog[] = []; + + for (const jsonStr of jsonObjects) { + try { + const logData = JSON.parse(jsonStr); + logs.push(logData); + } catch (e) { + // Skip objects that aren't valid JSON + if (env['VERBOSE'] === 'true') { + console.error('Failed to parse telemetry object:', e); + } + } + } + + return logs; + } + + readToolLogs() { + // For Podman, first check if telemetry file exists and has content + // If not, fall back to parsing from stdout + if (env['GEMINI_SANDBOX'] === 'podman') { + // Try reading from file first + const logFilePath = join(this.homeDir!, 'telemetry.log'); + + if (fs.existsSync(logFilePath)) { + try { + const content = readFileSync(logFilePath, 'utf-8'); + if (content && content.includes('"event.name"')) { + // File has content, use normal file parsing + // Continue to the normal file parsing logic below + } else if (this._lastRunStdout) { + // File exists but is empty or doesn't have events, 
parse from stdout + return this._parseToolLogsFromStdout(this._lastRunStdout); + } + } catch { + // Error reading file, fall back to stdout + if (this._lastRunStdout) { + return this._parseToolLogsFromStdout(this._lastRunStdout); + } + } + } else if (this._lastRunStdout) { + // No file exists, parse from stdout + return this._parseToolLogsFromStdout(this._lastRunStdout); + } + } + + const parsedLogs = this._readAndParseTelemetryLog(); + const logs: { + toolRequest: { + name: string; + args: string; + success: boolean; + duration_ms: number; + }; + }[] = []; + + for (const logData of parsedLogs) { + // Look for tool call logs + if ( + logData.attributes && + logData.attributes['event.name'] === 'gemini_cli.tool_call' + ) { + const toolName = logData.attributes.function_name!; + logs.push({ + toolRequest: { + name: toolName, + args: logData.attributes.function_args ?? '{}', + success: logData.attributes.success ?? false, + duration_ms: logData.attributes.duration_ms ?? 0, + }, + }); + } + } + + return logs; + } + + readAllApiRequest(): ParsedLog[] { + const logs = this._readAndParseTelemetryLog(); + const apiRequests = logs.filter( + (logData) => + logData.attributes && + logData.attributes['event.name'] === `gemini_cli.api_request`, + ); + return apiRequests; + } + + readLastApiRequest(): ParsedLog | null { + const logs = this._readAndParseTelemetryLog(); + const apiRequests = logs.filter( + (logData) => + logData.attributes && + logData.attributes['event.name'] === `gemini_cli.api_request`, + ); + return apiRequests.pop() || null; + } + + async waitForMetric(metricName: string, timeout?: number) { + await this.waitForTelemetryReady(); + + const fullName = metricName.startsWith('gemini_cli.') + ? 
metricName + : `gemini_cli.${metricName}`; + + return poll( + () => { + const logs = this._readAndParseTelemetryLog(); + for (const logData of logs) { + if (logData.scopeMetrics) { + for (const scopeMetric of logData.scopeMetrics) { + for (const metric of scopeMetric.metrics) { + if (metric.descriptor.name === fullName) { + return true; + } + } + } + } + } + return false; + }, + timeout ?? getDefaultTimeout(), + 100, + ); + } + + readMetric(metricName: string): Record | null { + const logs = this._readAndParseTelemetryLog(); + for (const logData of logs) { + if (logData.scopeMetrics) { + for (const scopeMetric of logData.scopeMetrics) { + for (const metric of scopeMetric.metrics) { + if (metric.descriptor.name === `gemini_cli.${metricName}`) { + return metric; + } + } + } + } + } + return null; + } + + async runInteractive(options?: { + args?: string | string[]; + yolo?: boolean; + env?: Record; + }): Promise { + const yolo = options?.yolo !== false; + const { command, initialArgs } = this._getCommandAndArgs( + yolo ? ['--yolo'] : [], + ); + const commandArgs = [...initialArgs]; + + const envVars = { + ...process.env, + GEMINI_CLI_HOME: this.homeDir!, + ...options?.env, + }; + + const ptyOptions: pty.IPtyForkOptions = { + name: 'xterm-color', + cols: 80, + rows: 80, + cwd: this.testDir!, + env: Object.fromEntries( + Object.entries(envVars).filter(([, v]) => v !== undefined), + ) as { [key: string]: string }, + }; + + const executable = command === 'node' ? 
process.execPath : command; + const ptyProcess = pty.spawn(executable, commandArgs, ptyOptions); + + const run = new InteractiveRun(ptyProcess); + this._interactiveRuns.push(run); + // Wait for the app to be ready + await run.expectText(' Type your message or @path/to/file', 30000); + return run; + } + + readHookLogs() { + const parsedLogs = this._readAndParseTelemetryLog(); + const logs: { + hookCall: { + hook_event_name: string; + hook_name: string; + hook_input: Record; + hook_output: Record; + exit_code: number; + stdout: string; + stderr: string; + duration_ms: number; + success: boolean; + error: string; + }; + }[] = []; + + for (const logData of parsedLogs) { + // Look for tool call logs + if ( + logData.attributes && + logData.attributes['event.name'] === 'gemini_cli.hook_call' + ) { + logs.push({ + hookCall: { + hook_event_name: logData.attributes.hook_event_name ?? '', + hook_name: logData.attributes.hook_name ?? '', + hook_input: logData.attributes.hook_input ?? {}, + hook_output: logData.attributes.hook_output ?? {}, + exit_code: logData.attributes.exit_code ?? 0, + stdout: logData.attributes.stdout ?? '', + stderr: logData.attributes.stderr ?? '', + duration_ms: logData.attributes.duration_ms ?? 0, + success: logData.attributes.success ?? false, + error: logData.attributes.error ?? '', + }, + }); + } + } + + return logs; + } + + async pollCommand( + commandFn: () => Promise, + predicateFn: () => boolean, + timeout: number = 30000, + interval: number = 1000, + ) { + const startTime = Date.now(); + while (Date.now() - startTime < timeout) { + await commandFn(); + // Give it a moment to process + await sleep(500); + if (predicateFn()) { + return; + } + await sleep(interval); + } + throw new Error(`pollCommand timed out after ${timeout}ms`); + } +}