From 3183e4137a63767a36f6619baab98fcc057e315f Mon Sep 17 00:00:00 2001 From: Coco Sheng Date: Tue, 3 Feb 2026 14:05:26 -0500 Subject: [PATCH] fix(test): improve test isolation and enable subagent evaluations (#18138) --- evals/test-helper.ts | 58 ++++++++++++++++++++++++++++- packages/test-utils/src/test-rig.ts | 51 +++++++++++++++++-------- 2 files changed, 92 insertions(+), 17 deletions(-) diff --git a/evals/test-helper.ts b/evals/test-helper.ts index 65656742ef..37d79eb6a4 100644 --- a/evals/test-helper.ts +++ b/evals/test-helper.ts @@ -7,9 +7,13 @@ import { it } from 'vitest'; import fs from 'node:fs'; import path from 'node:path'; +import crypto from 'node:crypto'; import { execSync } from 'node:child_process'; import { TestRig } from '@google/gemini-cli-test-utils'; -import { createUnauthorizedToolError } from '@google/gemini-cli-core'; +import { + createUnauthorizedToolError, + parseAgentMarkdown, +} from '@google/gemini-cli-core'; export * from '@google/gemini-cli-test-utils'; @@ -42,10 +46,55 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) { rig.setup(evalCase.name, evalCase.params); if (evalCase.files) { + const acknowledgedAgents: Record> = {}; + const projectRoot = fs.realpathSync(rig.testDir!); + for (const [filePath, content] of Object.entries(evalCase.files)) { const fullPath = path.join(rig.testDir!, filePath); fs.mkdirSync(path.dirname(fullPath), { recursive: true }); fs.writeFileSync(fullPath, content); + + // If it's an agent file, calculate hash for acknowledgement + if ( + filePath.startsWith('.gemini/agents/') && + filePath.endsWith('.md') + ) { + const hash = crypto + .createHash('sha256') + .update(content) + .digest('hex'); + + try { + const agentDefs = await parseAgentMarkdown(fullPath, content); + if (agentDefs.length > 0) { + const agentName = agentDefs[0].name; + if (!acknowledgedAgents[projectRoot]) { + acknowledgedAgents[projectRoot] = {}; + } + acknowledgedAgents[projectRoot][agentName] = hash; + } + } catch (error) { + console.warn( + `Failed to parse agent for test acknowledgement: ${filePath}`, + error, + ); + } + } + } + + // Write acknowledged_agents.json to the home directory + if (Object.keys(acknowledgedAgents).length > 0) { + const ackPath = path.join( + rig.homeDir!, + '.gemini', + 'acknowledgments', + 'agents.json', + ); + fs.mkdirSync(path.dirname(ackPath), { recursive: true }); + fs.writeFileSync( + ackPath, + JSON.stringify(acknowledgedAgents, null, 2), + ); } const execOptions = { cwd: rig.testDir!, stdio: 'inherit' as const }; @@ -66,6 +115,7 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) { const result = await rig.run({ args: evalCase.prompt, approvalMode: evalCase.approvalMode ?? 'yolo', + timeout: evalCase.timeout, env: { GEMINI_CLI_ACTIVITY_LOG_FILE: activityLogFile, }, @@ -88,6 +138,11 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) { }); } + if (rig._lastRunStderr) { + const stderrFile = path.join(logDir, `${sanitizedName}.stderr.log`); + await fs.promises.writeFile(stderrFile, rig._lastRunStderr); + } + await fs.promises.writeFile( logFile, JSON.stringify(rig.readToolLogs(), null, 2), @@ -114,6 +169,7 @@ export interface EvalCase { name: string; params?: Record; prompt: string; + timeout?: number; files?: Record; approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan'; assert: (rig: TestRig, result: string) => Promise; diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts index b1dcadb097..1401304560 100644 --- a/packages/test-utils/src/test-rig.ts +++ b/packages/test-utils/src/test-rig.ts @@ -277,6 +277,7 @@ export class TestRig { homeDir: string | null = null; testName?: string; _lastRunStdout?: string; + _lastRunStderr?: string; // Path to the copied fake responses file for this test. fakeResponsesPath?: string; // Original fake responses file path for rewriting goldens in record mode. @@ -396,6 +397,34 @@ export class TestRig { return { command, initialArgs }; } + private _getCleanEnv( + extraEnv?: Record, + ): Record { + const cleanEnv: Record = { ...process.env }; + + // Clear all GEMINI_ environment variables that might interfere with tests + // except for those we explicitly want to keep or set. + for (const key of Object.keys(cleanEnv)) { + if ( + (key.startsWith('GEMINI_') || key.startsWith('GOOGLE_GEMINI_')) && + key !== 'GEMINI_API_KEY' && + key !== 'GOOGLE_API_KEY' && + key !== 'GEMINI_MODEL' && + key !== 'GEMINI_DEBUG' && + key !== 'GEMINI_CLI_TEST_VAR' && + !key.startsWith('GEMINI_CLI_ACTIVITY_LOG') + ) { + delete cleanEnv[key]; + } + } + + return { + ...cleanEnv, + GEMINI_CLI_HOME: this.homeDir!, + ...extraEnv, + }; + } + run(options: { args?: string | string[]; stdin?: string; @@ -433,11 +462,7 @@ export class TestRig { const child = spawn(command, commandArgs, { cwd: this.testDir!, stdio: 'pipe', - env: { - ...process.env, - GEMINI_CLI_HOME: this.homeDir!, - ...options.env, - }, + env: this._getCleanEnv(options.env), }); this._spawnedProcesses.push(child); @@ -487,6 +512,7 @@ export class TestRig { child.on('close', (code: number) => { clearTimeout(timer); + this._lastRunStderr = stderr; if (code === 0) { // Store the raw stdout for Podman telemetry parsing this._lastRunStdout = stdout; @@ -573,7 +599,7 @@ export class TestRig { const child = spawn(command, allArgs, { cwd: this.testDir!, stdio: 'pipe', - env: { ...process.env, GEMINI_CLI_HOME: this.homeDir! }, + env: this._getCleanEnv(), signal: options?.signal, }); this._spawnedProcesses.push(child); @@ -611,11 +637,7 @@ export class TestRig { const child = spawn(command, commandArgs, { cwd: this.testDir!, stdio: 'pipe', - env: { - ...process.env, - GEMINI_CLI_HOME: this.homeDir!, - ...options.env, - }, + env: this._getCleanEnv(options.env), }); this._spawnedProcesses.push(child); @@ -661,6 +683,7 @@ export class TestRig { child.on('close', (code: number) => { clearTimeout(timer); + this._lastRunStderr = stderr; if (code === 0) { this._lastRunStdout = stdout; const result = this._filterPodmanTelemetry(stdout); @@ -1179,11 +1202,7 @@ export class TestRig { ]); const commandArgs = [...initialArgs]; - const envVars = { - ...process.env, - GEMINI_CLI_HOME: this.homeDir!, - ...options?.env, - }; + const envVars = this._getCleanEnv(options?.env); const ptyOptions: pty.IPtyForkOptions = { name: 'xterm-color',