fix(test): improve test isolation and enable subagent evaluations (#18138)

This commit is contained in:
Coco Sheng
2026-02-03 14:05:26 -05:00
committed by GitHub
parent 4aa295994d
commit 3183e4137a
2 changed files with 92 additions and 17 deletions

View File

@@ -7,9 +7,13 @@
import { it } from 'vitest';
import fs from 'node:fs';
import path from 'node:path';
import crypto from 'node:crypto';
import { execSync } from 'node:child_process';
import { TestRig } from '@google/gemini-cli-test-utils';
import { createUnauthorizedToolError } from '@google/gemini-cli-core';
import {
createUnauthorizedToolError,
parseAgentMarkdown,
} from '@google/gemini-cli-core';
export * from '@google/gemini-cli-test-utils';
@@ -42,10 +46,55 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
rig.setup(evalCase.name, evalCase.params);
if (evalCase.files) {
const acknowledgedAgents: Record<string, Record<string, string>> = {};
const projectRoot = fs.realpathSync(rig.testDir!);
for (const [filePath, content] of Object.entries(evalCase.files)) {
const fullPath = path.join(rig.testDir!, filePath);
fs.mkdirSync(path.dirname(fullPath), { recursive: true });
fs.writeFileSync(fullPath, content);
// If it's an agent file, calculate hash for acknowledgement
if (
filePath.startsWith('.gemini/agents/') &&
filePath.endsWith('.md')
) {
const hash = crypto
.createHash('sha256')
.update(content)
.digest('hex');
try {
const agentDefs = await parseAgentMarkdown(fullPath, content);
if (agentDefs.length > 0) {
const agentName = agentDefs[0].name;
if (!acknowledgedAgents[projectRoot]) {
acknowledgedAgents[projectRoot] = {};
}
acknowledgedAgents[projectRoot][agentName] = hash;
}
} catch (error) {
console.warn(
`Failed to parse agent for test acknowledgement: ${filePath}`,
error,
);
}
}
}
// Write acknowledged_agents.json to the home directory
if (Object.keys(acknowledgedAgents).length > 0) {
const ackPath = path.join(
rig.homeDir!,
'.gemini',
'acknowledgments',
'agents.json',
);
fs.mkdirSync(path.dirname(ackPath), { recursive: true });
fs.writeFileSync(
ackPath,
JSON.stringify(acknowledgedAgents, null, 2),
);
}
const execOptions = { cwd: rig.testDir!, stdio: 'inherit' as const };
@@ -66,6 +115,7 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
const result = await rig.run({
args: evalCase.prompt,
approvalMode: evalCase.approvalMode ?? 'yolo',
timeout: evalCase.timeout,
env: {
GEMINI_CLI_ACTIVITY_LOG_FILE: activityLogFile,
},
@@ -88,6 +138,11 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
});
}
if (rig._lastRunStderr) {
const stderrFile = path.join(logDir, `${sanitizedName}.stderr.log`);
await fs.promises.writeFile(stderrFile, rig._lastRunStderr);
}
await fs.promises.writeFile(
logFile,
JSON.stringify(rig.readToolLogs(), null, 2),
@@ -114,6 +169,7 @@ export interface EvalCase {
name: string;
params?: Record<string, any>;
prompt: string;
timeout?: number;
files?: Record<string, string>;
approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
assert: (rig: TestRig, result: string) => Promise<void>;

View File

@@ -277,6 +277,7 @@ export class TestRig {
homeDir: string | null = null;
testName?: string;
_lastRunStdout?: string;
_lastRunStderr?: string;
// Path to the copied fake responses file for this test.
fakeResponsesPath?: string;
// Original fake responses file path for rewriting goldens in record mode.
@@ -396,6 +397,34 @@ export class TestRig {
return { command, initialArgs };
}
/**
 * Builds the environment for spawned CLI processes, stripping any
 * GEMINI_* / GOOGLE_GEMINI_* variables inherited from the host that
 * could interfere with test isolation, while letting an explicit
 * allowlist through.
 *
 * @param extraEnv Optional overrides merged in last (highest precedence).
 * @returns A copy of process.env with interfering variables removed and
 *          GEMINI_CLI_HOME pointed at this rig's isolated home directory.
 */
private _getCleanEnv(
  extraEnv?: Record<string, string | undefined>,
): Record<string, string | undefined> {
  // Variables we deliberately keep even though they match the stripped
  // prefixes (GOOGLE_API_KEY is listed for intent, though it matches
  // neither prefix).
  const allowlist = new Set([
    'GEMINI_API_KEY',
    'GOOGLE_API_KEY',
    'GEMINI_MODEL',
    'GEMINI_DEBUG',
    'GEMINI_CLI_TEST_VAR',
  ]);
  const interferes = (key: string): boolean =>
    (key.startsWith('GEMINI_') || key.startsWith('GOOGLE_GEMINI_')) &&
    !allowlist.has(key) &&
    !key.startsWith('GEMINI_CLI_ACTIVITY_LOG');
  // Copy the host environment, filtering out interfering keys as we go.
  const cleanEnv: Record<string, string | undefined> = {};
  for (const [key, value] of Object.entries(process.env)) {
    if (!interferes(key)) {
      cleanEnv[key] = value;
    }
  }
  return {
    ...cleanEnv,
    GEMINI_CLI_HOME: this.homeDir!,
    ...extraEnv,
  };
}
run(options: {
args?: string | string[];
stdin?: string;
@@ -433,11 +462,7 @@ export class TestRig {
const child = spawn(command, commandArgs, {
cwd: this.testDir!,
stdio: 'pipe',
env: {
...process.env,
GEMINI_CLI_HOME: this.homeDir!,
...options.env,
},
env: this._getCleanEnv(options.env),
});
this._spawnedProcesses.push(child);
@@ -487,6 +512,7 @@ export class TestRig {
child.on('close', (code: number) => {
clearTimeout(timer);
this._lastRunStderr = stderr;
if (code === 0) {
// Store the raw stdout for Podman telemetry parsing
this._lastRunStdout = stdout;
@@ -573,7 +599,7 @@ export class TestRig {
const child = spawn(command, allArgs, {
cwd: this.testDir!,
stdio: 'pipe',
env: { ...process.env, GEMINI_CLI_HOME: this.homeDir! },
env: this._getCleanEnv(),
signal: options?.signal,
});
this._spawnedProcesses.push(child);
@@ -611,11 +637,7 @@ export class TestRig {
const child = spawn(command, commandArgs, {
cwd: this.testDir!,
stdio: 'pipe',
env: {
...process.env,
GEMINI_CLI_HOME: this.homeDir!,
...options.env,
},
env: this._getCleanEnv(options.env),
});
this._spawnedProcesses.push(child);
@@ -661,6 +683,7 @@ export class TestRig {
child.on('close', (code: number) => {
clearTimeout(timer);
this._lastRunStderr = stderr;
if (code === 0) {
this._lastRunStdout = stdout;
const result = this._filterPodmanTelemetry(stdout);
@@ -1179,11 +1202,7 @@ export class TestRig {
]);
const commandArgs = [...initialArgs];
const envVars = {
...process.env,
GEMINI_CLI_HOME: this.homeDir!,
...options?.env,
};
const envVars = this._getCleanEnv(options?.env);
const ptyOptions: pty.IPtyForkOptions = {
name: 'xterm-color',