fix(test): improve test isolation and enable subagent evaluations (#18138)

This commit is contained in:
Coco Sheng
2026-02-03 14:05:26 -05:00
committed by GitHub
parent 4aa295994d
commit 3183e4137a
2 changed files with 92 additions and 17 deletions

View File

@@ -7,9 +7,13 @@
import { it } from 'vitest';
import fs from 'node:fs';
import path from 'node:path';
import crypto from 'node:crypto';
import { execSync } from 'node:child_process';
import { TestRig } from '@google/gemini-cli-test-utils';
import { createUnauthorizedToolError } from '@google/gemini-cli-core';
import {
createUnauthorizedToolError,
parseAgentMarkdown,
} from '@google/gemini-cli-core';
export * from '@google/gemini-cli-test-utils';
@@ -42,10 +46,55 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
rig.setup(evalCase.name, evalCase.params);
if (evalCase.files) {
const acknowledgedAgents: Record<string, Record<string, string>> = {};
const projectRoot = fs.realpathSync(rig.testDir!);
for (const [filePath, content] of Object.entries(evalCase.files)) {
const fullPath = path.join(rig.testDir!, filePath);
fs.mkdirSync(path.dirname(fullPath), { recursive: true });
fs.writeFileSync(fullPath, content);
// If it's an agent file, calculate hash for acknowledgement
if (
filePath.startsWith('.gemini/agents/') &&
filePath.endsWith('.md')
) {
const hash = crypto
.createHash('sha256')
.update(content)
.digest('hex');
try {
const agentDefs = await parseAgentMarkdown(fullPath, content);
if (agentDefs.length > 0) {
const agentName = agentDefs[0].name;
if (!acknowledgedAgents[projectRoot]) {
acknowledgedAgents[projectRoot] = {};
}
acknowledgedAgents[projectRoot][agentName] = hash;
}
} catch (error) {
console.warn(
`Failed to parse agent for test acknowledgement: ${filePath}`,
error,
);
}
}
}
// Write acknowledged_agents.json to the home directory
if (Object.keys(acknowledgedAgents).length > 0) {
const ackPath = path.join(
rig.homeDir!,
'.gemini',
'acknowledgments',
'agents.json',
);
fs.mkdirSync(path.dirname(ackPath), { recursive: true });
fs.writeFileSync(
ackPath,
JSON.stringify(acknowledgedAgents, null, 2),
);
}
const execOptions = { cwd: rig.testDir!, stdio: 'inherit' as const };
@@ -66,6 +115,7 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
const result = await rig.run({
args: evalCase.prompt,
approvalMode: evalCase.approvalMode ?? 'yolo',
timeout: evalCase.timeout,
env: {
GEMINI_CLI_ACTIVITY_LOG_FILE: activityLogFile,
},
@@ -88,6 +138,11 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
});
}
if (rig._lastRunStderr) {
const stderrFile = path.join(logDir, `${sanitizedName}.stderr.log`);
await fs.promises.writeFile(stderrFile, rig._lastRunStderr);
}
await fs.promises.writeFile(
logFile,
JSON.stringify(rig.readToolLogs(), null, 2),
@@ -114,6 +169,7 @@ export interface EvalCase {
name: string;
params?: Record<string, any>;
prompt: string;
timeout?: number;
files?: Record<string, string>;
approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
assert: (rig: TestRig, result: string) => Promise<void>;