mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-10 14:10:37 -07:00
fix(test): improve test isolation and enable subagent evaluations (#18138)
This commit is contained in:
@@ -7,9 +7,13 @@
|
||||
import { it } from 'vitest';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import crypto from 'node:crypto';
|
||||
import { execSync } from 'node:child_process';
|
||||
import { TestRig } from '@google/gemini-cli-test-utils';
|
||||
import { createUnauthorizedToolError } from '@google/gemini-cli-core';
|
||||
import {
|
||||
createUnauthorizedToolError,
|
||||
parseAgentMarkdown,
|
||||
} from '@google/gemini-cli-core';
|
||||
|
||||
export * from '@google/gemini-cli-test-utils';
|
||||
|
||||
@@ -42,10 +46,55 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
|
||||
rig.setup(evalCase.name, evalCase.params);
|
||||
|
||||
if (evalCase.files) {
|
||||
const acknowledgedAgents: Record<string, Record<string, string>> = {};
|
||||
const projectRoot = fs.realpathSync(rig.testDir!);
|
||||
|
||||
for (const [filePath, content] of Object.entries(evalCase.files)) {
|
||||
const fullPath = path.join(rig.testDir!, filePath);
|
||||
fs.mkdirSync(path.dirname(fullPath), { recursive: true });
|
||||
fs.writeFileSync(fullPath, content);
|
||||
|
||||
// If it's an agent file, calculate hash for acknowledgement
|
||||
if (
|
||||
filePath.startsWith('.gemini/agents/') &&
|
||||
filePath.endsWith('.md')
|
||||
) {
|
||||
const hash = crypto
|
||||
.createHash('sha256')
|
||||
.update(content)
|
||||
.digest('hex');
|
||||
|
||||
try {
|
||||
const agentDefs = await parseAgentMarkdown(fullPath, content);
|
||||
if (agentDefs.length > 0) {
|
||||
const agentName = agentDefs[0].name;
|
||||
if (!acknowledgedAgents[projectRoot]) {
|
||||
acknowledgedAgents[projectRoot] = {};
|
||||
}
|
||||
acknowledgedAgents[projectRoot][agentName] = hash;
|
||||
}
|
||||
} catch (error) {
|
||||
console.warn(
|
||||
`Failed to parse agent for test acknowledgement: ${filePath}`,
|
||||
error,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write the acknowledged agent hashes to <home>/.gemini/acknowledgments/agents.json
|
||||
if (Object.keys(acknowledgedAgents).length > 0) {
|
||||
const ackPath = path.join(
|
||||
rig.homeDir!,
|
||||
'.gemini',
|
||||
'acknowledgments',
|
||||
'agents.json',
|
||||
);
|
||||
fs.mkdirSync(path.dirname(ackPath), { recursive: true });
|
||||
fs.writeFileSync(
|
||||
ackPath,
|
||||
JSON.stringify(acknowledgedAgents, null, 2),
|
||||
);
|
||||
}
|
||||
|
||||
const execOptions = { cwd: rig.testDir!, stdio: 'inherit' as const };
|
||||
@@ -66,6 +115,7 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
|
||||
const result = await rig.run({
|
||||
args: evalCase.prompt,
|
||||
approvalMode: evalCase.approvalMode ?? 'yolo',
|
||||
timeout: evalCase.timeout,
|
||||
env: {
|
||||
GEMINI_CLI_ACTIVITY_LOG_FILE: activityLogFile,
|
||||
},
|
||||
@@ -88,6 +138,11 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
|
||||
});
|
||||
}
|
||||
|
||||
if (rig._lastRunStderr) {
|
||||
const stderrFile = path.join(logDir, `${sanitizedName}.stderr.log`);
|
||||
await fs.promises.writeFile(stderrFile, rig._lastRunStderr);
|
||||
}
|
||||
|
||||
await fs.promises.writeFile(
|
||||
logFile,
|
||||
JSON.stringify(rig.readToolLogs(), null, 2),
|
||||
@@ -114,6 +169,7 @@ export interface EvalCase {
|
||||
name: string;
|
||||
params?: Record<string, any>;
|
||||
prompt: string;
|
||||
timeout?: number;
|
||||
files?: Record<string, string>;
|
||||
approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
|
||||
assert: (rig: TestRig, result: string) => Promise<void>;
|
||||
|
||||
@@ -277,6 +277,7 @@ export class TestRig {
|
||||
homeDir: string | null = null;
|
||||
testName?: string;
|
||||
_lastRunStdout?: string;
|
||||
_lastRunStderr?: string;
|
||||
// Path to the copied fake responses file for this test.
|
||||
fakeResponsesPath?: string;
|
||||
// Original fake responses file path for rewriting goldens in record mode.
|
||||
@@ -396,6 +397,34 @@ export class TestRig {
|
||||
return { command, initialArgs };
|
||||
}
|
||||
|
||||
/**
 * Builds the environment handed to a spawned CLI process.
 *
 * Starts from the current `process.env`, drops every variable whose name
 * begins with `GEMINI_` or `GOOGLE_GEMINI_` unless it is explicitly
 * preserved, then sets `GEMINI_CLI_HOME` to this rig's home directory.
 * Entries in `extraEnv` are applied last, so they override both the
 * inherited values and `GEMINI_CLI_HOME`.
 *
 * @param extraEnv Optional per-call variables layered on top of the result.
 * @returns A new environment object; `process.env` itself is not modified.
 */
private _getCleanEnv(
  extraEnv?: Record<string, string | undefined>,
): Record<string, string | undefined> {
  // Exact variable names that survive the scrub.
  const preservedNames = new Set<string>([
    'GEMINI_API_KEY',
    'GOOGLE_API_KEY',
    'GEMINI_MODEL',
    'GEMINI_DEBUG',
    'GEMINI_CLI_TEST_VAR',
  ]);

  // True when a variable could interfere with the test and must be removed.
  const shouldStrip = (name: string): boolean => {
    const isGeminiScoped =
      name.startsWith('GEMINI_') || name.startsWith('GOOGLE_GEMINI_');
    if (!isGeminiScoped) {
      return false;
    }
    if (preservedNames.has(name)) {
      return false;
    }
    // Activity-log variables are kept as a whole prefix family.
    return !name.startsWith('GEMINI_CLI_ACTIVITY_LOG');
  };

  const inherited: Record<string, string | undefined> = {};
  for (const [name, value] of Object.entries(process.env)) {
    if (!shouldStrip(name)) {
      inherited[name] = value;
    }
  }

  return {
    ...inherited,
    GEMINI_CLI_HOME: this.homeDir!,
    ...extraEnv,
  };
}
|
||||
|
||||
run(options: {
|
||||
args?: string | string[];
|
||||
stdin?: string;
|
||||
@@ -433,11 +462,7 @@ export class TestRig {
|
||||
const child = spawn(command, commandArgs, {
|
||||
cwd: this.testDir!,
|
||||
stdio: 'pipe',
|
||||
env: {
|
||||
...process.env,
|
||||
GEMINI_CLI_HOME: this.homeDir!,
|
||||
...options.env,
|
||||
},
|
||||
env: this._getCleanEnv(options.env),
|
||||
});
|
||||
this._spawnedProcesses.push(child);
|
||||
|
||||
@@ -487,6 +512,7 @@ export class TestRig {
|
||||
|
||||
child.on('close', (code: number) => {
|
||||
clearTimeout(timer);
|
||||
this._lastRunStderr = stderr;
|
||||
if (code === 0) {
|
||||
// Store the raw stdout for Podman telemetry parsing
|
||||
this._lastRunStdout = stdout;
|
||||
@@ -573,7 +599,7 @@ export class TestRig {
|
||||
const child = spawn(command, allArgs, {
|
||||
cwd: this.testDir!,
|
||||
stdio: 'pipe',
|
||||
env: { ...process.env, GEMINI_CLI_HOME: this.homeDir! },
|
||||
env: this._getCleanEnv(),
|
||||
signal: options?.signal,
|
||||
});
|
||||
this._spawnedProcesses.push(child);
|
||||
@@ -611,11 +637,7 @@ export class TestRig {
|
||||
const child = spawn(command, commandArgs, {
|
||||
cwd: this.testDir!,
|
||||
stdio: 'pipe',
|
||||
env: {
|
||||
...process.env,
|
||||
GEMINI_CLI_HOME: this.homeDir!,
|
||||
...options.env,
|
||||
},
|
||||
env: this._getCleanEnv(options.env),
|
||||
});
|
||||
this._spawnedProcesses.push(child);
|
||||
|
||||
@@ -661,6 +683,7 @@ export class TestRig {
|
||||
|
||||
child.on('close', (code: number) => {
|
||||
clearTimeout(timer);
|
||||
this._lastRunStderr = stderr;
|
||||
if (code === 0) {
|
||||
this._lastRunStdout = stdout;
|
||||
const result = this._filterPodmanTelemetry(stdout);
|
||||
@@ -1179,11 +1202,7 @@ export class TestRig {
|
||||
]);
|
||||
const commandArgs = [...initialArgs];
|
||||
|
||||
const envVars = {
|
||||
...process.env,
|
||||
GEMINI_CLI_HOME: this.homeDir!,
|
||||
...options?.env,
|
||||
};
|
||||
const envVars = this._getCleanEnv(options?.env);
|
||||
|
||||
const ptyOptions: pty.IPtyForkOptions = {
|
||||
name: 'xterm-color',
|
||||
|
||||
Reference in New Issue
Block a user