fix(test): improve test isolation and enable subagent evaluations (#18138)

This commit is contained in:
Coco Sheng
2026-02-03 14:05:26 -05:00
committed by GitHub
parent 4aa295994d
commit 3183e4137a
2 changed files with 92 additions and 17 deletions

View File

@@ -7,9 +7,13 @@
import { it } from 'vitest';
import fs from 'node:fs';
import path from 'node:path';
import crypto from 'node:crypto';
import { execSync } from 'node:child_process';
import { TestRig } from '@google/gemini-cli-test-utils';
import { createUnauthorizedToolError } from '@google/gemini-cli-core';
import {
createUnauthorizedToolError,
parseAgentMarkdown,
} from '@google/gemini-cli-core';
export * from '@google/gemini-cli-test-utils';
@@ -42,10 +46,55 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
rig.setup(evalCase.name, evalCase.params);
if (evalCase.files) {
const acknowledgedAgents: Record<string, Record<string, string>> = {};
const projectRoot = fs.realpathSync(rig.testDir!);
for (const [filePath, content] of Object.entries(evalCase.files)) {
const fullPath = path.join(rig.testDir!, filePath);
fs.mkdirSync(path.dirname(fullPath), { recursive: true });
fs.writeFileSync(fullPath, content);
// If it's an agent file, calculate hash for acknowledgement
if (
filePath.startsWith('.gemini/agents/') &&
filePath.endsWith('.md')
) {
const hash = crypto
.createHash('sha256')
.update(content)
.digest('hex');
try {
const agentDefs = await parseAgentMarkdown(fullPath, content);
if (agentDefs.length > 0) {
const agentName = agentDefs[0].name;
if (!acknowledgedAgents[projectRoot]) {
acknowledgedAgents[projectRoot] = {};
}
acknowledgedAgents[projectRoot][agentName] = hash;
}
} catch (error) {
console.warn(
`Failed to parse agent for test acknowledgement: ${filePath}`,
error,
);
}
}
}
// Write acknowledged_agents.json to the home directory
if (Object.keys(acknowledgedAgents).length > 0) {
const ackPath = path.join(
rig.homeDir!,
'.gemini',
'acknowledgments',
'agents.json',
);
fs.mkdirSync(path.dirname(ackPath), { recursive: true });
fs.writeFileSync(
ackPath,
JSON.stringify(acknowledgedAgents, null, 2),
);
}
const execOptions = { cwd: rig.testDir!, stdio: 'inherit' as const };
@@ -66,6 +115,7 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
const result = await rig.run({
args: evalCase.prompt,
approvalMode: evalCase.approvalMode ?? 'yolo',
timeout: evalCase.timeout,
env: {
GEMINI_CLI_ACTIVITY_LOG_FILE: activityLogFile,
},
@@ -88,6 +138,11 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
});
}
if (rig._lastRunStderr) {
const stderrFile = path.join(logDir, `${sanitizedName}.stderr.log`);
await fs.promises.writeFile(stderrFile, rig._lastRunStderr);
}
await fs.promises.writeFile(
logFile,
JSON.stringify(rig.readToolLogs(), null, 2),
@@ -114,6 +169,7 @@ export interface EvalCase {
name: string;
params?: Record<string, any>;
prompt: string;
timeout?: number;
files?: Record<string, string>;
approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
assert: (rig: TestRig, result: string) => Promise<void>;

View File

@@ -277,6 +277,7 @@ export class TestRig {
homeDir: string | null = null;
testName?: string;
_lastRunStdout?: string;
_lastRunStderr?: string;
// Path to the copied fake responses file for this test.
fakeResponsesPath?: string;
// Original fake responses file path for rewriting goldens in record mode.
@@ -396,6 +397,34 @@ export class TestRig {
return { command, initialArgs };
}
/**
 * Builds the environment for spawned CLI processes, stripping any
 * GEMINI_* / GOOGLE_GEMINI_* variables inherited from the host that
 * could interfere with test isolation, while letting an explicit
 * allowlist through.
 *
 * @param extraEnv Optional overrides merged in last (highest precedence).
 * @returns A copy of process.env with interfering variables removed and
 *          GEMINI_CLI_HOME pointed at this rig's isolated home directory.
 */
private _getCleanEnv(
  extraEnv?: Record<string, string | undefined>,
): Record<string, string | undefined> {
  // Variables we deliberately keep even though they match the stripped
  // prefixes (GOOGLE_API_KEY is listed for intent, though it matches
  // neither prefix).
  const allowlist = new Set([
    'GEMINI_API_KEY',
    'GOOGLE_API_KEY',
    'GEMINI_MODEL',
    'GEMINI_DEBUG',
    'GEMINI_CLI_TEST_VAR',
  ]);
  const interferes = (key: string): boolean =>
    (key.startsWith('GEMINI_') || key.startsWith('GOOGLE_GEMINI_')) &&
    !allowlist.has(key) &&
    !key.startsWith('GEMINI_CLI_ACTIVITY_LOG');
  // Copy the host environment, filtering out interfering keys as we go.
  const cleanEnv: Record<string, string | undefined> = {};
  for (const [key, value] of Object.entries(process.env)) {
    if (!interferes(key)) {
      cleanEnv[key] = value;
    }
  }
  return {
    ...cleanEnv,
    GEMINI_CLI_HOME: this.homeDir!,
    ...extraEnv,
  };
}
run(options: {
args?: string | string[];
stdin?: string;
@@ -433,11 +462,7 @@ export class TestRig {
const child = spawn(command, commandArgs, {
cwd: this.testDir!,
stdio: 'pipe',
env: {
...process.env,
GEMINI_CLI_HOME: this.homeDir!,
...options.env,
},
env: this._getCleanEnv(options.env),
});
this._spawnedProcesses.push(child);
@@ -487,6 +512,7 @@ export class TestRig {
child.on('close', (code: number) => {
clearTimeout(timer);
this._lastRunStderr = stderr;
if (code === 0) {
// Store the raw stdout for Podman telemetry parsing
this._lastRunStdout = stdout;
@@ -573,7 +599,7 @@ export class TestRig {
const child = spawn(command, allArgs, {
cwd: this.testDir!,
stdio: 'pipe',
env: { ...process.env, GEMINI_CLI_HOME: this.homeDir! },
env: this._getCleanEnv(),
signal: options?.signal,
});
this._spawnedProcesses.push(child);
@@ -611,11 +637,7 @@ export class TestRig {
const child = spawn(command, commandArgs, {
cwd: this.testDir!,
stdio: 'pipe',
env: {
...process.env,
GEMINI_CLI_HOME: this.homeDir!,
...options.env,
},
env: this._getCleanEnv(options.env),
});
this._spawnedProcesses.push(child);
@@ -661,6 +683,7 @@ export class TestRig {
child.on('close', (code: number) => {
clearTimeout(timer);
this._lastRunStderr = stderr;
if (code === 0) {
this._lastRunStdout = stdout;
const result = this._filterPodmanTelemetry(stdout);
@@ -1179,11 +1202,7 @@ export class TestRig {
]);
const commandArgs = [...initialArgs];
const envVars = {
...process.env,
GEMINI_CLI_HOME: this.homeDir!,
...options?.env,
};
const envVars = this._getCleanEnv(options?.env);
const ptyOptions: pty.IPtyForkOptions = {
name: 'xterm-color',