test: fix hook integration test flakiness on Windows CI (#18665)

2026-05-12 21:03:05 -07:00 · 2026-02-15 11:42:13 -08:00
parent 7f7424dd1e
commit 884acda2dc
7 changed files with 555 additions and 317 deletions
@@ -5,7 +5,7 @@
 */

 import { describe, it, expect, beforeEach, afterEach } from 'vitest';
-import { TestRig } from './test-helper.js';
+import { TestRig, normalizePath } from './test-helper.js';
 import { join } from 'node:path';
 import { writeFileSync } from 'node:fs';

@@ -113,10 +113,9 @@ describe('Hooks Agent Flow', () => {
      }
      `;

-      const scriptPath = join(rig.testDir!, 'after_agent_verify.cjs');
-      writeFileSync(scriptPath, hookScript);
+      const scriptPath = rig.createScript('after_agent_verify.cjs', hookScript);

-      await rig.setup('should receive prompt and response in AfterAgent hook', {
+      rig.setup('should receive prompt and response in AfterAgent hook', {
        settings: {
          hooksConfig: {
            enabled: true,
@@ -127,7 +126,7 @@ describe('Hooks Agent Flow', () => {
                hooks: [
                  {
                    type: 'command',
-                    command: `node "${scriptPath}"`,
+                    command: normalizePath(`node "${scriptPath}"`)!,
                    timeout: 5000,
                  },
                ],
@@ -157,7 +156,7 @@ describe('Hooks Agent Flow', () => {
    });

    it('should process clearContext in AfterAgent hook output', async () => {
-      await rig.setup('should process clearContext in AfterAgent hook output', {
+      rig.setup('should process clearContext in AfterAgent hook output', {
        fakeResponsesPath: join(
          import.meta.dirname,
          'hooks-system.after-agent.responses',
@@ -171,18 +170,32 @@ describe('Hooks Agent Flow', () => {
        const input = JSON.parse(fs.readFileSync(0, 'utf-8'));
        const messageCount = input.llm_request?.contents?.length || 0;
        let counts = [];
-        try { counts = JSON.parse(fs.readFileSync('${messageCountFile}', 'utf-8')); } catch (e) {}
+        try { counts = JSON.parse(fs.readFileSync(${JSON.stringify(messageCountFile)}, 'utf-8')); } catch (e) {}
        counts.push(messageCount);
-        fs.writeFileSync('${messageCountFile}', JSON.stringify(counts));
+        fs.writeFileSync(${JSON.stringify(messageCountFile)}, JSON.stringify(counts));
        console.log(JSON.stringify({ decision: 'allow' }));
      `;
-      const beforeModelScriptPath = join(
-        rig.testDir!,
+      const beforeModelScriptPath = rig.createScript(
        'before_model_counter.cjs',
+        beforeModelScript,
      );
-      writeFileSync(beforeModelScriptPath, beforeModelScript);

-      await rig.setup('should process clearContext in AfterAgent hook output', {
+      const afterAgentScript = `
+        console.log(JSON.stringify({
+          decision: 'block',
+          reason: 'Security policy triggered',
+          hookSpecificOutput: {
+            hookEventName: 'AfterAgent',
+            clearContext: true
+          }
+        }));
+      `;
+      const afterAgentScriptPath = rig.createScript(
+        'after_agent_clear.cjs',
+        afterAgentScript,
+      );
+
+      rig.setup('should process clearContext in AfterAgent hook output', {
        settings: {
          hooks: {
            enabled: true,
@@ -191,7 +204,7 @@ describe('Hooks Agent Flow', () => {
                hooks: [
                  {
                    type: 'command',
-                    command: `node "${beforeModelScriptPath}"`,
+                    command: normalizePath(`node "${beforeModelScriptPath}"`)!,
                    timeout: 5000,
                  },
                ],
@@ -202,7 +215,7 @@ describe('Hooks Agent Flow', () => {
                hooks: [
                  {
                    type: 'command',
-                    command: `node -e "console.log(JSON.stringify({decision: 'block', reason: 'Security policy triggered', hookSpecificOutput: {hookEventName: 'AfterAgent', clearContext: true}}))"`,
+                    command: normalizePath(`node "${afterAgentScriptPath}"`)!,
                    timeout: 5000,
                  },
                ],
@@ -244,6 +257,22 @@ describe('Hooks Agent Flow', () => {
            import.meta.dirname,
            'hooks-agent-flow-multistep.responses',
          ),
+        },
+      );
+
+      // Create script files for hooks
+      const baPath = rig.createScript(
+        'ba_fired.cjs',
+        "console.log('BeforeAgent Fired');",
+      );
+      const aaPath = rig.createScript(
+        'aa_fired.cjs',
+        "console.log('AfterAgent Fired');",
+      );
+
+      await rig.setup(
+        'should fire BeforeAgent and AfterAgent exactly once per turn despite tool calls',
+        {
          settings: {
            hooksConfig: {
              enabled: true,
@@ -254,7 +283,7 @@ describe('Hooks Agent Flow', () => {
                  hooks: [
                    {
                      type: 'command',
-                      command: `node -e "console.log('BeforeAgent Fired')"`,
+                      command: normalizePath(`node "${baPath}"`)!,
                      timeout: 5000,
                    },
                  ],
@@ -265,7 +294,7 @@ describe('Hooks Agent Flow', () => {
                  hooks: [
                    {
                      type: 'command',
-                      command: `node -e "console.log('AfterAgent Fired')"`,
+                      command: normalizePath(`node "${aaPath}"`)!,
                      timeout: 5000,
                    },
                  ],
@@ -5,3 +5,4 @@
 */

 export * from '@google/gemini-cli-test-utils';
+export { normalizePath } from '@google/gemini-cli-test-utils';
@@ -35,7 +35,6 @@ const DEFAULT_HOOK_TIMEOUT = 60000;
 * Exit code constants for hook execution
 */
 const EXIT_CODE_SUCCESS = 0;
-const EXIT_CODE_BLOCKING_ERROR = 2;
 const EXIT_CODE_NON_BLOCKING_ERROR = 1;

 /**
@@ -353,28 +352,25 @@ export class HookRunner {

        // Parse output
        let output: HookOutput | undefined;
-        if (exitCode === EXIT_CODE_SUCCESS && stdout.trim()) {
+
+        const textToParse = stdout.trim() || stderr.trim();
+        if (textToParse) {
          try {
-            let parsed = JSON.parse(stdout.trim());
+            let parsed = JSON.parse(textToParse);
            if (typeof parsed === 'string') {
-              // If the output is a string, parse it in case
-              // it's double-encoded JSON string.
              parsed = JSON.parse(parsed);
            }
-            if (parsed) {
+            if (parsed && typeof parsed === 'object') {
              // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
              output = parsed as HookOutput;
            }
          } catch {
            // Not JSON, convert plain text to structured output
-            output = this.convertPlainTextToHookOutput(stdout.trim(), exitCode);
+            output = this.convertPlainTextToHookOutput(
+              textToParse,
+              exitCode || EXIT_CODE_SUCCESS,
+            );
          }
-        } else if (exitCode !== EXIT_CODE_SUCCESS && stderr.trim()) {
-          // Convert error output to structured format
-          output = this.convertPlainTextToHookOutput(
-            stderr.trim(),
-            exitCode || EXIT_CODE_NON_BLOCKING_ERROR,
-          );
        }

        resolve({
@@ -435,18 +431,18 @@ export class HookRunner {
        decision: 'allow',
        systemMessage: text,
      };
-    } else if (exitCode === EXIT_CODE_BLOCKING_ERROR) {
-      // Blocking error
-      return {
-        decision: 'deny',
-        reason: text,
-      };
-    } else {
-      // Non-blocking error (EXIT_CODE_NON_BLOCKING_ERROR or any other code)
+    } else if (exitCode === EXIT_CODE_NON_BLOCKING_ERROR) {
+      // Non-blocking error (EXIT_CODE_NON_BLOCKING_ERROR = 1)
      return {
        decision: 'allow',
        systemMessage: `Warning: ${text}`,
      };
+    } else {
+      // All other non-zero exit codes (including 2) are blocking
+      return {
+        decision: 'deny',
+        reason: text,
+      };
    }
  }
 }
@@ -18,6 +18,9 @@ export interface PtyProcess {
 }

 export const getPty = async (): Promise<PtyImplementation> => {
+  if (process.env['GEMINI_PTY_INFO'] === 'child_process') {
+    return null;
+  }
  try {
    const lydell = '@lydell/node-pty';
    const module = await import(lydell);
@@ -345,6 +345,7 @@ export class TestRig {
  originalFakeResponsesPath?: string;
  private _interactiveRuns: InteractiveRun[] = [];
  private _spawnedProcesses: ChildProcess[] = [];
+  private _initialized = false;

  setup(
    testName: string,
@@ -359,6 +360,14 @@ export class TestRig {
      env['INTEGRATION_TEST_FILE_DIR'] || join(os.tmpdir(), 'gemini-cli-tests');
    this.testDir = join(testFileDir, sanitizedName);
    this.homeDir = join(testFileDir, sanitizedName + '-home');
+
+    if (!this._initialized) {
+      // Clean up existing directories from previous runs (e.g. retries)
+      this._cleanDir(this.testDir);
+      this._cleanDir(this.homeDir);
+      this._initialized = true;
+    }
+
    mkdirSync(this.testDir, { recursive: true });
    mkdirSync(this.homeDir, { recursive: true });
    if (options.fakeResponsesPath) {
@@ -373,6 +382,36 @@ export class TestRig {
    this._createSettingsFile(options.settings);
  }

+  private _cleanDir(dir: string) {
+    if (fs.existsSync(dir)) {
+      for (let i = 0; i < 10; i++) {
+        try {
+          fs.rmSync(dir, { recursive: true, force: true });
+          return;
+        } catch (err) {
+          if (i === 9) {
+            console.error(
+              `Failed to clean directory ${dir} after 10 attempts:`,
+              err,
+            );
+            throw err;
+          }
+          const delay = Math.min(Math.pow(2, i) * 1000, 10000); // Max 10s delay
+          try {
+            const sharedBuffer = new Int32Array(new SharedArrayBuffer(4));
+            Atomics.wait(sharedBuffer, 0, 0, delay);
+          } catch {
+            // Fallback for environments where SharedArrayBuffer might be restricted
+            const start = Date.now();
+            while (Date.now() - start < delay) {
+              /* busy wait */
+            }
+          }
+        }
+      }
+    }
+  }
+
  private _createSettingsFile(overrideSettings?: Record<string, unknown>) {
    const projectGeminiDir = join(this.testDir!, GEMINI_DIR);
    mkdirSync(projectGeminiDir, { recursive: true });
@@ -474,6 +513,17 @@ export class TestRig {
    return { command, initialArgs };
  }

+  createScript(fileName: string, content: string) {
+    if (!this.testDir) {
+      throw new Error(
+        'TestRig.setup must be called before creating files or scripts',
+      );
+    }
+    const scriptPath = join(this.testDir, fileName);
+    writeFileSync(scriptPath, content);
+    return normalizePath(scriptPath);
+  }
+
  private _getCleanEnv(
    extraEnv?: Record<string, string | undefined>,
  ): Record<string, string | undefined> {
@@ -499,6 +549,7 @@ export class TestRig {
    return {
      ...cleanEnv,
      GEMINI_CLI_HOME: this.homeDir!,
+      GEMINI_PTY_INFO: 'child_process',
      ...extraEnv,
    };
  }
@@ -801,6 +852,13 @@ export class TestRig {
    // Kill any interactive runs that are still active
    for (const run of this._interactiveRuns) {
      try {
+        if (process.platform === 'win32') {
+          // @ts-ignore - access private ptyProcess
+          const pid = run.ptyProcess?.pid;
+          if (pid) {
+            execSync(`taskkill /F /T /PID ${pid}`, { stdio: 'ignore' });
+          }
+        }
        await run.kill();
      } catch (error) {
        if (env['VERBOSE'] === 'true') {
@@ -814,6 +872,9 @@ export class TestRig {
    for (const child of this._spawnedProcesses) {
      if (child.exitCode === null && child.signalCode === null) {
        try {
+          if (process.platform === 'win32' && child.pid) {
+            execSync(`taskkill /F /T /PID ${child.pid}`, { stdio: 'ignore' });
+          }
          child.kill('SIGKILL');
        } catch (error) {
          if (env['VERBOSE'] === 'true') {
@@ -836,21 +897,21 @@ export class TestRig {
    // Clean up test directory and home directory
    if (this.testDir && !env['KEEP_OUTPUT']) {
      try {
-        fs.rmSync(this.testDir, { recursive: true, force: true });
+        this._cleanDir(this.testDir);
      } catch (error) {
        // Ignore cleanup errors
-        if (env['VERBOSE'] === 'true') {
-          console.warn('Cleanup warning:', (error as Error).message);
+        if (env['VERBOSE'] === 'true' || env['CI'] === 'true') {
+          console.warn('Cleanup warning (testDir):', (error as Error).message);
        }
      }
    }
    if (this.homeDir && !env['KEEP_OUTPUT']) {
      try {
-        fs.rmSync(this.homeDir, { recursive: true, force: true });
+        this._cleanDir(this.homeDir);
      } catch (error) {
        // Ignore cleanup errors
-        if (env['VERBOSE'] === 'true') {
-          console.warn('Cleanup warning:', (error as Error).message);
+        if (env['VERBOSE'] === 'true' || env['CI'] === 'true') {
+          console.warn('Cleanup warning (homeDir):', (error as Error).message);
        }
      }
    }
@@ -1282,6 +1343,23 @@ export class TestRig {

    const envVars = this._getCleanEnv(options?.env);

+    // node-pty on windows often needs these to spawn correctly
+    if (process.platform === 'win32') {
+      const windowsCriticalVars = [
+        'SystemRoot',
+        'COMSPEC',
+        'windir',
+        'PATHEXT',
+        'TEMP',
+        'TMP',
+      ];
+      for (const v of windowsCriticalVars) {
+        if (process.env[v] && !envVars[v]) {
+          envVars[v] = process.env[v]!;
+        }
+      }
+    }
+
    const ptyOptions: pty.IPtyForkOptions = {
      name: 'xterm-color',
      cols: 80,
@@ -1364,3 +1442,11 @@ export class TestRig {
    throw new Error(`pollCommand timed out after ${timeout}ms`);
  }
 }
+
+/**
+ * Normalizes a path for cross-platform matching (replaces backslashes with forward slashes).
+ */
+export function normalizePath(p: string | undefined): string | undefined {
+  if (!p) return p;
+  return p.replace(/\\/g, '/');
+}