mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-10 14:10:37 -07:00
test: fix hook integration test flakiness on Windows CI (#18665)
This commit is contained in:
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import { TestRig } from './test-helper.js';
|
||||
import { TestRig, normalizePath } from './test-helper.js';
|
||||
import { join } from 'node:path';
|
||||
import { writeFileSync } from 'node:fs';
|
||||
|
||||
@@ -113,10 +113,9 @@ describe('Hooks Agent Flow', () => {
|
||||
}
|
||||
`;
|
||||
|
||||
const scriptPath = join(rig.testDir!, 'after_agent_verify.cjs');
|
||||
writeFileSync(scriptPath, hookScript);
|
||||
const scriptPath = rig.createScript('after_agent_verify.cjs', hookScript);
|
||||
|
||||
await rig.setup('should receive prompt and response in AfterAgent hook', {
|
||||
rig.setup('should receive prompt and response in AfterAgent hook', {
|
||||
settings: {
|
||||
hooksConfig: {
|
||||
enabled: true,
|
||||
@@ -127,7 +126,7 @@ describe('Hooks Agent Flow', () => {
|
||||
hooks: [
|
||||
{
|
||||
type: 'command',
|
||||
command: `node "${scriptPath}"`,
|
||||
command: normalizePath(`node "${scriptPath}"`)!,
|
||||
timeout: 5000,
|
||||
},
|
||||
],
|
||||
@@ -157,7 +156,7 @@ describe('Hooks Agent Flow', () => {
|
||||
});
|
||||
|
||||
it('should process clearContext in AfterAgent hook output', async () => {
|
||||
await rig.setup('should process clearContext in AfterAgent hook output', {
|
||||
rig.setup('should process clearContext in AfterAgent hook output', {
|
||||
fakeResponsesPath: join(
|
||||
import.meta.dirname,
|
||||
'hooks-system.after-agent.responses',
|
||||
@@ -171,18 +170,32 @@ describe('Hooks Agent Flow', () => {
|
||||
const input = JSON.parse(fs.readFileSync(0, 'utf-8'));
|
||||
const messageCount = input.llm_request?.contents?.length || 0;
|
||||
let counts = [];
|
||||
try { counts = JSON.parse(fs.readFileSync('${messageCountFile}', 'utf-8')); } catch (e) {}
|
||||
try { counts = JSON.parse(fs.readFileSync(${JSON.stringify(messageCountFile)}, 'utf-8')); } catch (e) {}
|
||||
counts.push(messageCount);
|
||||
fs.writeFileSync('${messageCountFile}', JSON.stringify(counts));
|
||||
fs.writeFileSync(${JSON.stringify(messageCountFile)}, JSON.stringify(counts));
|
||||
console.log(JSON.stringify({ decision: 'allow' }));
|
||||
`;
|
||||
const beforeModelScriptPath = join(
|
||||
rig.testDir!,
|
||||
const beforeModelScriptPath = rig.createScript(
|
||||
'before_model_counter.cjs',
|
||||
beforeModelScript,
|
||||
);
|
||||
writeFileSync(beforeModelScriptPath, beforeModelScript);
|
||||
|
||||
await rig.setup('should process clearContext in AfterAgent hook output', {
|
||||
const afterAgentScript = `
|
||||
console.log(JSON.stringify({
|
||||
decision: 'block',
|
||||
reason: 'Security policy triggered',
|
||||
hookSpecificOutput: {
|
||||
hookEventName: 'AfterAgent',
|
||||
clearContext: true
|
||||
}
|
||||
}));
|
||||
`;
|
||||
const afterAgentScriptPath = rig.createScript(
|
||||
'after_agent_clear.cjs',
|
||||
afterAgentScript,
|
||||
);
|
||||
|
||||
rig.setup('should process clearContext in AfterAgent hook output', {
|
||||
settings: {
|
||||
hooks: {
|
||||
enabled: true,
|
||||
@@ -191,7 +204,7 @@ describe('Hooks Agent Flow', () => {
|
||||
hooks: [
|
||||
{
|
||||
type: 'command',
|
||||
command: `node "${beforeModelScriptPath}"`,
|
||||
command: normalizePath(`node "${beforeModelScriptPath}"`)!,
|
||||
timeout: 5000,
|
||||
},
|
||||
],
|
||||
@@ -202,7 +215,7 @@ describe('Hooks Agent Flow', () => {
|
||||
hooks: [
|
||||
{
|
||||
type: 'command',
|
||||
command: `node -e "console.log(JSON.stringify({decision: 'block', reason: 'Security policy triggered', hookSpecificOutput: {hookEventName: 'AfterAgent', clearContext: true}}))"`,
|
||||
command: normalizePath(`node "${afterAgentScriptPath}"`)!,
|
||||
timeout: 5000,
|
||||
},
|
||||
],
|
||||
@@ -244,6 +257,22 @@ describe('Hooks Agent Flow', () => {
|
||||
import.meta.dirname,
|
||||
'hooks-agent-flow-multistep.responses',
|
||||
),
|
||||
},
|
||||
);
|
||||
|
||||
// Create script files for hooks
|
||||
const baPath = rig.createScript(
|
||||
'ba_fired.cjs',
|
||||
"console.log('BeforeAgent Fired');",
|
||||
);
|
||||
const aaPath = rig.createScript(
|
||||
'aa_fired.cjs',
|
||||
"console.log('AfterAgent Fired');",
|
||||
);
|
||||
|
||||
await rig.setup(
|
||||
'should fire BeforeAgent and AfterAgent exactly once per turn despite tool calls',
|
||||
{
|
||||
settings: {
|
||||
hooksConfig: {
|
||||
enabled: true,
|
||||
@@ -254,7 +283,7 @@ describe('Hooks Agent Flow', () => {
|
||||
hooks: [
|
||||
{
|
||||
type: 'command',
|
||||
command: `node -e "console.log('BeforeAgent Fired')"`,
|
||||
command: normalizePath(`node "${baPath}"`)!,
|
||||
timeout: 5000,
|
||||
},
|
||||
],
|
||||
@@ -265,7 +294,7 @@ describe('Hooks Agent Flow', () => {
|
||||
hooks: [
|
||||
{
|
||||
type: 'command',
|
||||
command: `node -e "console.log('AfterAgent Fired')"`,
|
||||
command: normalizePath(`node "${aaPath}"`)!,
|
||||
timeout: 5000,
|
||||
},
|
||||
],
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
@@ -5,3 +5,4 @@
|
||||
*/
|
||||
|
||||
export * from '@google/gemini-cli-test-utils';
|
||||
export { normalizePath } from '@google/gemini-cli-test-utils';
|
||||
|
||||
@@ -35,7 +35,6 @@ const DEFAULT_HOOK_TIMEOUT = 60000;
|
||||
* Exit code constants for hook execution
|
||||
*/
|
||||
const EXIT_CODE_SUCCESS = 0;
|
||||
const EXIT_CODE_BLOCKING_ERROR = 2;
|
||||
const EXIT_CODE_NON_BLOCKING_ERROR = 1;
|
||||
|
||||
/**
|
||||
@@ -353,28 +352,25 @@ export class HookRunner {
|
||||
|
||||
// Parse output
|
||||
let output: HookOutput | undefined;
|
||||
if (exitCode === EXIT_CODE_SUCCESS && stdout.trim()) {
|
||||
|
||||
const textToParse = stdout.trim() || stderr.trim();
|
||||
if (textToParse) {
|
||||
try {
|
||||
let parsed = JSON.parse(stdout.trim());
|
||||
let parsed = JSON.parse(textToParse);
|
||||
if (typeof parsed === 'string') {
|
||||
// If the output is a string, parse it in case
|
||||
// it's double-encoded JSON string.
|
||||
parsed = JSON.parse(parsed);
|
||||
}
|
||||
if (parsed) {
|
||||
if (parsed && typeof parsed === 'object') {
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
output = parsed as HookOutput;
|
||||
}
|
||||
} catch {
|
||||
// Not JSON, convert plain text to structured output
|
||||
output = this.convertPlainTextToHookOutput(stdout.trim(), exitCode);
|
||||
output = this.convertPlainTextToHookOutput(
|
||||
textToParse,
|
||||
exitCode || EXIT_CODE_SUCCESS,
|
||||
);
|
||||
}
|
||||
} else if (exitCode !== EXIT_CODE_SUCCESS && stderr.trim()) {
|
||||
// Convert error output to structured format
|
||||
output = this.convertPlainTextToHookOutput(
|
||||
stderr.trim(),
|
||||
exitCode || EXIT_CODE_NON_BLOCKING_ERROR,
|
||||
);
|
||||
}
|
||||
|
||||
resolve({
|
||||
@@ -435,18 +431,18 @@ export class HookRunner {
|
||||
decision: 'allow',
|
||||
systemMessage: text,
|
||||
};
|
||||
} else if (exitCode === EXIT_CODE_BLOCKING_ERROR) {
|
||||
// Blocking error
|
||||
return {
|
||||
decision: 'deny',
|
||||
reason: text,
|
||||
};
|
||||
} else {
|
||||
// Non-blocking error (EXIT_CODE_NON_BLOCKING_ERROR or any other code)
|
||||
} else if (exitCode === EXIT_CODE_NON_BLOCKING_ERROR) {
|
||||
// Non-blocking error (EXIT_CODE_NON_BLOCKING_ERROR = 1)
|
||||
return {
|
||||
decision: 'allow',
|
||||
systemMessage: `Warning: ${text}`,
|
||||
};
|
||||
} else {
|
||||
// All other non-zero exit codes (including 2) are blocking
|
||||
return {
|
||||
decision: 'deny',
|
||||
reason: text,
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,9 @@ export interface PtyProcess {
|
||||
}
|
||||
|
||||
export const getPty = async (): Promise<PtyImplementation> => {
|
||||
if (process.env['GEMINI_PTY_INFO'] === 'child_process') {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
const lydell = '@lydell/node-pty';
|
||||
const module = await import(lydell);
|
||||
|
||||
@@ -345,6 +345,7 @@ export class TestRig {
|
||||
originalFakeResponsesPath?: string;
|
||||
private _interactiveRuns: InteractiveRun[] = [];
|
||||
private _spawnedProcesses: ChildProcess[] = [];
|
||||
private _initialized = false;
|
||||
|
||||
setup(
|
||||
testName: string,
|
||||
@@ -359,6 +360,14 @@ export class TestRig {
|
||||
env['INTEGRATION_TEST_FILE_DIR'] || join(os.tmpdir(), 'gemini-cli-tests');
|
||||
this.testDir = join(testFileDir, sanitizedName);
|
||||
this.homeDir = join(testFileDir, sanitizedName + '-home');
|
||||
|
||||
if (!this._initialized) {
|
||||
// Clean up existing directories from previous runs (e.g. retries)
|
||||
this._cleanDir(this.testDir);
|
||||
this._cleanDir(this.homeDir);
|
||||
this._initialized = true;
|
||||
}
|
||||
|
||||
mkdirSync(this.testDir, { recursive: true });
|
||||
mkdirSync(this.homeDir, { recursive: true });
|
||||
if (options.fakeResponsesPath) {
|
||||
@@ -373,6 +382,36 @@ export class TestRig {
|
||||
this._createSettingsFile(options.settings);
|
||||
}
|
||||
|
||||
private _cleanDir(dir: string) {
|
||||
if (fs.existsSync(dir)) {
|
||||
for (let i = 0; i < 10; i++) {
|
||||
try {
|
||||
fs.rmSync(dir, { recursive: true, force: true });
|
||||
return;
|
||||
} catch (err) {
|
||||
if (i === 9) {
|
||||
console.error(
|
||||
`Failed to clean directory ${dir} after 10 attempts:`,
|
||||
err,
|
||||
);
|
||||
throw err;
|
||||
}
|
||||
const delay = Math.min(Math.pow(2, i) * 1000, 10000); // Max 10s delay
|
||||
try {
|
||||
const sharedBuffer = new Int32Array(new SharedArrayBuffer(4));
|
||||
Atomics.wait(sharedBuffer, 0, 0, delay);
|
||||
} catch {
|
||||
// Fallback for environments where SharedArrayBuffer might be restricted
|
||||
const start = Date.now();
|
||||
while (Date.now() - start < delay) {
|
||||
/* busy wait */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private _createSettingsFile(overrideSettings?: Record<string, unknown>) {
|
||||
const projectGeminiDir = join(this.testDir!, GEMINI_DIR);
|
||||
mkdirSync(projectGeminiDir, { recursive: true });
|
||||
@@ -474,6 +513,17 @@ export class TestRig {
|
||||
return { command, initialArgs };
|
||||
}
|
||||
|
||||
createScript(fileName: string, content: string) {
|
||||
if (!this.testDir) {
|
||||
throw new Error(
|
||||
'TestRig.setup must be called before creating files or scripts',
|
||||
);
|
||||
}
|
||||
const scriptPath = join(this.testDir, fileName);
|
||||
writeFileSync(scriptPath, content);
|
||||
return normalizePath(scriptPath);
|
||||
}
|
||||
|
||||
private _getCleanEnv(
|
||||
extraEnv?: Record<string, string | undefined>,
|
||||
): Record<string, string | undefined> {
|
||||
@@ -499,6 +549,7 @@ export class TestRig {
|
||||
return {
|
||||
...cleanEnv,
|
||||
GEMINI_CLI_HOME: this.homeDir!,
|
||||
GEMINI_PTY_INFO: 'child_process',
|
||||
...extraEnv,
|
||||
};
|
||||
}
|
||||
@@ -801,6 +852,13 @@ export class TestRig {
|
||||
// Kill any interactive runs that are still active
|
||||
for (const run of this._interactiveRuns) {
|
||||
try {
|
||||
if (process.platform === 'win32') {
|
||||
// @ts-ignore - access private ptyProcess
|
||||
const pid = run.ptyProcess?.pid;
|
||||
if (pid) {
|
||||
execSync(`taskkill /F /T /PID ${pid}`, { stdio: 'ignore' });
|
||||
}
|
||||
}
|
||||
await run.kill();
|
||||
} catch (error) {
|
||||
if (env['VERBOSE'] === 'true') {
|
||||
@@ -814,6 +872,9 @@ export class TestRig {
|
||||
for (const child of this._spawnedProcesses) {
|
||||
if (child.exitCode === null && child.signalCode === null) {
|
||||
try {
|
||||
if (process.platform === 'win32' && child.pid) {
|
||||
execSync(`taskkill /F /T /PID ${child.pid}`, { stdio: 'ignore' });
|
||||
}
|
||||
child.kill('SIGKILL');
|
||||
} catch (error) {
|
||||
if (env['VERBOSE'] === 'true') {
|
||||
@@ -836,21 +897,21 @@ export class TestRig {
|
||||
// Clean up test directory and home directory
|
||||
if (this.testDir && !env['KEEP_OUTPUT']) {
|
||||
try {
|
||||
fs.rmSync(this.testDir, { recursive: true, force: true });
|
||||
this._cleanDir(this.testDir);
|
||||
} catch (error) {
|
||||
// Ignore cleanup errors
|
||||
if (env['VERBOSE'] === 'true') {
|
||||
console.warn('Cleanup warning:', (error as Error).message);
|
||||
if (env['VERBOSE'] === 'true' || env['CI'] === 'true') {
|
||||
console.warn('Cleanup warning (testDir):', (error as Error).message);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (this.homeDir && !env['KEEP_OUTPUT']) {
|
||||
try {
|
||||
fs.rmSync(this.homeDir, { recursive: true, force: true });
|
||||
this._cleanDir(this.homeDir);
|
||||
} catch (error) {
|
||||
// Ignore cleanup errors
|
||||
if (env['VERBOSE'] === 'true') {
|
||||
console.warn('Cleanup warning:', (error as Error).message);
|
||||
if (env['VERBOSE'] === 'true' || env['CI'] === 'true') {
|
||||
console.warn('Cleanup warning (homeDir):', (error as Error).message);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1282,6 +1343,23 @@ export class TestRig {
|
||||
|
||||
const envVars = this._getCleanEnv(options?.env);
|
||||
|
||||
// node-pty on windows often needs these to spawn correctly
|
||||
if (process.platform === 'win32') {
|
||||
const windowsCriticalVars = [
|
||||
'SystemRoot',
|
||||
'COMSPEC',
|
||||
'windir',
|
||||
'PATHEXT',
|
||||
'TEMP',
|
||||
'TMP',
|
||||
];
|
||||
for (const v of windowsCriticalVars) {
|
||||
if (process.env[v] && !envVars[v]) {
|
||||
envVars[v] = process.env[v]!;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const ptyOptions: pty.IPtyForkOptions = {
|
||||
name: 'xterm-color',
|
||||
cols: 80,
|
||||
@@ -1364,3 +1442,11 @@ export class TestRig {
|
||||
throw new Error(`pollCommand timed out after ${timeout}ms`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalizes a path for cross-platform matching (replaces backslashes with forward slashes).
|
||||
*/
|
||||
export function normalizePath(p: string | undefined): string | undefined {
|
||||
if (!p) return p;
|
||||
return p.replace(/\\/g, '/');
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user