fix(hooks): resolve Windows flakiness and improve reliability

This commit is contained in:
Taylor Mullen
2026-02-10 13:34:57 -08:00
parent b077cfe061
commit 80a0f04cfd
7 changed files with 129 additions and 147 deletions
+84 -9
View File
@@ -4,7 +4,6 @@ on:
push:
branches:
- 'main'
- 'ntm/repro-hook-flakiness'
merge_group:
workflow_run:
workflows: ['Trigger E2E']
@@ -131,7 +130,8 @@ jobs:
- 'merge_queue_skipper'
- 'parse_run_context'
runs-on: 'gemini-cli-ubuntu-16-core'
if: false
if: |
always() && (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip != 'true')
strategy:
fail-fast: false
matrix:
@@ -148,13 +148,43 @@ jobs:
ref: '${{ needs.parse_run_context.outputs.sha }}'
repository: '${{ needs.parse_run_context.outputs.repository }}'
- name: 'Set up Node.js ${{ matrix.node-version }}'
uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions-node@v4
with:
node-version: '${{ matrix.node-version }}'
- name: 'Install dependencies'
run: 'npm ci'
- name: 'Build project'
run: 'npm run build'
- name: 'Set up Docker'
if: "${{matrix.sandbox == 'sandbox:docker'}}"
uses: 'docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435' # ratchet:docker/setup-buildx-action@v3
- name: 'Run E2E tests'
env:
GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
KEEP_OUTPUT: 'true'
VERBOSE: 'true'
BUILD_SANDBOX_FLAGS: '--cache-from type=gha --cache-to type=gha,mode=max'
shell: 'bash'
run: |
if [[ "${{ matrix.sandbox }}" == "sandbox:docker" ]]; then
npm run test:integration:sandbox:docker
else
npm run test:integration:sandbox:none
fi
e2e_mac:
name: 'E2E Test (macOS)'
needs:
- 'merge_queue_skipper'
- 'parse_run_context'
runs-on: 'macos-latest'
if: false
if: |
always() && (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip != 'true')
steps:
- name: 'Checkout'
uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v5
@@ -162,6 +192,30 @@ jobs:
ref: '${{ needs.parse_run_context.outputs.sha }}'
repository: '${{ needs.parse_run_context.outputs.repository }}'
- name: 'Set up Node.js 20.x'
uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions-node@v4
with:
node-version: '20.x'
- name: 'Install dependencies'
run: 'npm ci'
- name: 'Build project'
run: 'npm run build'
- name: 'Fix rollup optional dependencies on macOS'
if: "${{runner.os == 'macOS'}}"
run: |
npm cache clean --force
- name: 'Run E2E tests (non-Windows)'
if: "${{runner.os != 'Windows'}}"
env:
GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
KEEP_OUTPUT: 'true'
SANDBOX: 'sandbox:none'
VERBOSE: 'true'
run: 'npm run test:integration:sandbox:none'
e2e_windows:
name: 'Slow E2E - Win'
needs:
@@ -221,7 +275,7 @@ jobs:
UV_THREADPOOL_SIZE: '32'
NODE_ENV: 'test'
shell: 'pwsh'
run: 'npx vitest run --root integration-tests hooks-system.test.ts --allowOnly'
run: 'npm run test:integration:sandbox:none'
evals:
name: 'Evals (ALWAYS_PASSING)'
@@ -229,7 +283,8 @@ jobs:
- 'merge_queue_skipper'
- 'parse_run_context'
runs-on: 'gemini-cli-ubuntu-16-core'
if: false
if: |
always() && (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip != 'true')
steps:
- name: 'Checkout'
uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v5
@@ -237,22 +292,42 @@ jobs:
ref: '${{ needs.parse_run_context.outputs.sha }}'
repository: '${{ needs.parse_run_context.outputs.repository }}'
- name: 'Set up Node.js 20.x'
uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions-node@v4
with:
node-version: '20.x'
- name: 'Install dependencies'
run: 'npm ci'
- name: 'Build project'
run: 'npm run build'
- name: 'Run Evals (Required to pass)'
env:
GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
run: 'npm run test:always_passing_evals'
e2e:
name: 'E2E'
if: |
always() && (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip != 'true')
needs:
- 'e2e_windows'
- 'e2e_linux'
- 'e2e_mac'
- 'evals'
- 'merge_queue_skipper'
runs-on: 'gemini-cli-ubuntu-16-core'
steps:
- name: 'Check E2E test results'
run: |
if [[ ${{ needs.e2e_windows.result }} != 'success' ]]; then
echo "Windows E2E job failed."
if [[ ${{ needs.e2e_linux.result }} != 'success' || \
${{ needs.e2e_mac.result }} != 'success' || \
${{ needs.evals.result }} != 'success' ]]; then
echo "One or more E2E jobs failed."
exit 1
fi
echo "Windows E2E job passed!"
echo "All required E2E jobs passed!"
set_workflow_status:
runs-on: 'gemini-cli-ubuntu-16-core'
+9 -9
View File
@@ -87,7 +87,7 @@ describe('Hooks System Integration', () => {
expect(hookTelemetryFound).toBeTruthy();
});
it.only('should block tool execution and use stderr as reason when hook exits with code 2', async () => {
it('should block tool execution and use stderr as reason when hook exits with code 2', async () => {
rig.setup(
'should block tool execution and use stderr as reason when hook exits with code 2',
{
@@ -110,7 +110,6 @@ process.exit(0);
);
const normalizedCmd = normalizePath(`node "${scriptPath}"`);
console.log(`[Test] normalizedCmd: ${normalizedCmd}`);
rig.setup(
'should block tool execution and use stderr as reason when hook exits with code 2',
@@ -156,14 +155,17 @@ process.exit(0);
// Result should mention the blocking reason from stderr
expect(result).toContain('File writing blocked by security policy');
// Verify hook telemetry shows exit code 2 and stderr
// Verify hook telemetry shows the deny decision
const hookLogs = rig.readHookLogs();
const blockHook = hookLogs.find((log) => log.hookCall.exit_code === 2);
const blockHook = hookLogs.find(
(log) =>
log.hookCall.hook_name.includes('stderr_block_hook.cjs') &&
log.hookCall.hook_output.includes('"decision":"deny"'),
);
expect(blockHook).toBeDefined();
expect(blockHook?.hookCall.stderr).toContain(
expect(blockHook?.hookCall.hook_output).toContain(
'File writing blocked by security policy',
);
expect(blockHook?.hookCall.success).toBe(false);
});
it('should allow tool execution when hook returns allow decision', async () => {
@@ -1894,10 +1896,9 @@ console.log(JSON.stringify({decision: "block", systemMessage: "Disabled hook sho
expect(disabledHookLog).toBeUndefined();
});
it.only('should respect disabled hooks across multiple operations', async () => {
it('should respect disabled hooks across multiple operations', async () => {
// 1. First setup to get the test directory and prepare the hook scripts
rig.setup('should respect disabled hooks across multiple operations');
console.log(`[Test] testDir: ${rig.testDir}`);
const activeHookScript = `const fs = require('fs');
console.log(JSON.stringify({decision: "allow", systemMessage: "Active hook executed"}));`;
@@ -1911,7 +1912,6 @@ console.log(JSON.stringify({decision: "block", systemMessage: "Disabled hook sho
writeFileSync(disabledPath, disabledHookScript);
const normalizedDisabledCmd = normalizePath(`node "${disabledPath}"`);
console.log(`[Test] normalizedDisabledCmd: ${normalizedDisabledCmd}`);
rig.setup('should respect disabled hooks across multiple operations', {
settings: {
@@ -965,11 +965,6 @@ export class CoreToolScheduler {
// The active tool is finished. Move it to the completed batch.
const completedCall = activeCall as CompletedToolCall;
if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') {
console.log(`[CoreToolScheduler] Completed tool call: ${completedCall.request.name}, status: ${completedCall.status}`);
}
this.completedToolCallsForBatch.push(completedCall);
logToolCall(this.config, new ToolCallEvent(completedCall));
-10
View File
@@ -233,16 +233,6 @@ please review the project settings (.gemini/settings.json) and remove them.`;
} as HookRegistryEntry);
const isDisabled = disabledHooks.includes(hookName);
if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') {
console.log(`[HookRegistry] Processing hook: "${hookName}"`);
if (isDisabled) {
console.log(`[HookRegistry] Hook is DISABLED (found in disabledHooks list)`);
}
if (disabledHooks.length > 0) {
console.log(`[HookRegistry] Current disabledHooks: ${JSON.stringify(disabledHooks)}`);
}
}
// Add source to hook config
hookConfig.source = source;
+1 -25
View File
@@ -35,6 +35,7 @@ const DEFAULT_HOOK_TIMEOUT = 60000;
* Exit code constants for hook execution
*/
const EXIT_CODE_SUCCESS = 0;
const EXIT_CODE_BLOCKING_ERROR = 2;
const EXIT_CODE_NON_BLOCKING_ERROR = 1;
/**
@@ -268,12 +269,6 @@ export class HookRunner {
shellConfig.shell,
);
if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') {
console.log(`[HookRunner] shellConfig: ${JSON.stringify(shellConfig)}`);
console.log(`[HookRunner] command: ${command}`);
console.log(`[HookRunner] cwd: ${input.cwd}`);
}
// Set up environment variables
const env = {
...sanitizeEnvironment(process.env, this.config.sanitizationConfig),
@@ -338,23 +333,11 @@ export class HookRunner {
stderr += data.toString();
});
child.on('exit', (code, signal) => {
if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') {
console.log(`[HookRunner] Hook exit. code: ${code}, signal: ${signal}`);
}
});
// Handle process exit
child.on('close', (exitCode) => {
clearTimeout(timeoutHandle);
const duration = Date.now() - startTime;
if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') {
console.log(`[HookRunner] Hook closed. exitCode: ${exitCode}, duration: ${duration}ms`);
console.log(`[HookRunner] stdout: ${stdout}`);
console.log(`[HookRunner] stderr: ${stderr}`);
}
if (timedOut) {
resolve({
hookConfig,
@@ -411,10 +394,6 @@ export class HookRunner {
clearTimeout(timeoutHandle);
const duration = Date.now() - startTime;
if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') {
console.log(`[HookRunner] Hook process error: ${error.message}`);
}
resolve({
hookConfig,
eventName,
@@ -450,9 +429,6 @@ export class HookRunner {
text: string,
exitCode: number,
): HookOutput {
if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') {
console.log(`[HookRunner] convertPlainTextToHookOutput: exitCode=${exitCode}, text="${text}"`);
}
if (exitCode === EXIT_CODE_SUCCESS) {
// Success - treat as system message or additional context
return {
+6 -10
View File
@@ -360,11 +360,9 @@ export class PolicyEngine {
);
if (match) {
if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') {
console.log(
`[PolicyEngine.check] MATCHED rule: toolName=${rule.toolName}, decision=${rule.decision}, priority=${rule.priority}, source=${rule.source}`,
);
}
debugLogger.debug(
`[PolicyEngine.check] MATCHED rule: toolName=${rule.toolName}, decision=${rule.decision}, priority=${rule.priority}, argsPattern=${rule.argsPattern?.source || 'none'}`,
);
if (isShellCommand && toolName) {
const shellResult = await this.checkShellCommand(
@@ -391,11 +389,9 @@ export class PolicyEngine {
// Default if no rule matched
if (decision === undefined) {
if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') {
console.log(
`[PolicyEngine.check] NO MATCH - using default decision: ${this.defaultDecision}`,
);
}
debugLogger.debug(
`[PolicyEngine.check] NO MATCH - using default decision: ${this.defaultDecision}`,
);
if (toolName && SHELL_TOOL_NAMES.includes(toolName)) {
const shellResult = await this.checkShellCommand(
toolName,
+29 -79
View File
@@ -60,18 +60,6 @@ export function sanitizeTestName(name: string) {
.replace(/-+/g, '-');
}
/**
* Normalizes a path for use in command-line arguments.
* On Windows, this converts backslashes to forward slashes.
*/
export function normalizePath(p: string): string {
const result = p.replace(/\\/g, '/');
if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') {
console.log(`[normalizePath] "${p}" -> "${result}"`);
}
return result;
}
// Helper to create detailed error messages
export function createToolCallErrorMessage(
expectedTools: string | string[],
@@ -373,43 +361,30 @@ export class TestRig {
this.testDir = join(testFileDir, sanitizedName);
this.homeDir = join(testFileDir, sanitizedName + '-home');
if (env['VERBOSE'] === 'true' || env['CI'] === 'true') {
console.log(`[TestRig] Setting up test: ${testName}`);
console.log(`[TestRig] testDir: ${this.testDir}`);
console.log(`[TestRig] homeDir: ${this.homeDir}`);
}
if (!this._initialized) {
// Clean up existing directories from previous runs (e.g. retries)
const cleanDir = (dir: string, name: string) => {
const cleanDir = (dir: string) => {
if (fs.existsSync(dir)) {
if (env['VERBOSE'] === 'true' || env['CI'] === 'true') {
console.log(`[TestRig] Cleaning up existing ${name}: ${dir}`);
}
for (let i = 0; i < 5; i++) {
try {
fs.rmSync(dir, { recursive: true, force: true });
return;
} catch (err) {
if (i === 4) throw err;
if (env['VERBOSE'] === 'true' || env['CI'] === 'true') {
console.log(
`[TestRig] Failed to clean ${name} (attempt ${i + 1}): ${err}`,
);
}
// Use timeout on windows, sleep on others
const sleepCmd =
process.platform === 'win32' ? 'timeout /t 1' : 'sleep 1';
try {
execSync(sleepCmd);
} catch {}
} catch {
/* ignore */
}
}
}
}
};
cleanDir(this.testDir, 'testDir');
cleanDir(this.homeDir, 'homeDir');
cleanDir(this.testDir);
cleanDir(this.homeDir);
this._initialized = true;
}
@@ -492,17 +467,6 @@ export class TestRig {
return filePath;
}
createScript(fileName: string, content: string) {
if (!this.testDir) {
throw new Error(
'TestRig.setup must be called before creating files or scripts',
);
}
const scriptPath = join(this.testDir, fileName);
writeFileSync(scriptPath, content);
return normalizePath(scriptPath);
}
mkdir(dir: string) {
mkdirSync(join(this.testDir!, dir), { recursive: true });
}
@@ -609,11 +573,6 @@ export class TestRig {
});
this._spawnedProcesses.push(child);
if (env['VERBOSE'] === 'true' || env['CI'] === 'true') {
console.log(`[TestRig] Running: ${command} ${commandArgs.join(' ')}`);
console.log(`[TestRig] CWD: ${this.testDir}`);
}
let stdout = '';
let stderr = '';
@@ -642,8 +601,7 @@ export class TestRig {
}
});
const isWinCI = os.platform() === 'win32' && process.env['CI'] === 'true';
const timeout = options.timeout ?? (isWinCI ? 600000 : 300000); // 10 mins on Win CI, 5 mins otherwise
const timeout = options.timeout ?? 300000;
const promise = new Promise<string>((resolve, reject) => {
const timer = setTimeout(() => {
child.kill('SIGKILL');
@@ -814,8 +772,7 @@ export class TestRig {
}
});
const isWinCI = os.platform() === 'win32' && process.env['CI'] === 'true';
const timeout = options.timeout ?? (isWinCI ? 600000 : 300000); // 10 mins on Win CI, 5 mins otherwise
const timeout = options.timeout ?? 300000;
const promise = new Promise<string>((resolve, reject) => {
const timer = setTimeout(() => {
child.kill('SIGKILL');
@@ -1257,11 +1214,6 @@ export class TestRig {
}[] = [];
for (const logData of parsedLogs) {
if (env['VERBOSE'] === 'true' || env['CI'] === 'true') {
if (logData.attributes?.['event.name']?.includes('tool')) {
console.log(`[TestRig] Found tool-related log: ${JSON.stringify(logData.attributes)}`);
}
}
// Look for tool call logs
if (
logData.attributes &&
@@ -1359,18 +1311,21 @@ export class TestRig {
const envVars = this._getCleanEnv(options?.env);
// Ensure PATH is included for pty
if (!envVars['PATH'] && process.env['PATH']) {
envVars['PATH'] = process.env['PATH'];
}
// Add critical Windows environment variables if missing
// node-pty on windows often needs these to spawn correctly
if (process.platform === 'win32') {
['SystemRoot', 'COMSPEC', 'windir', 'PATHEXT'].forEach((key) => {
if (!envVars[key] && process.env[key]) {
envVars[key] = process.env[key];
const windowsCriticalVars = [
'SystemRoot',
'COMSPEC',
'windir',
'PATHEXT',
'TEMP',
'TMP',
];
for (const v of windowsCriticalVars) {
if (process.env[v] && !envVars[v]) {
envVars[v] = process.env[v]!;
}
});
}
}
const ptyOptions: pty.IPtyForkOptions = {
@@ -1384,14 +1339,6 @@ export class TestRig {
};
const executable = command === 'node' ? process.execPath : command;
if (env['VERBOSE'] === 'true' || env['CI'] === 'true') {
console.log(`[TestRig] Spawning PTY: ${executable}`);
console.log(`[TestRig] Args: ${JSON.stringify(commandArgs)}`);
// Only log keys to avoid leaking sensitive info
console.log(
`[TestRig] Env Keys: ${Object.keys(ptyOptions.env!).join(', ')}`,
);
}
const ptyProcess = pty.spawn(executable, commandArgs, ptyOptions);
const run = new InteractiveRun(ptyProcess);
@@ -1419,11 +1366,6 @@ export class TestRig {
}[] = [];
for (const logData of parsedLogs) {
if (env['VERBOSE'] === 'true' || env['CI'] === 'true') {
if (logData.attributes?.['event.name']?.includes('hook')) {
console.log(`[TestRig] Found hook-related log: ${JSON.stringify(logData.attributes)}`);
}
}
// Look for tool call logs
if (
logData.attributes &&
@@ -1468,3 +1410,11 @@ export class TestRig {
throw new Error(`pollCommand timed out after ${timeout}ms`);
}
}
/**
* Normalizes a path for cross-platform matching (replaces backslashes with forward slashes).
*/
export function normalizePath(p: string | undefined): string | undefined {
if (!p) return p;
return p.replace(/\\/g, '/');
}