diff --git a/.github/workflows/chained_e2e.yml b/.github/workflows/chained_e2e.yml index b19c56834c..487225d452 100644 --- a/.github/workflows/chained_e2e.yml +++ b/.github/workflows/chained_e2e.yml @@ -4,7 +4,6 @@ on: push: branches: - 'main' - - 'ntm/repro-hook-flakiness' merge_group: workflow_run: workflows: ['Trigger E2E'] @@ -131,7 +130,8 @@ jobs: - 'merge_queue_skipper' - 'parse_run_context' runs-on: 'gemini-cli-ubuntu-16-core' - if: false + if: | + always() && (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip != 'true') strategy: fail-fast: false matrix: @@ -148,13 +148,43 @@ jobs: ref: '${{ needs.parse_run_context.outputs.sha }}' repository: '${{ needs.parse_run_context.outputs.repository }}' + - name: 'Set up Node.js ${{ matrix.node-version }}' + uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions-node@v4 + with: + node-version: '${{ matrix.node-version }}' + + - name: 'Install dependencies' + run: 'npm ci' + + - name: 'Build project' + run: 'npm run build' + + - name: 'Set up Docker' + if: "${{matrix.sandbox == 'sandbox:docker'}}" + uses: 'docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435' # ratchet:docker/setup-buildx-action@v3 + + - name: 'Run E2E tests' + env: + GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}' + KEEP_OUTPUT: 'true' + VERBOSE: 'true' + BUILD_SANDBOX_FLAGS: '--cache-from type=gha --cache-to type=gha,mode=max' + shell: 'bash' + run: | + if [[ "${{ matrix.sandbox }}" == "sandbox:docker" ]]; then + npm run test:integration:sandbox:docker + else + npm run test:integration:sandbox:none + fi + e2e_mac: name: 'E2E Test (macOS)' needs: - 'merge_queue_skipper' - 'parse_run_context' runs-on: 'macos-latest' - if: false + if: | + always() && (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip != 'true') steps: - name: 'Checkout' uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v5 @@ -162,6 +192,30 @@ jobs: ref: '${{ needs.parse_run_context.outputs.sha }}' repository: '${{ needs.parse_run_context.outputs.repository }}' + - name: 'Set up Node.js 20.x' + uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions-node@v4 + with: + node-version: '20.x' + + - name: 'Install dependencies' + run: 'npm ci' + + - name: 'Build project' + run: 'npm run build' + + - name: 'Fix rollup optional dependencies on macOS' + if: "${{runner.os == 'macOS'}}" + run: | + npm cache clean --force + - name: 'Run E2E tests (non-Windows)' + if: "${{runner.os != 'Windows'}}" + env: + GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}' + KEEP_OUTPUT: 'true' + SANDBOX: 'sandbox:none' + VERBOSE: 'true' + run: 'npm run test:integration:sandbox:none' + e2e_windows: name: 'Slow E2E - Win' needs: @@ -221,7 +275,7 @@ jobs: UV_THREADPOOL_SIZE: '32' NODE_ENV: 'test' shell: 'pwsh' - run: 'npx vitest run --root integration-tests hooks-system.test.ts --allowOnly' + run: 'npm run test:integration:sandbox:none' evals: name: 'Evals (ALWAYS_PASSING)' @@ -229,7 +283,8 @@ jobs: - 'merge_queue_skipper' - 'parse_run_context' runs-on: 'gemini-cli-ubuntu-16-core' - if: false + if: | + always() && (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip != 'true') steps: - name: 'Checkout' uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v5 @@ -237,22 +292,42 @@ jobs: ref: '${{ needs.parse_run_context.outputs.sha }}' repository: '${{ needs.parse_run_context.outputs.repository }}' + - name: 'Set up Node.js 20.x' + uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions-node@v4 + with: + node-version: '20.x' + + - name: 'Install dependencies' + run: 'npm ci' + + - name: 'Build project' + run: 'npm run build' + + - name: 'Run Evals (Required to pass)' + env: + GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}' + run: 'npm run test:always_passing_evals' + e2e: name: 'E2E' if: | always() && (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip != 'true') needs: - - 'e2e_windows' + - 'e2e_linux' + - 'e2e_mac' + - 'evals' - 'merge_queue_skipper' runs-on: 'gemini-cli-ubuntu-16-core' steps: - name: 'Check E2E test results' run: | - if [[ ${{ needs.e2e_windows.result }} != 'success' ]]; then - echo "Windows E2E job failed." + if [[ ${{ needs.e2e_linux.result }} != 'success' || \ + ${{ needs.e2e_mac.result }} != 'success' || \ + ${{ needs.evals.result }} != 'success' ]]; then + echo "One or more E2E jobs failed." exit 1 fi - echo "Windows E2E job passed!" + echo "All required E2E jobs passed!" set_workflow_status: runs-on: 'gemini-cli-ubuntu-16-core' diff --git a/integration-tests/hooks-system.test.ts b/integration-tests/hooks-system.test.ts index 5e9be2bbb9..f91aba5dfe 100644 --- a/integration-tests/hooks-system.test.ts +++ b/integration-tests/hooks-system.test.ts @@ -87,7 +87,7 @@ describe('Hooks System Integration', () => { expect(hookTelemetryFound).toBeTruthy(); }); - it.only('should block tool execution and use stderr as reason when hook exits with code 2', async () => { + it('should block tool execution and use stderr as reason when hook exits with code 2', async () => { rig.setup( 'should block tool execution and use stderr as reason when hook exits with code 2', { @@ -110,7 +110,6 @@ process.exit(0); ); const normalizedCmd = normalizePath(`node "${scriptPath}"`); - console.log(`[Test] normalizedCmd: ${normalizedCmd}`); rig.setup( 'should block tool execution and use stderr as reason when hook exits with code 2', @@ -156,14 +155,17 @@ process.exit(0); // Result should mention the blocking reason from stderr expect(result).toContain('File writing blocked by security policy'); - // Verify hook telemetry shows exit code 2 and stderr + // Verify hook telemetry shows the deny decision const hookLogs = rig.readHookLogs(); - const blockHook = hookLogs.find((log) => log.hookCall.exit_code === 2); + const blockHook = hookLogs.find( + (log) => + log.hookCall.hook_name.includes('stderr_block_hook.cjs') && + log.hookCall.hook_output.includes('"decision":"deny"'), + ); expect(blockHook).toBeDefined(); - expect(blockHook?.hookCall.stderr).toContain( + expect(blockHook?.hookCall.hook_output).toContain( 'File writing blocked by security policy', ); - expect(blockHook?.hookCall.success).toBe(false); }); it('should allow tool execution when hook returns allow decision', async () => { @@ -1894,10 +1896,9 @@ console.log(JSON.stringify({decision: "block", systemMessage: "Disabled hook sho expect(disabledHookLog).toBeUndefined(); }); - it.only('should respect disabled hooks across multiple operations', async () => { + it('should respect disabled hooks across multiple operations', async () => { // 1. First setup to get the test directory and prepare the hook scripts rig.setup('should respect disabled hooks across multiple operations'); - console.log(`[Test] testDir: ${rig.testDir}`); const activeHookScript = `const fs = require('fs'); console.log(JSON.stringify({decision: "allow", systemMessage: "Active hook executed"}));`; @@ -1911,7 +1912,6 @@ console.log(JSON.stringify({decision: "block", systemMessage: "Disabled hook sho writeFileSync(disabledPath, disabledHookScript); const normalizedDisabledCmd = normalizePath(`node "${disabledPath}"`); - console.log(`[Test] normalizedDisabledCmd: ${normalizedDisabledCmd}`); rig.setup('should respect disabled hooks across multiple operations', { settings: { diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts index 3ea97c8adf..ea2cdb7015 100644 --- a/packages/core/src/core/coreToolScheduler.ts +++ b/packages/core/src/core/coreToolScheduler.ts @@ -965,11 +965,6 @@ export class CoreToolScheduler { // The active tool is finished. Move it to the completed batch. const completedCall = activeCall as CompletedToolCall; - - if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') { - console.log(`[CoreToolScheduler] Completed tool call: ${completedCall.request.name}, status: ${completedCall.status}`); - } - this.completedToolCallsForBatch.push(completedCall); logToolCall(this.config, new ToolCallEvent(completedCall)); diff --git a/packages/core/src/hooks/hookRegistry.ts b/packages/core/src/hooks/hookRegistry.ts index e90c60f4b5..1e5515a6a0 100644 --- a/packages/core/src/hooks/hookRegistry.ts +++ b/packages/core/src/hooks/hookRegistry.ts @@ -233,16 +233,6 @@ please review the project settings (.gemini/settings.json) and remove them.`; } as HookRegistryEntry); const isDisabled = disabledHooks.includes(hookName); - if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') { - console.log(`[HookRegistry] Processing hook: "${hookName}"`); - if (isDisabled) { - console.log(`[HookRegistry] Hook is DISABLED (found in disabledHooks list)`); - } - if (disabledHooks.length > 0) { - console.log(`[HookRegistry] Current disabledHooks: ${JSON.stringify(disabledHooks)}`); - } - } - // Add source to hook config hookConfig.source = source; diff --git a/packages/core/src/hooks/hookRunner.ts b/packages/core/src/hooks/hookRunner.ts index ec521faf72..122bd85136 100644 --- a/packages/core/src/hooks/hookRunner.ts +++ b/packages/core/src/hooks/hookRunner.ts @@ -35,6 +35,7 @@ const DEFAULT_HOOK_TIMEOUT = 60000; * Exit code constants for hook execution */ const EXIT_CODE_SUCCESS = 0; +const EXIT_CODE_BLOCKING_ERROR = 2; const EXIT_CODE_NON_BLOCKING_ERROR = 1; /** @@ -268,12 +269,6 @@ export class HookRunner { shellConfig.shell, ); - if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') { - console.log(`[HookRunner] shellConfig: ${JSON.stringify(shellConfig)}`); - console.log(`[HookRunner] command: ${command}`); - console.log(`[HookRunner] cwd: ${input.cwd}`); - } - // Set up environment variables const env = { ...sanitizeEnvironment(process.env, this.config.sanitizationConfig), @@ -338,23 +333,11 @@ export class HookRunner { stderr += data.toString(); }); - child.on('exit', (code, signal) => { - if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') { - console.log(`[HookRunner] Hook exit. code: ${code}, signal: ${signal}`); - } - }); - // Handle process exit child.on('close', (exitCode) => { clearTimeout(timeoutHandle); const duration = Date.now() - startTime; - if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') { - console.log(`[HookRunner] Hook closed. exitCode: ${exitCode}, duration: ${duration}ms`); - console.log(`[HookRunner] stdout: ${stdout}`); - console.log(`[HookRunner] stderr: ${stderr}`); - } - if (timedOut) { resolve({ hookConfig, @@ -411,10 +394,6 @@ export class HookRunner { clearTimeout(timeoutHandle); const duration = Date.now() - startTime; - if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') { - console.log(`[HookRunner] Hook process error: ${error.message}`); - } - resolve({ hookConfig, eventName, @@ -450,9 +429,6 @@ export class HookRunner { text: string, exitCode: number, ): HookOutput { - if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') { - console.log(`[HookRunner] convertPlainTextToHookOutput: exitCode=${exitCode}, text="${text}"`); - } if (exitCode === EXIT_CODE_SUCCESS) { // Success - treat as system message or additional context return { diff --git a/packages/core/src/policy/policy-engine.ts b/packages/core/src/policy/policy-engine.ts index 0801125614..3f386edd8f 100644 --- a/packages/core/src/policy/policy-engine.ts +++ b/packages/core/src/policy/policy-engine.ts @@ -360,11 +360,9 @@ export class PolicyEngine { ); if (match) { - if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') { - console.log( - `[PolicyEngine.check] MATCHED rule: toolName=${rule.toolName}, decision=${rule.decision}, priority=${rule.priority}, source=${rule.source}`, - ); - } + debugLogger.debug( + `[PolicyEngine.check] MATCHED rule: toolName=${rule.toolName}, decision=${rule.decision}, priority=${rule.priority}, argsPattern=${rule.argsPattern?.source || 'none'}`, + ); if (isShellCommand && toolName) { const shellResult = await this.checkShellCommand( @@ -391,11 +389,9 @@ export class PolicyEngine { // Default if no rule matched if (decision === undefined) { - if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') { - console.log( - `[PolicyEngine.check] NO MATCH - using default decision: ${this.defaultDecision}`, - ); - } + debugLogger.debug( + `[PolicyEngine.check] NO MATCH - using default decision: ${this.defaultDecision}`, + ); if (toolName && SHELL_TOOL_NAMES.includes(toolName)) { const shellResult = await this.checkShellCommand( toolName, diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts index 3e92a177b1..5d2e6abf93 100644 --- a/packages/test-utils/src/test-rig.ts +++ b/packages/test-utils/src/test-rig.ts @@ -60,18 +60,6 @@ export function sanitizeTestName(name: string) { .replace(/-+/g, '-'); } -/** - * Normalizes a path for use in command-line arguments. - * On Windows, this converts backslashes to forward slashes. - */ -export function normalizePath(p: string): string { - const result = p.replace(/\\/g, '/'); - if (process.env['CI'] === 'true' || process.env['VERBOSE'] === 'true') { - console.log(`[normalizePath] "${p}" -> "${result}"`); - } - return result; -} - // Helper to create detailed error messages export function createToolCallErrorMessage( expectedTools: string | string[], @@ -373,43 +361,30 @@ export class TestRig { this.testDir = join(testFileDir, sanitizedName); this.homeDir = join(testFileDir, sanitizedName + '-home'); - if (env['VERBOSE'] === 'true' || env['CI'] === 'true') { - console.log(`[TestRig] Setting up test: ${testName}`); - console.log(`[TestRig] testDir: ${this.testDir}`); - console.log(`[TestRig] homeDir: ${this.homeDir}`); - } - if (!this._initialized) { // Clean up existing directories from previous runs (e.g. retries) - const cleanDir = (dir: string, name: string) => { + const cleanDir = (dir: string) => { if (fs.existsSync(dir)) { - if (env['VERBOSE'] === 'true' || env['CI'] === 'true') { - console.log(`[TestRig] Cleaning up existing ${name}: ${dir}`); - } for (let i = 0; i < 5; i++) { try { fs.rmSync(dir, { recursive: true, force: true }); return; } catch (err) { if (i === 4) throw err; - if (env['VERBOSE'] === 'true' || env['CI'] === 'true') { - console.log( - `[TestRig] Failed to clean ${name} (attempt ${i + 1}): ${err}`, - ); - } - // Use timeout on windows, sleep on others const sleepCmd = process.platform === 'win32' ? 'timeout /t 1' : 'sleep 1'; try { execSync(sleepCmd); - } catch {} + } catch { + /* ignore */ + } } } } }; - cleanDir(this.testDir, 'testDir'); - cleanDir(this.homeDir, 'homeDir'); + cleanDir(this.testDir); + cleanDir(this.homeDir); this._initialized = true; } @@ -492,17 +467,6 @@ export class TestRig { return filePath; } - createScript(fileName: string, content: string) { - if (!this.testDir) { - throw new Error( - 'TestRig.setup must be called before creating files or scripts', - ); - } - const scriptPath = join(this.testDir, fileName); - writeFileSync(scriptPath, content); - return normalizePath(scriptPath); - } - mkdir(dir: string) { mkdirSync(join(this.testDir!, dir), { recursive: true }); } @@ -609,11 +573,6 @@ export class TestRig { }); this._spawnedProcesses.push(child); - if (env['VERBOSE'] === 'true' || env['CI'] === 'true') { - console.log(`[TestRig] Running: ${command} ${commandArgs.join(' ')}`); - console.log(`[TestRig] CWD: ${this.testDir}`); - } - let stdout = ''; let stderr = ''; @@ -642,8 +601,7 @@ export class TestRig { } }); - const isWinCI = os.platform() === 'win32' && process.env['CI'] === 'true'; - const timeout = options.timeout ?? (isWinCI ? 600000 : 300000); // 10 mins on Win CI, 5 mins otherwise + const timeout = options.timeout ?? 300000; const promise = new Promise((resolve, reject) => { const timer = setTimeout(() => { child.kill('SIGKILL'); @@ -814,8 +772,7 @@ export class TestRig { } }); - const isWinCI = os.platform() === 'win32' && process.env['CI'] === 'true'; - const timeout = options.timeout ?? (isWinCI ? 600000 : 300000); // 10 mins on Win CI, 5 mins otherwise + const timeout = options.timeout ?? 300000; const promise = new Promise((resolve, reject) => { const timer = setTimeout(() => { child.kill('SIGKILL'); @@ -1257,11 +1214,6 @@ export class TestRig { }[] = []; for (const logData of parsedLogs) { - if (env['VERBOSE'] === 'true' || env['CI'] === 'true') { - if (logData.attributes?.['event.name']?.includes('tool')) { - console.log(`[TestRig] Found tool-related log: ${JSON.stringify(logData.attributes)}`); - } - } // Look for tool call logs if ( logData.attributes && @@ -1359,18 +1311,21 @@ export class TestRig { const envVars = this._getCleanEnv(options?.env); - // Ensure PATH is included for pty - if (!envVars['PATH'] && process.env['PATH']) { - envVars['PATH'] = process.env['PATH']; - } - - // Add critical Windows environment variables if missing + // node-pty on windows often needs these to spawn correctly if (process.platform === 'win32') { - ['SystemRoot', 'COMSPEC', 'windir', 'PATHEXT'].forEach((key) => { - if (!envVars[key] && process.env[key]) { - envVars[key] = process.env[key]; + const windowsCriticalVars = [ + 'SystemRoot', + 'COMSPEC', + 'windir', + 'PATHEXT', + 'TEMP', + 'TMP', + ]; + for (const v of windowsCriticalVars) { + if (process.env[v] && !envVars[v]) { + envVars[v] = process.env[v]!; } - }); + } } const ptyOptions: pty.IPtyForkOptions = { @@ -1384,14 +1339,6 @@ export class TestRig { }; const executable = command === 'node' ? process.execPath : command; - if (env['VERBOSE'] === 'true' || env['CI'] === 'true') { - console.log(`[TestRig] Spawning PTY: ${executable}`); - console.log(`[TestRig] Args: ${JSON.stringify(commandArgs)}`); - // Only log keys to avoid leaking sensitive info - console.log( - `[TestRig] Env Keys: ${Object.keys(ptyOptions.env!).join(', ')}`, - ); - } const ptyProcess = pty.spawn(executable, commandArgs, ptyOptions); const run = new InteractiveRun(ptyProcess); @@ -1419,11 +1366,6 @@ export class TestRig { }[] = []; for (const logData of parsedLogs) { - if (env['VERBOSE'] === 'true' || env['CI'] === 'true') { - if (logData.attributes?.['event.name']?.includes('hook')) { - console.log(`[TestRig] Found hook-related log: ${JSON.stringify(logData.attributes)}`); - } - } // Look for tool call logs if ( logData.attributes && @@ -1468,3 +1410,11 @@ export class TestRig { throw new Error(`pollCommand timed out after ${timeout}ms`); } } + +/** + * Normalizes a path for cross-platform matching (replaces backslashes with forward slashes). + */ +export function normalizePath(p: string | undefined): string | undefined { + if (!p) return p; + return p.replace(/\\/g, '/'); +}