feat(test): add high-volume shell test and refine perf harness (#24983)

2026-06-12 20:37:08 -07:00 · 2026-04-09 15:23:00 -07:00
parent 451edb3ea6
commit de628b04fc
9 changed files with 312 additions and 27 deletions
@@ -50,6 +50,7 @@ export const DEFAULT_ACTIVITY_CONFIG: ActivityMonitorConfig = {
    ActivityType.USER_INPUT_START,
    ActivityType.MESSAGE_ADDED,
    ActivityType.TOOL_CALL_SCHEDULED,
+    ActivityType.TOOL_CALL_COMPLETED,
    ActivityType.STREAM_START,
  ],
 };
@@ -0,0 +1,99 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import process from 'node:process';
+import { monitorEventLoopDelay, type IntervalHistogram } from 'node:perf_hooks';
+import type { Config } from '../config/config.js';
+import {
+  recordEventLoopDelay,
+  isPerformanceMonitoringActive,
+} from './metrics.js';
+
+export class EventLoopMonitor {
+  private eventLoopHistogram: IntervalHistogram | null = null;
+  private intervalId: NodeJS.Timeout | null = null;
+  private isRunning = false;
+
+  start(config: Config, intervalMs: number = 10000): void {
+    const isEnabled =
+      process.env['GEMINI_EVENT_LOOP_MONITOR_ENABLED'] === 'true';
+    if (!isEnabled || !isPerformanceMonitoringActive() || this.isRunning) {
+      return;
+    }
+
+    this.isRunning = true;
+    this.eventLoopHistogram = monitorEventLoopDelay({ resolution: 10 });
+    this.eventLoopHistogram.enable();
+
+    this.intervalId = setInterval(() => {
+      this.takeSnapshot(config);
+    }, intervalMs).unref();
+  }
+
+  stop(): void {
+    if (!this.isRunning) {
+      return;
+    }
+
+    if (this.intervalId) {
+      clearInterval(this.intervalId);
+      this.intervalId = null;
+    }
+
+    if (this.eventLoopHistogram) {
+      this.eventLoopHistogram.disable();
+      this.eventLoopHistogram = null;
+    }
+
+    this.isRunning = false;
+  }
+
+  private takeSnapshot(config: Config): void {
+    if (!this.eventLoopHistogram) {
+      return;
+    }
+
+    const p50 = this.eventLoopHistogram.percentile(50) / 1e6;
+    const p95 = this.eventLoopHistogram.percentile(95) / 1e6;
+    const max = this.eventLoopHistogram.max / 1e6;
+
+    recordEventLoopDelay(config, p50, {
+      percentile: 'p50',
+      component: 'event_loop_monitor',
+    });
+    recordEventLoopDelay(config, p95, {
+      percentile: 'p95',
+      component: 'event_loop_monitor',
+    });
+    recordEventLoopDelay(config, max, {
+      percentile: 'max',
+      component: 'event_loop_monitor',
+    });
+  }
+}
+
+let globalEventLoopMonitor: EventLoopMonitor | null = null;
+
+export function startGlobalEventLoopMonitoring(
+  config: Config,
+  intervalMs?: number,
+): void {
+  if (!globalEventLoopMonitor) {
+    globalEventLoopMonitor = new EventLoopMonitor();
+  }
+  globalEventLoopMonitor.start(config, intervalMs);
+}
+
+export function stopGlobalEventLoopMonitoring(): void {
+  if (globalEventLoopMonitor) {
+    globalEventLoopMonitor.stop();
+    globalEventLoopMonitor = null;
+  }
+}
+
+export function getEventLoopMonitor(): EventLoopMonitor | null {
+  return globalEventLoopMonitor;
+}
@@ -93,6 +93,12 @@ export {
  stopGlobalMemoryMonitoring,
 } from './memory-monitor.js';
 export type { MemorySnapshot, ProcessMetrics } from './memory-monitor.js';
+export {
+  EventLoopMonitor,
+  startGlobalEventLoopMonitoring,
+  stopGlobalEventLoopMonitoring,
+  getEventLoopMonitor,
+} from './event-loop-monitor.js';
 export { HighWaterMarkTracker } from './high-water-mark-tracker.js';
 export { RateLimiter } from './rate-limiter.js';
 export { ActivityType } from './activity-types.js';
@@ -133,6 +139,7 @@ export {
  recordStartupPerformance,
  recordMemoryUsage,
  recordCpuUsage,
+  recordEventLoopDelay,
  recordToolQueueDepth,
  recordToolExecutionBreakdown,
  recordTokenEfficiency,
@@ -88,6 +88,7 @@ const GEN_AI_CLIENT_OPERATION_DURATION = 'gen_ai.client.operation.duration';
 const STARTUP_TIME = 'gemini_cli.startup.duration';
 const MEMORY_USAGE = 'gemini_cli.memory.usage';
 const CPU_USAGE = 'gemini_cli.cpu.usage';
+const EVENT_LOOP_DELAY = 'gemini_cli.event_loop.delay';
 const TOOL_QUEUE_DEPTH = 'gemini_cli.tool.queue.depth';
 const TOOL_EXECUTION_BREAKDOWN = 'gemini_cli.tool.execution.breakdown';
 const TOKEN_EFFICIENCY = 'gemini_cli.token.efficiency';
@@ -608,6 +609,17 @@ const PERFORMANCE_HISTOGRAM_DEFINITIONS = {
      component?: string;
    },
  },
+  [EVENT_LOOP_DELAY]: {
+    description: 'Event loop delay in milliseconds.',
+    unit: 'ms',
+    valueType: ValueType.DOUBLE,
+    assign: (h: Histogram) => (eventLoopDelayHistogram = h),
+    // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
+    attributes: {} as {
+      percentile: string;
+      component?: string;
+    },
+  },
  [TOOL_QUEUE_DEPTH]: {
    description: 'Number of tools in execution queue.',
    unit: 'count',
@@ -806,6 +818,7 @@ let genAiClientOperationDurationHistogram: Histogram | undefined;
 let startupTimeHistogram: Histogram | undefined;
 let memoryUsageGauge: Histogram | undefined; // Using Histogram until ObservableGauge is available
 let cpuUsageGauge: Histogram | undefined;
+let eventLoopDelayHistogram: Histogram | undefined;
 let toolQueueDepthGauge: Histogram | undefined;
 let toolExecutionBreakdownHistogram: Histogram | undefined;
 let tokenEfficiencyHistogram: Histogram | undefined;
@@ -1339,6 +1352,21 @@ export function recordCpuUsage(
  cpuUsageGauge.record(percentage, metricAttributes);
 }

+export function recordEventLoopDelay(
+  config: Config,
+  delayMs: number,
+  attributes: MetricDefinitions[typeof EVENT_LOOP_DELAY]['attributes'],
+): void {
+  if (!eventLoopDelayHistogram || !isPerformanceMonitoringEnabled) return;
+
+  const metricAttributes: Attributes = {
+    ...baseMetricDefinition.getCommonAttributes(config),
+    ...attributes,
+  };
+
+  eventLoopDelayHistogram.record(delayMs, metricAttributes);
+}
+
 export function recordToolQueueDepth(config: Config, queueDepth: number): void {
  if (!toolQueueDepthGauge || !isPerformanceMonitoringEnabled) return;

@@ -52,6 +52,11 @@ import {
 } from './gcp-exporters.js';
 import { TelemetryTarget } from './index.js';
 import { debugLogger } from '../utils/debugLogger.js';
+import {
+  startGlobalMemoryMonitoring,
+  getMemoryMonitor,
+} from './memory-monitor.js';
+import { startGlobalEventLoopMonitoring } from './event-loop-monitor.js';
 import { authEvents } from '../code_assist/oauth2.js';
 import { coreEvents, CoreEvent } from '../utils/events.js';
 import {
@@ -91,6 +96,7 @@ diag.setLogger(new DiagLoggerAdapter(), DiagLogLevel.INFO);
 let sdk: NodeSDK | undefined;
 let spanProcessor: BatchSpanProcessor | undefined;
 let logRecordProcessor: BatchLogRecordProcessor | undefined;
+let metricReader: PeriodicExportingMetricReader | undefined;
 let telemetryInitialized = false;
 let callbackRegistered = false;
 let authListener: ((newCredentials: JWTInput) => Promise<void>) | undefined =
@@ -258,7 +264,6 @@ export async function initializeTelemetry(
    | GcpLogExporter
    | FileLogExporter
    | ConsoleLogRecordExporter;
-  let metricReader: PeriodicExportingMetricReader;

  if (useDirectGcpExport) {
    debugLogger.log(
@@ -346,6 +351,26 @@ export async function initializeTelemetry(
    }
    activeTelemetryEmail = credentials?.client_email;
    initializeMetrics(config);
+
+    // Start memory monitoring if interval is specified via environment variable
+    const monitorInterval = process.env['GEMINI_MEMORY_MONITOR_INTERVAL'];
+    debugLogger.log(
+      `[TELEMETRY] GEMINI_MEMORY_MONITOR_INTERVAL: ${monitorInterval}`,
+    );
+    if (monitorInterval) {
+      const intervalMs = parseInt(monitorInterval, 10);
+      if (!isNaN(intervalMs) && intervalMs > 0) {
+        startGlobalMemoryMonitoring(config, intervalMs);
+        startGlobalEventLoopMonitoring(config, intervalMs);
+        // Disable enhanced monitoring (rate limiting/high water mark) in tests
+        // to ensure we get regular snapshots regardless of growth.
+        const monitor = getMemoryMonitor();
+        if (monitor) {
+          monitor.setEnhancedMonitoring(false);
+        }
+      }
+    }
+
    telemetryInitialized = true;
    void flushTelemetryBuffer();
  } catch (error) {
@@ -378,6 +403,7 @@ export async function flushTelemetry(config: Config): Promise<void> {
    await Promise.all([
      spanProcessor.forceFlush(),
      logRecordProcessor.forceFlush(),
+      metricReader ? metricReader.forceFlush() : Promise.resolve(),
    ]);
    if (config.getDebugMode()) {
      debugLogger.log('OpenTelemetry SDK flushed successfully.');
@@ -23,7 +23,6 @@ type PlotFn = (series: number[], config?: PlotConfig) => string;
 export interface PerfBaseline {
  wallClockMs: number;
  cpuTotalUs: number;
-  eventLoopDelayP99Ms: number;
  timestamp: string;
 }

@@ -48,8 +47,10 @@ export interface PerfSnapshot {
  cpuTotalUs: number;
  eventLoopDelayP50Ms: number;
  eventLoopDelayP95Ms: number;
-  eventLoopDelayP99Ms: number;
  eventLoopDelayMaxMs: number;
+  childEventLoopDelayP50Ms?: number;
+  childEventLoopDelayP95Ms?: number;
+  childEventLoopDelayMaxMs?: number;
 }

 /**
@@ -159,7 +160,6 @@ export class PerfTestHarness {
      cpuTotalUs: cpuDelta.user + cpuDelta.system,
      eventLoopDelayP50Ms: 0,
      eventLoopDelayP95Ms: 0,
-      eventLoopDelayP99Ms: 0,
      eventLoopDelayMaxMs: 0,
    };
  }
@@ -196,7 +196,6 @@ export class PerfTestHarness {
    // Convert from nanoseconds to milliseconds
    snapshot.eventLoopDelayP50Ms = histogram.percentile(50) / 1e6;
    snapshot.eventLoopDelayP95Ms = histogram.percentile(95) / 1e6;
-    snapshot.eventLoopDelayP99Ms = histogram.percentile(99) / 1e6;
    snapshot.eventLoopDelayMaxMs = histogram.max / 1e6;

    return snapshot;
@@ -305,7 +304,6 @@ export class PerfTestHarness {
          `  Baseline:    ${result.baseline.wallClockMs.toFixed(1)} ms wall-clock\n` +
          `  Delta:       ${deltaPercent.toFixed(1)}% (tolerance: ${tolerance}%)\n` +
          `  CPU total:   ${formatUs(result.median.cpuTotalUs)}\n` +
-          `  EL p99:      ${result.median.eventLoopDelayP99Ms.toFixed(1)} ms\n` +
          `  Samples:     ${result.samples.length} (${result.filteredSamples.length} after IQR filter)`,
      );
    }
@@ -316,8 +314,7 @@ export class PerfTestHarness {
          `  Measured:    ${formatUs(result.median.cpuTotalUs)}\n` +
          `  Baseline:    ${formatUs(result.baseline.cpuTotalUs)}\n` +
          `  Delta:       ${result.cpuDeltaPercent.toFixed(1)}% (tolerance: ${cpuTolerance}%)\n` +
-          `  Wall-clock:  ${result.median.wallClockMs.toFixed(1)} ms\n` +
-          `  EL p99:      ${result.median.eventLoopDelayP99Ms.toFixed(1)} ms`,
+          `  Wall-clock:  ${result.median.wallClockMs.toFixed(1)} ms`,
      );
    }
  }
@@ -329,7 +326,6 @@ export class PerfTestHarness {
    updatePerfBaseline(this.baselinesPath, result.scenarioName, {
      wallClockMs: result.median.wallClockMs,
      cpuTotalUs: result.median.cpuTotalUs,
-      eventLoopDelayP99Ms: result.median.eventLoopDelayP99Ms,
    });
    // Reload baselines after update
    this.baselines = loadPerfBaselines(this.baselinesPath);
@@ -375,9 +371,18 @@ export class PerfTestHarness {
        `  CPU: ${cpuMs} (user: ${formatUs(result.median.cpuUserUs)}, system: ${formatUs(result.median.cpuSystemUs)})`,
      );

-      if (result.median.eventLoopDelayP99Ms > 0) {
+      if (result.median.eventLoopDelayMaxMs > 0) {
        lines.push(
-          `  Event loop: p50=${result.median.eventLoopDelayP50Ms.toFixed(1)}ms p95=${result.median.eventLoopDelayP95Ms.toFixed(1)}ms p99=${result.median.eventLoopDelayP99Ms.toFixed(1)}ms max=${result.median.eventLoopDelayMaxMs.toFixed(1)}ms`,
+          `  Event loop (runner): p50=${result.median.eventLoopDelayP50Ms.toFixed(1)}ms p95=${result.median.eventLoopDelayP95Ms.toFixed(1)}ms max=${result.median.eventLoopDelayMaxMs.toFixed(1)}ms`,
+        );
+      }
+
+      if (
+        result.median.childEventLoopDelayMaxMs !== undefined &&
+        result.median.childEventLoopDelayMaxMs > 0
+      ) {
+        lines.push(
+          `  Event loop (CLI):    p50=${result.median.childEventLoopDelayP50Ms!.toFixed(1)}ms p95=${result.median.childEventLoopDelayP95Ms!.toFixed(1)}ms max=${result.median.childEventLoopDelayMaxMs!.toFixed(1)}ms`,
        );
      }

@@ -517,14 +522,12 @@ export function updatePerfBaseline(
  measured: {
    wallClockMs: number;
    cpuTotalUs: number;
-    eventLoopDelayP99Ms: number;
  },
 ): void {
  const baselines = loadPerfBaselines(path);
  baselines.scenarios[scenarioName] = {
    wallClockMs: measured.wallClockMs,
    cpuTotalUs: measured.cpuTotalUs,
-    eventLoopDelayP99Ms: measured.eventLoopDelayP99Ms,
    timestamp: new Date().toISOString(),
  };
  savePerfBaselines(path, baselines);
@@ -1,24 +1,26 @@
 {
  "version": 1,
-  "updatedAt": "2026-04-08T18:51:29.839Z",
+  "updatedAt": "2026-04-09T02:30:22.000Z",
  "scenarios": {
    "cold-startup-time": {
-      "wallClockMs": 1333.4230420000004,
-      "cpuTotalUs": 1711,
-      "eventLoopDelayP99Ms": 0,
-      "timestamp": "2026-04-08T18:50:58.124Z"
+      "wallClockMs": 927.553249999999,
+      "cpuTotalUs": 1470,
+      "timestamp": "2026-04-08T22:27:54.871Z"
    },
    "idle-cpu-usage": {
-      "wallClockMs": 5001.926125,
-      "cpuTotalUs": 128518,
-      "eventLoopDelayP99Ms": 12.705791,
-      "timestamp": "2026-04-08T18:51:23.938Z"
+      "wallClockMs": 5000.460750000002,
+      "cpuTotalUs": 12157,
+      "timestamp": "2026-04-08T22:28:19.098Z"
    },
    "skill-loading-time": {
-      "wallClockMs": 1372.4463749999995,
-      "cpuTotalUs": 1550,
-      "eventLoopDelayP99Ms": 0,
-      "timestamp": "2026-04-08T18:51:29.839Z"
+      "wallClockMs": 930.0920409999962,
+      "cpuTotalUs": 1323,
+      "timestamp": "2026-04-08T22:28:23.290Z"
+    },
+    "high-volume-shell-output": {
+      "wallClockMs": 1119.9,
+      "cpuTotalUs": 2100,
+      "timestamp": "2026-04-09T02:30:22.000Z"
    }
  }
 }
@@ -8,6 +8,7 @@ import { describe, it, beforeAll, afterAll } from 'vitest';
 import { TestRig, PerfTestHarness } from '@google/gemini-cli-test-utils';
 import { join, dirname } from 'node:path';
 import { fileURLToPath } from 'node:url';
+import { existsSync, readFileSync } from 'node:fs';

 const __dirname = dirname(fileURLToPath(import.meta.url));
 const BASELINES_PATH = join(__dirname, 'baselines.json');
@@ -33,7 +34,7 @@ describe('CPU Performance Tests', () => {
  afterAll(async () => {
    // Generate the summary report after all tests
    await harness.generateReport();
-  });
+  }, 30000);

  it('cold-startup-time: startup completes within baseline', async () => {
    const result = await harness.runScenario('cold-startup-time', async () => {
@@ -150,4 +151,119 @@ describe('CPU Performance Tests', () => {
      harness.assertWithinBaseline(result);
    }
  });
+
+  it('high-volume-shell-output: handles large output efficiently', async () => {
+    const result = await harness.runScenario(
+      'high-volume-shell-output',
+      async () => {
+        const rig = new TestRig();
+        try {
+          rig.setup('perf-high-volume-output', {
+            fakeResponsesPath: join(__dirname, 'perf.high-volume.responses'),
+          });
+
+          const snapshot = await harness.measureWithEventLoop(
+            'high-volume-output',
+            async () => {
+              const runResult = await rig.run({
+                args: ['Generate 1M lines of output'],
+                timeout: 120000,
+                env: {
+                  GEMINI_API_KEY: 'fake-perf-test-key',
+                  GEMINI_TELEMETRY_ENABLED: 'true',
+                  GEMINI_MEMORY_MONITOR_INTERVAL: '500',
+                  GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true',
+                  DEBUG: 'true',
+                },
+              });
+              console.log(`  Child Process Output:`, runResult);
+            },
+          );
+
+          // Query CLI's own performance metrics from telemetry logs
+          await rig.waitForTelemetryReady();
+
+          // Debug: Read and log the telemetry file content
+          try {
+            const logFilePath = join(rig.homeDir!, 'telemetry.log');
+            if (existsSync(logFilePath)) {
+              const content = readFileSync(logFilePath, 'utf-8');
+              console.log(`  Telemetry Log Content:\n`, content);
+            } else {
+              console.log(`  Telemetry log file not found at: ${logFilePath}`);
+            }
+          } catch (e) {
+            console.error(`  Failed to read telemetry log:`, e);
+          }
+
+          const memoryMetric = rig.readMetric('memory.usage');
+          const cpuMetric = rig.readMetric('cpu.usage');
+          const toolLatencyMetric = rig.readMetric('tool.call.latency');
+          const eventLoopMetric = rig.readMetric('event_loop.delay');
+
+          if (memoryMetric) {
+            console.log(
+              `  CLI Memory Metric found:`,
+              JSON.stringify(memoryMetric),
+            );
+          }
+          if (cpuMetric) {
+            console.log(`  CLI CPU Metric found:`, JSON.stringify(cpuMetric));
+          }
+          if (toolLatencyMetric) {
+            console.log(
+              `  CLI Tool Latency Metric found:`,
+              JSON.stringify(toolLatencyMetric),
+            );
+          }
+          // eslint-disable-next-line @typescript-eslint/no-explicit-any
+          const logs = (rig as any)._readAndParseTelemetryLog();
+          console.log(`  Total telemetry log entries: ${logs.length}`);
+          for (const logData of logs) {
+            if (logData.scopeMetrics) {
+              for (const scopeMetric of logData.scopeMetrics) {
+                for (const metric of scopeMetric.metrics) {
+                  if (metric.descriptor.name.includes('event_loop')) {
+                    console.log(
+                      `  Found event_loop metric in log:`,
+                      metric.descriptor.name,
+                    );
+                  }
+                }
+              }
+            }
+          }
+
+          if (eventLoopMetric) {
+            console.log(
+              `  CLI Event Loop Metric found:`,
+              JSON.stringify(eventLoopMetric),
+            );
+
+            const findValue = (percentile: string) => {
+              const dp = eventLoopMetric.dataPoints.find(
+                // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                (p: any) => p.attributes.percentile === percentile,
+              );
+              return dp ? dp.value.min : undefined;
+            };
+
+            snapshot.childEventLoopDelayP50Ms = findValue('p50');
+            snapshot.childEventLoopDelayP95Ms = findValue('p95');
+            snapshot.childEventLoopDelayMaxMs = findValue('max');
+          }
+
+          return snapshot;
+        } finally {
+          await rig.cleanup();
+        }
+      },
+    );
+
+    if (UPDATE_BASELINES) {
+      harness.updateScenarioBaseline(result);
+    } else {
+      harness.assertWithinBaseline(result);
+    }
+  });
 });
@@ -0,0 +1,3 @@
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"run_shell_command","args":{"command":"yes | head -n 1000000"}}}],"role":"model"},"finishReason":"STOP","index":0}]}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I have generated 1M lines of output."}],"role":"model"},"finishReason":"STOP","index":0}]}]}