feat(test): add high-volume shell test and refine perf harness (#24983)

This commit is contained in:
Sri Pasumarthi
2026-04-09 15:23:00 -07:00
committed by GitHub
parent 451edb3ea6
commit de628b04fc
9 changed files with 312 additions and 27 deletions

View File

@@ -50,6 +50,7 @@ export const DEFAULT_ACTIVITY_CONFIG: ActivityMonitorConfig = {
ActivityType.USER_INPUT_START,
ActivityType.MESSAGE_ADDED,
ActivityType.TOOL_CALL_SCHEDULED,
ActivityType.TOOL_CALL_COMPLETED,
ActivityType.STREAM_START,
],
};

View File

@@ -0,0 +1,99 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import process from 'node:process';
import { monitorEventLoopDelay, type IntervalHistogram } from 'node:perf_hooks';
import type { Config } from '../config/config.js';
import {
recordEventLoopDelay,
isPerformanceMonitoringActive,
} from './metrics.js';
export class EventLoopMonitor {
private eventLoopHistogram: IntervalHistogram | null = null;
private intervalId: NodeJS.Timeout | null = null;
private isRunning = false;
start(config: Config, intervalMs: number = 10000): void {
const isEnabled =
process.env['GEMINI_EVENT_LOOP_MONITOR_ENABLED'] === 'true';
if (!isEnabled || !isPerformanceMonitoringActive() || this.isRunning) {
return;
}
this.isRunning = true;
this.eventLoopHistogram = monitorEventLoopDelay({ resolution: 10 });
this.eventLoopHistogram.enable();
this.intervalId = setInterval(() => {
this.takeSnapshot(config);
}, intervalMs).unref();
}
stop(): void {
if (!this.isRunning) {
return;
}
if (this.intervalId) {
clearInterval(this.intervalId);
this.intervalId = null;
}
if (this.eventLoopHistogram) {
this.eventLoopHistogram.disable();
this.eventLoopHistogram = null;
}
this.isRunning = false;
}
private takeSnapshot(config: Config): void {
if (!this.eventLoopHistogram) {
return;
}
const p50 = this.eventLoopHistogram.percentile(50) / 1e6;
const p95 = this.eventLoopHistogram.percentile(95) / 1e6;
const max = this.eventLoopHistogram.max / 1e6;
recordEventLoopDelay(config, p50, {
percentile: 'p50',
component: 'event_loop_monitor',
});
recordEventLoopDelay(config, p95, {
percentile: 'p95',
component: 'event_loop_monitor',
});
recordEventLoopDelay(config, max, {
percentile: 'max',
component: 'event_loop_monitor',
});
}
}
let globalEventLoopMonitor: EventLoopMonitor | null = null;
export function startGlobalEventLoopMonitoring(
config: Config,
intervalMs?: number,
): void {
if (!globalEventLoopMonitor) {
globalEventLoopMonitor = new EventLoopMonitor();
}
globalEventLoopMonitor.start(config, intervalMs);
}
export function stopGlobalEventLoopMonitoring(): void {
if (globalEventLoopMonitor) {
globalEventLoopMonitor.stop();
globalEventLoopMonitor = null;
}
}
export function getEventLoopMonitor(): EventLoopMonitor | null {
return globalEventLoopMonitor;
}

View File

@@ -93,6 +93,12 @@ export {
stopGlobalMemoryMonitoring,
} from './memory-monitor.js';
export type { MemorySnapshot, ProcessMetrics } from './memory-monitor.js';
export {
EventLoopMonitor,
startGlobalEventLoopMonitoring,
stopGlobalEventLoopMonitoring,
getEventLoopMonitor,
} from './event-loop-monitor.js';
export { HighWaterMarkTracker } from './high-water-mark-tracker.js';
export { RateLimiter } from './rate-limiter.js';
export { ActivityType } from './activity-types.js';
@@ -133,6 +139,7 @@ export {
recordStartupPerformance,
recordMemoryUsage,
recordCpuUsage,
recordEventLoopDelay,
recordToolQueueDepth,
recordToolExecutionBreakdown,
recordTokenEfficiency,

View File

@@ -88,6 +88,7 @@ const GEN_AI_CLIENT_OPERATION_DURATION = 'gen_ai.client.operation.duration';
const STARTUP_TIME = 'gemini_cli.startup.duration';
const MEMORY_USAGE = 'gemini_cli.memory.usage';
const CPU_USAGE = 'gemini_cli.cpu.usage';
const EVENT_LOOP_DELAY = 'gemini_cli.event_loop.delay';
const TOOL_QUEUE_DEPTH = 'gemini_cli.tool.queue.depth';
const TOOL_EXECUTION_BREAKDOWN = 'gemini_cli.tool.execution.breakdown';
const TOKEN_EFFICIENCY = 'gemini_cli.token.efficiency';
@@ -608,6 +609,17 @@ const PERFORMANCE_HISTOGRAM_DEFINITIONS = {
component?: string;
},
},
[EVENT_LOOP_DELAY]: {
description: 'Event loop delay in milliseconds.',
unit: 'ms',
valueType: ValueType.DOUBLE,
assign: (h: Histogram) => (eventLoopDelayHistogram = h),
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
attributes: {} as {
percentile: string;
component?: string;
},
},
[TOOL_QUEUE_DEPTH]: {
description: 'Number of tools in execution queue.',
unit: 'count',
@@ -806,6 +818,7 @@ let genAiClientOperationDurationHistogram: Histogram | undefined;
let startupTimeHistogram: Histogram | undefined;
let memoryUsageGauge: Histogram | undefined; // Using Histogram until ObservableGauge is available
let cpuUsageGauge: Histogram | undefined;
let eventLoopDelayHistogram: Histogram | undefined;
let toolQueueDepthGauge: Histogram | undefined;
let toolExecutionBreakdownHistogram: Histogram | undefined;
let tokenEfficiencyHistogram: Histogram | undefined;
@@ -1339,6 +1352,21 @@ export function recordCpuUsage(
cpuUsageGauge.record(percentage, metricAttributes);
}
export function recordEventLoopDelay(
config: Config,
delayMs: number,
attributes: MetricDefinitions[typeof EVENT_LOOP_DELAY]['attributes'],
): void {
if (!eventLoopDelayHistogram || !isPerformanceMonitoringEnabled) return;
const metricAttributes: Attributes = {
...baseMetricDefinition.getCommonAttributes(config),
...attributes,
};
eventLoopDelayHistogram.record(delayMs, metricAttributes);
}
export function recordToolQueueDepth(config: Config, queueDepth: number): void {
if (!toolQueueDepthGauge || !isPerformanceMonitoringEnabled) return;

View File

@@ -52,6 +52,11 @@ import {
} from './gcp-exporters.js';
import { TelemetryTarget } from './index.js';
import { debugLogger } from '../utils/debugLogger.js';
import {
startGlobalMemoryMonitoring,
getMemoryMonitor,
} from './memory-monitor.js';
import { startGlobalEventLoopMonitoring } from './event-loop-monitor.js';
import { authEvents } from '../code_assist/oauth2.js';
import { coreEvents, CoreEvent } from '../utils/events.js';
import {
@@ -91,6 +96,7 @@ diag.setLogger(new DiagLoggerAdapter(), DiagLogLevel.INFO);
let sdk: NodeSDK | undefined;
let spanProcessor: BatchSpanProcessor | undefined;
let logRecordProcessor: BatchLogRecordProcessor | undefined;
let metricReader: PeriodicExportingMetricReader | undefined;
let telemetryInitialized = false;
let callbackRegistered = false;
let authListener: ((newCredentials: JWTInput) => Promise<void>) | undefined =
@@ -258,7 +264,6 @@ export async function initializeTelemetry(
| GcpLogExporter
| FileLogExporter
| ConsoleLogRecordExporter;
let metricReader: PeriodicExportingMetricReader;
if (useDirectGcpExport) {
debugLogger.log(
@@ -346,6 +351,26 @@ export async function initializeTelemetry(
}
activeTelemetryEmail = credentials?.client_email;
initializeMetrics(config);
// Start memory monitoring if interval is specified via environment variable
const monitorInterval = process.env['GEMINI_MEMORY_MONITOR_INTERVAL'];
debugLogger.log(
`[TELEMETRY] GEMINI_MEMORY_MONITOR_INTERVAL: ${monitorInterval}`,
);
if (monitorInterval) {
const intervalMs = parseInt(monitorInterval, 10);
if (!isNaN(intervalMs) && intervalMs > 0) {
startGlobalMemoryMonitoring(config, intervalMs);
startGlobalEventLoopMonitoring(config, intervalMs);
// Disable enhanced monitoring (rate limiting/high water mark) in tests
// to ensure we get regular snapshots regardless of growth.
const monitor = getMemoryMonitor();
if (monitor) {
monitor.setEnhancedMonitoring(false);
}
}
}
telemetryInitialized = true;
void flushTelemetryBuffer();
} catch (error) {
@@ -378,6 +403,7 @@ export async function flushTelemetry(config: Config): Promise<void> {
await Promise.all([
spanProcessor.forceFlush(),
logRecordProcessor.forceFlush(),
metricReader ? metricReader.forceFlush() : Promise.resolve(),
]);
if (config.getDebugMode()) {
debugLogger.log('OpenTelemetry SDK flushed successfully.');

View File

@@ -23,7 +23,6 @@ type PlotFn = (series: number[], config?: PlotConfig) => string;
export interface PerfBaseline {
wallClockMs: number;
cpuTotalUs: number;
eventLoopDelayP99Ms: number;
timestamp: string;
}
@@ -48,8 +47,10 @@ export interface PerfSnapshot {
cpuTotalUs: number;
eventLoopDelayP50Ms: number;
eventLoopDelayP95Ms: number;
eventLoopDelayP99Ms: number;
eventLoopDelayMaxMs: number;
childEventLoopDelayP50Ms?: number;
childEventLoopDelayP95Ms?: number;
childEventLoopDelayMaxMs?: number;
}
/**
@@ -159,7 +160,6 @@ export class PerfTestHarness {
cpuTotalUs: cpuDelta.user + cpuDelta.system,
eventLoopDelayP50Ms: 0,
eventLoopDelayP95Ms: 0,
eventLoopDelayP99Ms: 0,
eventLoopDelayMaxMs: 0,
};
}
@@ -196,7 +196,6 @@ export class PerfTestHarness {
// Convert from nanoseconds to milliseconds
snapshot.eventLoopDelayP50Ms = histogram.percentile(50) / 1e6;
snapshot.eventLoopDelayP95Ms = histogram.percentile(95) / 1e6;
snapshot.eventLoopDelayP99Ms = histogram.percentile(99) / 1e6;
snapshot.eventLoopDelayMaxMs = histogram.max / 1e6;
return snapshot;
@@ -305,7 +304,6 @@ export class PerfTestHarness {
` Baseline: ${result.baseline.wallClockMs.toFixed(1)} ms wall-clock\n` +
` Delta: ${deltaPercent.toFixed(1)}% (tolerance: ${tolerance}%)\n` +
` CPU total: ${formatUs(result.median.cpuTotalUs)}\n` +
` EL p99: ${result.median.eventLoopDelayP99Ms.toFixed(1)} ms\n` +
` Samples: ${result.samples.length} (${result.filteredSamples.length} after IQR filter)`,
);
}
@@ -316,8 +314,7 @@ export class PerfTestHarness {
` Measured: ${formatUs(result.median.cpuTotalUs)}\n` +
` Baseline: ${formatUs(result.baseline.cpuTotalUs)}\n` +
` Delta: ${result.cpuDeltaPercent.toFixed(1)}% (tolerance: ${cpuTolerance}%)\n` +
` Wall-clock: ${result.median.wallClockMs.toFixed(1)} ms\n` +
` EL p99: ${result.median.eventLoopDelayP99Ms.toFixed(1)} ms`,
` Wall-clock: ${result.median.wallClockMs.toFixed(1)} ms`,
);
}
}
@@ -329,7 +326,6 @@ export class PerfTestHarness {
updatePerfBaseline(this.baselinesPath, result.scenarioName, {
wallClockMs: result.median.wallClockMs,
cpuTotalUs: result.median.cpuTotalUs,
eventLoopDelayP99Ms: result.median.eventLoopDelayP99Ms,
});
// Reload baselines after update
this.baselines = loadPerfBaselines(this.baselinesPath);
@@ -375,9 +371,18 @@ export class PerfTestHarness {
` CPU: ${cpuMs} (user: ${formatUs(result.median.cpuUserUs)}, system: ${formatUs(result.median.cpuSystemUs)})`,
);
if (result.median.eventLoopDelayP99Ms > 0) {
if (result.median.eventLoopDelayMaxMs > 0) {
lines.push(
` Event loop: p50=${result.median.eventLoopDelayP50Ms.toFixed(1)}ms p95=${result.median.eventLoopDelayP95Ms.toFixed(1)}ms p99=${result.median.eventLoopDelayP99Ms.toFixed(1)}ms max=${result.median.eventLoopDelayMaxMs.toFixed(1)}ms`,
` Event loop (runner): p50=${result.median.eventLoopDelayP50Ms.toFixed(1)}ms p95=${result.median.eventLoopDelayP95Ms.toFixed(1)}ms max=${result.median.eventLoopDelayMaxMs.toFixed(1)}ms`,
);
}
if (
result.median.childEventLoopDelayMaxMs !== undefined &&
result.median.childEventLoopDelayMaxMs > 0
) {
lines.push(
` Event loop (CLI): p50=${result.median.childEventLoopDelayP50Ms!.toFixed(1)}ms p95=${result.median.childEventLoopDelayP95Ms!.toFixed(1)}ms max=${result.median.childEventLoopDelayMaxMs!.toFixed(1)}ms`,
);
}
@@ -517,14 +522,12 @@ export function updatePerfBaseline(
measured: {
wallClockMs: number;
cpuTotalUs: number;
eventLoopDelayP99Ms: number;
},
): void {
const baselines = loadPerfBaselines(path);
baselines.scenarios[scenarioName] = {
wallClockMs: measured.wallClockMs,
cpuTotalUs: measured.cpuTotalUs,
eventLoopDelayP99Ms: measured.eventLoopDelayP99Ms,
timestamp: new Date().toISOString(),
};
savePerfBaselines(path, baselines);

View File

@@ -1,24 +1,26 @@
{
"version": 1,
"updatedAt": "2026-04-08T18:51:29.839Z",
"updatedAt": "2026-04-09T02:30:22.000Z",
"scenarios": {
"cold-startup-time": {
"wallClockMs": 1333.4230420000004,
"cpuTotalUs": 1711,
"eventLoopDelayP99Ms": 0,
"timestamp": "2026-04-08T18:50:58.124Z"
"wallClockMs": 927.553249999999,
"cpuTotalUs": 1470,
"timestamp": "2026-04-08T22:27:54.871Z"
},
"idle-cpu-usage": {
"wallClockMs": 5001.926125,
"cpuTotalUs": 128518,
"eventLoopDelayP99Ms": 12.705791,
"timestamp": "2026-04-08T18:51:23.938Z"
"wallClockMs": 5000.460750000002,
"cpuTotalUs": 12157,
"timestamp": "2026-04-08T22:28:19.098Z"
},
"skill-loading-time": {
"wallClockMs": 1372.4463749999995,
"cpuTotalUs": 1550,
"eventLoopDelayP99Ms": 0,
"timestamp": "2026-04-08T18:51:29.839Z"
"wallClockMs": 930.0920409999962,
"cpuTotalUs": 1323,
"timestamp": "2026-04-08T22:28:23.290Z"
},
"high-volume-shell-output": {
"wallClockMs": 1119.9,
"cpuTotalUs": 2100,
"timestamp": "2026-04-09T02:30:22.000Z"
}
}
}

View File

@@ -8,6 +8,7 @@ import { describe, it, beforeAll, afterAll } from 'vitest';
import { TestRig, PerfTestHarness } from '@google/gemini-cli-test-utils';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { existsSync, readFileSync } from 'node:fs';
const __dirname = dirname(fileURLToPath(import.meta.url));
const BASELINES_PATH = join(__dirname, 'baselines.json');
@@ -33,7 +34,7 @@ describe('CPU Performance Tests', () => {
afterAll(async () => {
// Generate the summary report after all tests
await harness.generateReport();
});
}, 30000);
it('cold-startup-time: startup completes within baseline', async () => {
const result = await harness.runScenario('cold-startup-time', async () => {
@@ -150,4 +151,119 @@ describe('CPU Performance Tests', () => {
harness.assertWithinBaseline(result);
}
});
it('high-volume-shell-output: handles large output efficiently', async () => {
const result = await harness.runScenario(
'high-volume-shell-output',
async () => {
const rig = new TestRig();
try {
rig.setup('perf-high-volume-output', {
fakeResponsesPath: join(__dirname, 'perf.high-volume.responses'),
});
const snapshot = await harness.measureWithEventLoop(
'high-volume-output',
async () => {
const runResult = await rig.run({
args: ['Generate 1M lines of output'],
timeout: 120000,
env: {
GEMINI_API_KEY: 'fake-perf-test-key',
GEMINI_TELEMETRY_ENABLED: 'true',
GEMINI_MEMORY_MONITOR_INTERVAL: '500',
GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true',
DEBUG: 'true',
},
});
console.log(` Child Process Output:`, runResult);
},
);
// Query CLI's own performance metrics from telemetry logs
await rig.waitForTelemetryReady();
// Debug: Read and log the telemetry file content
try {
const logFilePath = join(rig.homeDir!, 'telemetry.log');
if (existsSync(logFilePath)) {
const content = readFileSync(logFilePath, 'utf-8');
console.log(` Telemetry Log Content:\n`, content);
} else {
console.log(` Telemetry log file not found at: ${logFilePath}`);
}
} catch (e) {
console.error(` Failed to read telemetry log:`, e);
}
const memoryMetric = rig.readMetric('memory.usage');
const cpuMetric = rig.readMetric('cpu.usage');
const toolLatencyMetric = rig.readMetric('tool.call.latency');
const eventLoopMetric = rig.readMetric('event_loop.delay');
if (memoryMetric) {
console.log(
` CLI Memory Metric found:`,
JSON.stringify(memoryMetric),
);
}
if (cpuMetric) {
console.log(` CLI CPU Metric found:`, JSON.stringify(cpuMetric));
}
if (toolLatencyMetric) {
console.log(
` CLI Tool Latency Metric found:`,
JSON.stringify(toolLatencyMetric),
);
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const logs = (rig as any)._readAndParseTelemetryLog();
console.log(` Total telemetry log entries: ${logs.length}`);
for (const logData of logs) {
if (logData.scopeMetrics) {
for (const scopeMetric of logData.scopeMetrics) {
for (const metric of scopeMetric.metrics) {
if (metric.descriptor.name.includes('event_loop')) {
console.log(
` Found event_loop metric in log:`,
metric.descriptor.name,
);
}
}
}
}
}
if (eventLoopMetric) {
console.log(
` CLI Event Loop Metric found:`,
JSON.stringify(eventLoopMetric),
);
const findValue = (percentile: string) => {
const dp = eventLoopMetric.dataPoints.find(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(p: any) => p.attributes.percentile === percentile,
);
return dp ? dp.value.min : undefined;
};
snapshot.childEventLoopDelayP50Ms = findValue('p50');
snapshot.childEventLoopDelayP95Ms = findValue('p95');
snapshot.childEventLoopDelayMaxMs = findValue('max');
}
return snapshot;
} finally {
await rig.cleanup();
}
},
);
if (UPDATE_BASELINES) {
harness.updateScenarioBaseline(result);
} else {
harness.assertWithinBaseline(result);
}
});
});

View File

@@ -0,0 +1,3 @@
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"run_shell_command","args":{"command":"yes | head -n 1000000"}}}],"role":"model"},"finishReason":"STOP","index":0}]}]}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I have generated 1M lines of output."}],"role":"model"},"finishReason":"STOP","index":0}]}]}