diff --git a/packages/core/src/telemetry/memory-monitor.test.ts b/packages/core/src/telemetry/memory-monitor.test.ts index 8ad0d45595..9cb0e91caa 100644 --- a/packages/core/src/telemetry/memory-monitor.test.ts +++ b/packages/core/src/telemetry/memory-monitor.test.ts @@ -17,13 +17,18 @@ import { _resetGlobalMemoryMonitorForTests, } from './memory-monitor.js'; import type { Config } from '../config/config.js'; -import { recordMemoryUsage, isPerformanceMonitoringActive } from './metrics.js'; +import { + recordMemoryUsage, + recordCpuUsage, + isPerformanceMonitoringActive, +} from './metrics.js'; import { HighWaterMarkTracker } from './high-water-mark-tracker.js'; import { RateLimiter } from './rate-limiter.js'; // Mock dependencies vi.mock('./metrics.js', () => ({ recordMemoryUsage: vi.fn(), + recordCpuUsage: vi.fn(), isPerformanceMonitoringActive: vi.fn(), MemoryMetricType: { HEAP_USED: 'heap_used', @@ -50,6 +55,7 @@ vi.mock('node:process', () => ({ })); const mockRecordMemoryUsage = vi.mocked(recordMemoryUsage); +const mockRecordCpuUsage = vi.mocked(recordCpuUsage); const mockIsPerformanceMonitoringActive = vi.mocked( isPerformanceMonitoringActive, ); @@ -192,6 +198,13 @@ describe('MemoryMonitor', () => { component: 'test_context', }, ); + expect(mockRecordCpuUsage).toHaveBeenCalledWith( + mockConfig, + expect.any(Number), + { + component: 'test_context', + }, + ); }); it('should not record metrics when performance monitoring is inactive', () => { diff --git a/packages/core/src/telemetry/memory-monitor.ts b/packages/core/src/telemetry/memory-monitor.ts index e005bd73cc..aeaecc6ca0 100644 --- a/packages/core/src/telemetry/memory-monitor.ts +++ b/packages/core/src/telemetry/memory-monitor.ts @@ -12,6 +12,7 @@ import { isUserActive } from './activity-detector.js'; import { HighWaterMarkTracker } from './high-water-mark-tracker.js'; import { recordMemoryUsage, + recordCpuUsage, MemoryMetricType, isPerformanceMonitoringActive, } from './metrics.js'; @@ -37,6 +38,7 @@ export class MemoryMonitor { private intervalId: NodeJS.Timeout | null = null; private isRunning = false; private lastSnapshot: MemorySnapshot | null = null; + private lastCpuUsage: NodeJS.CpuUsage | null = null; private monitoringInterval: number = 10000; private highWaterMarkTracker: HighWaterMarkTracker; private rateLimiter: RateLimiter; @@ -191,6 +193,13 @@ export class MemoryMonitor { memory_type: MemoryMetricType.RSS, component: context, }); + + // Record delta CPU usage (in microseconds) + const cpuUsage = process.cpuUsage(this.lastCpuUsage ?? undefined); + this.lastCpuUsage = process.cpuUsage(); + recordCpuUsage(config, cpuUsage.user + cpuUsage.system, { + component: context, + }); } this.lastSnapshot = snapshot; diff --git a/packages/test-utils/src/perf-test-harness.ts b/packages/test-utils/src/perf-test-harness.ts index 2f376f58b6..f0520ccecb 100644 --- a/packages/test-utils/src/perf-test-harness.ts +++ b/packages/test-utils/src/perf-test-harness.ts @@ -147,7 +147,9 @@ export class PerfTestHarness { throw new Error(`No active timer found for label "${label}"`); } - const wallClockMs = performance.now() - timer.startTime; + // Round wall-clock time to nearest 0.1 ms + const wallClockMs = + Math.round((performance.now() - timer.startTime) * 10) / 10; const cpuDelta = process.cpuUsage(timer.startCpuUsage); this.activeTimers.delete(label); diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts index 734c1b9546..906a7760bf 100644 --- a/packages/test-utils/src/test-rig.ts +++ b/packages/test-utils/src/test-rig.ts @@ -193,6 +193,28 @@ export function checkModelOutputContent( return isValid; } +export interface MetricDataPoint { + attributes?: Record; + value?: { + sum?: number; + min?: number; + max?: number; + count?: number; + }; + startTime?: [number, number]; + endTime?: string; +} + +export interface TelemetryMetric { + descriptor: { + name: string; + type?: string; + description?: string; + unit?: string; + }; + dataPoints: MetricDataPoint[]; +} + export interface ParsedLog { attributes?: { 'event.name'?: string; @@ -213,11 +235,7 @@ export interface ParsedLog { prompt_id?: string; }; scopeMetrics?: { - metrics: { - descriptor: { - name: string; - }; - }[]; + metrics: TelemetryMetric[]; }[]; } @@ -1297,6 +1315,10 @@ export class TestRig { return logs; } + readTelemetryLogs(): ParsedLog[] { + return this._readAndParseTelemetryLog(); + } + private _readAndParseTelemetryLog(): ParsedLog[] { // Telemetry is always written to the test directory const logFilePath = join(this.homeDir!, 'telemetry.log'); @@ -1450,7 +1472,7 @@ export class TestRig { ); } - readMetric(metricName: string): Record | null { + readMetric(metricName: string): TelemetryMetric | null { const logs = this._readAndParseTelemetryLog(); for (const logData of logs) { if (logData.scopeMetrics) { diff --git a/perf-tests/baselines.json b/perf-tests/baselines.json index d6972342d4..caf92bedb6 100644 --- a/perf-tests/baselines.json +++ b/perf-tests/baselines.json @@ -1,14 +1,14 @@ { "version": 1, - "updatedAt": "2026-04-09T02:30:22.000Z", + "updatedAt": "2026-04-14T14:04:02.662Z", "scenarios": { "cold-startup-time": { - "wallClockMs": 927.553249999999, + "wallClockMs": 927.6, "cpuTotalUs": 1470, "timestamp": "2026-04-08T22:27:54.871Z" }, "idle-cpu-usage": { - "wallClockMs": 5000.460750000002, + "wallClockMs": 5000.5, "cpuTotalUs": 12157, "timestamp": "2026-04-08T22:28:19.098Z" }, @@ -18,7 +18,7 @@ "timestamp": "2026-04-14T15:22:56.133Z" }, "skill-loading-time": { - "wallClockMs": 930.0920409999962, + "wallClockMs": 930.1, "cpuTotalUs": 1323, "timestamp": "2026-04-08T22:28:23.290Z" }, @@ -26,6 +26,31 @@ "wallClockMs": 1119.9, "cpuTotalUs": 2100, "timestamp": "2026-04-09T02:30:22.000Z" + }, + "long-conversation-resume": { + "wallClockMs": 4212.5, + "cpuTotalUs": 351393, + "timestamp": "2026-04-14T14:02:53.268Z" + }, + "long-conversation-typing": { + "wallClockMs": 113.7, + "cpuTotalUs": 3304, + "timestamp": "2026-04-14T14:03:12.525Z" + }, + "long-conversation-execution": { + "wallClockMs": 248.7, + "cpuTotalUs": 3825, + "timestamp": "2026-04-14T14:03:28.575Z" + }, + "long-conversation-terminal-scrolling": { + "wallClockMs": 362.4, + "cpuTotalUs": 12755860, + "timestamp": "2026-04-14T14:03:45.687Z" + }, + "long-conversation-alternate-scrolling": { + "wallClockMs": 362.4, + "cpuTotalUs": 12755860, + "timestamp": "2026-04-14T14:04:02.662Z" } } } diff --git a/perf-tests/perf-usage.test.ts b/perf-tests/perf-usage.test.ts index 4bbc5ab0ea..a100382f48 100644 --- a/perf-tests/perf-usage.test.ts +++ b/perf-tests/perf-usage.test.ts @@ -5,10 +5,20 @@ */ import { describe, it, beforeAll, afterAll } from 'vitest'; -import { TestRig, PerfTestHarness } from '@google/gemini-cli-test-utils'; +import { + TestRig, + PerfTestHarness, + type PerfSnapshot, +} from '@google/gemini-cli-test-utils'; import { join, dirname } from 'node:path'; import { fileURLToPath } from 'node:url'; -import { existsSync, readFileSync } from 'node:fs'; +import { + existsSync, + readFileSync, + mkdirSync, + copyFileSync, + writeFileSync, +} from 'node:fs'; const __dirname = dirname(fileURLToPath(import.meta.url)); const BASELINES_PATH = join(__dirname, 'baselines.json'); @@ -195,7 +205,7 @@ describe('CPU Performance Tests', () => { const snapshot = await harness.measureWithEventLoop( 'high-volume-output', async () => { - const runResult = await rig.run({ + await rig.run({ args: ['Generate 1M lines of output'], timeout: 120000, env: { @@ -206,7 +216,6 @@ describe('CPU Performance Tests', () => { DEBUG: 'true', }, }); - console.log(` Child Process Output:`, runResult); }, ); @@ -246,8 +255,7 @@ describe('CPU Performance Tests', () => { JSON.stringify(toolLatencyMetric), ); } - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const logs = (rig as any)._readAndParseTelemetryLog(); + const logs = rig.readTelemetryLogs(); console.log(` Total telemetry log entries: ${logs.length}`); for (const logData of logs) { if (logData.scopeMetrics) { @@ -272,10 +280,9 @@ describe('CPU Performance Tests', () => { const findValue = (percentile: string) => { const dp = eventLoopMetric.dataPoints.find( - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (p: any) => p.attributes.percentile === percentile, + (p) => p.attributes?.['percentile'] === percentile, ); - return dp ? dp.value.min : undefined; + return dp?.value?.min; }; snapshot.childEventLoopDelayP50Ms = findValue('p50'); @@ -296,4 +303,358 @@ describe('CPU Performance Tests', () => { harness.assertWithinBaseline(result); } }); + + describe('long-conversation', () => { + let rig: TestRig; + const identifier = 'perf-long-conversation'; + const SESSION_ID = + 'anonymous_unique_id_577296e0eee5afecdcec05d11838e0cd1a851cd97a28119a4a876b11'; + const LARGE_CHAT_SOURCE = join( + __dirname, + '..', + 'memory-tests', + 'large-chat-session.json', + ); + + beforeAll(async () => { + if (!existsSync(LARGE_CHAT_SOURCE)) { + throw new Error( + `Performance test fixture missing: ${LARGE_CHAT_SOURCE}.`, + ); + } + + rig = new TestRig(); + rig.setup(identifier, { + fakeResponsesPath: join(__dirname, 'perf.long-chat.responses'), + }); + + const geminiDir = join(rig.homeDir!, '.gemini'); + const projectTempDir = join(geminiDir, 'tmp', identifier); + const targetChatsDir = join(projectTempDir, 'chats'); + + mkdirSync(targetChatsDir, { recursive: true }); + writeFileSync( + join(geminiDir, 'projects.json'), + JSON.stringify({ + projects: { [rig.testDir!]: identifier }, + }), + ); + writeFileSync(join(projectTempDir, '.project_root'), rig.testDir!); + copyFileSync( + LARGE_CHAT_SOURCE, + join(targetChatsDir, `session-${SESSION_ID}.json`), + ); + }); + + afterAll(async () => { + await rig.cleanup(); + }); + + it('session-load: resume a 60MB chat history', async () => { + const result = await harness.runScenario( + 'long-conversation-resume', + async () => { + const snapshot = await harness.measureWithEventLoop( + 'resume', + async () => { + const run = await rig.runInteractive({ + args: ['--resume', 'latest'], + env: { + GEMINI_API_KEY: 'fake-perf-test-key', + GEMINI_TELEMETRY_ENABLED: 'true', + GEMINI_MEMORY_MONITOR_INTERVAL: '500', + GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true', + DEBUG: 'true', + }, + }); + await run.kill(); + }, + ); + return snapshot; + }, + ); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + } else { + harness.assertWithinBaseline(result); + } + }); + + it('typing: latency when typing into a large session', async () => { + const result = await harness.runScenario( + 'long-conversation-typing', + async () => { + const run = await rig.runInteractive({ + args: ['--resume', 'latest'], + env: { + GEMINI_API_KEY: 'fake-perf-test-key', + GEMINI_TELEMETRY_ENABLED: 'true', + GEMINI_MEMORY_MONITOR_INTERVAL: '500', + GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true', + DEBUG: 'true', + }, + }); + + const snapshot = await harness.measureWithEventLoop( + 'typing', + async () => { + // On average, the expected latency per key is under 30ms. + for (const char of 'Hello') { + await run.type(char); + } + }, + ); + + await run.kill(); + return snapshot; + }, + ); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + } else { + harness.assertWithinBaseline(result); + } + }); + + it('execution: response latency for a simple shell command', async () => { + const result = await harness.runScenario( + 'long-conversation-execution', + async () => { + const run = await rig.runInteractive({ + args: ['--resume', 'latest'], + env: { + GEMINI_API_KEY: 'fake-perf-test-key', + GEMINI_TELEMETRY_ENABLED: 'true', + GEMINI_MEMORY_MONITOR_INTERVAL: '500', + GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true', + DEBUG: 'true', + }, + }); + + await run.expectText('Type your message'); + + const snapshot = await harness.measureWithEventLoop( + 'execution', + async () => { + await run.sendKeys('!echo hi\r'); + await run.expectText('hi'); + }, + ); + + await run.kill(); + return snapshot; + }, + ); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + } else { + harness.assertWithinBaseline(result); + } + }); + + it('terminal-scrolling: latency when scrolling a large terminal buffer', async () => { + const result = await harness.runScenario( + 'long-conversation-terminal-scrolling', + async () => { + // Enable terminalBuffer to intentionally test CLI scrolling logic + const settingsPath = join(rig.homeDir!, '.gemini', 'settings.json'); + writeFileSync( + settingsPath, + JSON.stringify({ + security: { folderTrust: { enabled: false } }, + ui: { terminalBuffer: true }, + }), + ); + + const run = await rig.runInteractive({ + args: ['--resume', 'latest'], + env: { + GEMINI_API_KEY: 'fake-perf-test-key', + GEMINI_TELEMETRY_ENABLED: 'true', + GEMINI_MEMORY_MONITOR_INTERVAL: '500', + GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true', + DEBUG: 'true', + }, + }); + + await run.expectText('Type your message'); + + for (let i = 0; i < 5; i++) { + await run.sendKeys('\u001b[5~'); // PageUp + } + + // Scroll to the very top + await run.sendKeys('\u001b[H'); // Home + // Verify top line of chat is visible. + await run.expectText('Authenticated with'); + + for (let i = 0; i < 5; i++) { + await run.sendKeys('\u001b[6~'); // PageDown + } + + await rig.waitForTelemetryReady(); + await run.kill(); + + const eventLoopMetric = rig.readMetric('event_loop.delay'); + const cpuMetric = rig.readMetric('cpu.usage'); + + let p50Ms = 0; + let p95Ms = 0; + let maxMs = 0; + if (eventLoopMetric) { + const dataPoints = eventLoopMetric.dataPoints; + const p50Data = dataPoints.find( + (dp) => dp.attributes?.['percentile'] === 'p50', + ); + const p95Data = dataPoints.find( + (dp) => dp.attributes?.['percentile'] === 'p95', + ); + const maxData = dataPoints.find( + (dp) => dp.attributes?.['percentile'] === 'max', + ); + + if (p50Data?.value?.sum) p50Ms = p50Data.value.sum; + if (p95Data?.value?.sum) p95Ms = p95Data.value.sum; + if (maxData?.value?.sum) maxMs = maxData.value.sum; + } + + let cpuTotalUs = 0; + if (cpuMetric) { + const dataPoints = cpuMetric.dataPoints; + for (const dp of dataPoints) { + if (dp.value?.sum && dp.value.sum > 0) { + cpuTotalUs += dp.value.sum; + } + } + } + const cpuUserUs = cpuTotalUs; + const cpuSystemUs = 0; + + const snapshot: PerfSnapshot = { + timestamp: Date.now(), + label: 'scrolling', + wallClockMs: Math.round(p50Ms * 10) / 10, + cpuTotalUs, + cpuUserUs, + cpuSystemUs, + eventLoopDelayP50Ms: p50Ms, + eventLoopDelayP95Ms: p95Ms, + eventLoopDelayMaxMs: maxMs, + }; + + return snapshot; + }, + ); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + } else { + harness.assertWithinBaseline(result); + } + }); + + it('alternate-scrolling: latency when scrolling a large alternate buffer', async () => { + const result = await harness.runScenario( + 'long-conversation-alternate-scrolling', + async () => { + // Enable useAlternateBuffer to intentionally test CLI scrolling logic + const settingsPath = join(rig.homeDir!, '.gemini', 'settings.json'); + writeFileSync( + settingsPath, + JSON.stringify({ + security: { folderTrust: { enabled: false } }, + ui: { useAlternateBuffer: true }, + }), + ); + + const run = await rig.runInteractive({ + args: ['--resume', 'latest'], + env: { + GEMINI_API_KEY: 'fake-perf-test-key', + GEMINI_TELEMETRY_ENABLED: 'true', + GEMINI_MEMORY_MONITOR_INTERVAL: '500', + GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true', + DEBUG: 'true', + }, + }); + + await run.expectText('Type your message'); + + for (let i = 0; i < 5; i++) { + await run.sendKeys('\u001b[5~'); // PageUp + } + + // Scroll to the very top + await run.sendKeys('\u001b[H'); // Home + // Verify top line of chat is visible. + await run.expectText('Authenticated with'); + + for (let i = 0; i < 5; i++) { + await run.sendKeys('\u001b[6~'); // PageDown + } + + await rig.waitForTelemetryReady(); + await run.kill(); + + const eventLoopMetric = rig.readMetric('event_loop.delay'); + const cpuMetric = rig.readMetric('cpu.usage'); + + let p50Ms = 0; + let p95Ms = 0; + let maxMs = 0; + if (eventLoopMetric) { + const dataPoints = eventLoopMetric.dataPoints; + const p50Data = dataPoints.find( + (dp) => dp.attributes?.['percentile'] === 'p50', + ); + const p95Data = dataPoints.find( + (dp) => dp.attributes?.['percentile'] === 'p95', + ); + const maxData = dataPoints.find( + (dp) => dp.attributes?.['percentile'] === 'max', + ); + + if (p50Data?.value?.sum) p50Ms = p50Data.value.sum; + if (p95Data?.value?.sum) p95Ms = p95Data.value.sum; + if (maxData?.value?.sum) maxMs = maxData.value.sum; + } + + let cpuTotalUs = 0; + if (cpuMetric) { + const dataPoints = cpuMetric.dataPoints; + for (const dp of dataPoints) { + if (dp.value?.sum && dp.value.sum > 0) { + cpuTotalUs += dp.value.sum; + } + } + } + const cpuUserUs = cpuTotalUs; + const cpuSystemUs = 0; + + const snapshot: PerfSnapshot = { + timestamp: Date.now(), + label: 'scrolling', + wallClockMs: Math.round(p50Ms * 10) / 10, + cpuTotalUs, + cpuUserUs, + cpuSystemUs, + eventLoopDelayP50Ms: p50Ms, + eventLoopDelayP95Ms: p95Ms, + eventLoopDelayMaxMs: maxMs, + }; + + return snapshot; + }, + ); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + } else { + harness.assertWithinBaseline(result); + } + }); + }); }); diff --git a/perf-tests/perf.long-chat.responses b/perf-tests/perf.long-chat.responses new file mode 100644 index 0000000000..7cf057e5a4 --- /dev/null +++ b/perf-tests/perf.long-chat.responses @@ -0,0 +1,4 @@ +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"{\"complexity_reasoning\":\"simple\",\"complexity_score\":1}"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I am a large conversation model response."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"candidatesTokenCount":10,"promptTokenCount":20,"totalTokenCount":30}}]} +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"{\"originalSummary\":\"large chat summary\",\"events\":[]}"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"countTokens","response":{"totalTokens":100}}