/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, it, beforeAll, afterAll } from 'vitest';
import {
  TestRig,
  PerfTestHarness,
  type PerfSnapshot,
} from '@google/gemini-cli-test-utils';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import {
  existsSync,
  readFileSync,
  mkdirSync,
  copyFileSync,
  writeFileSync,
} from 'node:fs';

const __dirname = dirname(fileURLToPath(import.meta.url));
const BASELINES_PATH = join(__dirname, 'baselines.json');
const UPDATE_BASELINES = process.env['UPDATE_PERF_BASELINES'] === 'true';
const TOLERANCE_PERCENT = 15;

// Use fewer samples locally for faster iteration, more in CI
const SAMPLE_COUNT = process.env['CI'] ? 5 : 3;
const WARMUP_COUNT = 1;

/** Placeholder API key handed to every CLI process started by this suite. */
const FAKE_API_KEY = 'fake-perf-test-key';

/** Timeout, in milliseconds, passed to each non-interactive `rig.run` call. */
const RUN_TIMEOUT_MS = 120000;

/**
 * Environment used by the scenarios that read the CLI's own telemetry
 * metrics back after the run. Always spread into a fresh object at the call
 * site so no two runs share the same env instance.
 */
const TELEMETRY_ENV: Readonly<Record<string, string>> = {
  GEMINI_API_KEY: FAKE_API_KEY,
  GEMINI_TELEMETRY_ENABLED: 'true',
  GEMINI_MEMORY_MONITOR_INTERVAL: '500',
  GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true',
  DEBUG: 'true',
};

/** Terminal escape sequences sent by the scrolling scenarios. */
const KEY_PAGE_UP = '\u001b[5~';
const KEY_PAGE_DOWN = '\u001b[6~';
const KEY_HOME = '\u001b[H';

/** Number of PageUp / PageDown presses sent in each scrolling direction. */
const SCROLL_PAGE_COUNT = 5;

describe('CPU Performance Tests', () => {
  let harness: PerfTestHarness;

  beforeAll(() => {
    harness = new PerfTestHarness({
      baselinesPath: BASELINES_PATH,
      defaultTolerancePercent: TOLERANCE_PERCENT,
      sampleCount: SAMPLE_COUNT,
      warmupCount: WARMUP_COUNT,
    });
  });

  afterAll(async () => {
    // Generate the summary report after all tests
    await harness.generateReport();
  }, 30000);

  /**
   * Runs one scenario through the harness and then either records the result
   * as the new baseline (when UPDATE_PERF_BASELINES is 'true') or asserts
   * that it is within the stored baseline.
   *
   * @param scenarioName Scenario name passed to `harness.runScenario`.
   * @param scenario Callback that performs the measurement and returns its
   *   snapshot.
   */
  const runAndCheck = async (
    scenarioName: string,
    scenario: () => Promise<PerfSnapshot>,
  ): Promise<void> => {
    const result = await harness.runScenario(scenarioName, scenario);
    if (UPDATE_BASELINES) {
      harness.updateScenarioBaseline(result);
    } else {
      harness.assertWithinBaseline(result);
    }
  };

  it('cold-startup-time: startup completes within baseline', async () => {
    await runAndCheck('cold-startup-time', async () => {
      const rig = new TestRig();
      try {
        rig.setup('perf-cold-startup', {
          fakeResponsesPath: join(__dirname, 'perf.cold-startup.responses'),
        });

        return await harness.measure('cold-startup', async () => {
          await rig.run({
            args: ['hello'],
            timeout: RUN_TIMEOUT_MS,
            env: { GEMINI_API_KEY: FAKE_API_KEY },
          });
        });
      } finally {
        await rig.cleanup();
      }
    });
  });

  it('idle-cpu-usage: CPU stays low when idle', async () => {
    const IDLE_OBSERVATION_MS = 5000;

    await runAndCheck('idle-cpu-usage', async () => {
      const rig = new TestRig();
      try {
        rig.setup('perf-idle-cpu', {
          fakeResponsesPath: join(__dirname, 'perf.idle-cpu.responses'),
        });

        // First, run a prompt to get the CLI into idle state
        await rig.run({
          args: ['hello'],
          timeout: RUN_TIMEOUT_MS,
          env: { GEMINI_API_KEY: FAKE_API_KEY },
        });

        // Resolve the timer helper before the measurement starts so that
        // loading the module is not attributed to the idle window.
        const { setTimeout: sleep } = await import('node:timers/promises');

        // Now measure CPU during idle period in the test process
        return await harness.measureWithEventLoop('idle-cpu', async () => {
          // Simulate idle period — just wait
          await sleep(IDLE_OBSERVATION_MS);
        });
      } finally {
        await rig.cleanup();
      }
    });
  });

  it('asian-language-conv: verify perf is acceptable ', async () => {
    await runAndCheck('asian-language-conv', async () => {
      const rig = new TestRig();
      try {
        rig.setup('perf-asian-language', {
          fakeResponsesPath: join(__dirname, 'perf.asian-language.responses'),
        });

        return await harness.measure('asian-language', async () => {
          await rig.run({
            args: ['嗨'],
            timeout: RUN_TIMEOUT_MS,
            env: { GEMINI_API_KEY: FAKE_API_KEY },
          });
        });
      } finally {
        await rig.cleanup();
      }
    });
  });

  it('skill-loading-time: startup with many skills within baseline', async () => {
    const SKILL_COUNT = 20;

    await runAndCheck('skill-loading-time', async () => {
      const rig = new TestRig();
      try {
        rig.setup('perf-skill-loading', {
          fakeResponsesPath: join(__dirname, 'perf.skill-loading.responses'),
        });

        // Create many skill directories with SKILL.md files
        for (let i = 0; i < SKILL_COUNT; i++) {
          const skillDir = `.gemini/skills/perf-skill-${i}`;
          rig.mkdir(skillDir);
          rig.createFile(
            `${skillDir}/SKILL.md`,
            [
              '---',
              `name: perf-skill-${i}`,
              `description: Performance test skill number ${i}`,
              `activation: manual`,
              '---',
              '',
              `# Performance Test Skill ${i}`,
              '',
              `This is a test skill for measuring skill loading performance.`,
              `It contains some content to simulate real-world skill files.`,
              '',
              `## Usage`,
              '',
              `Use this skill by activating it with @perf-skill-${i}.`,
            ].join('\n'),
          );
        }

        return await harness.measure('skill-loading', async () => {
          await rig.run({
            args: ['hello'],
            timeout: RUN_TIMEOUT_MS,
            env: { GEMINI_API_KEY: FAKE_API_KEY },
          });
        });
      } finally {
        await rig.cleanup();
      }
    });
  });

  it('high-volume-shell-output: handles large output efficiently', async () => {
    await runAndCheck('high-volume-shell-output', async () => {
      const rig = new TestRig();
      try {
        rig.setup('perf-high-volume-output', {
          fakeResponsesPath: join(__dirname, 'perf.high-volume.responses'),
        });

        const snapshot = await harness.measureWithEventLoop(
          'high-volume-output',
          async () => {
            await rig.run({
              args: ['Generate 1M lines of output'],
              timeout: RUN_TIMEOUT_MS,
              env: { ...TELEMETRY_ENV },
            });
          },
        );

        // Query CLI's own performance metrics from telemetry logs
        await rig.waitForTelemetryReady();

        // Debug: Read and log the telemetry file content
        try {
          const logFilePath = join(rig.homeDir!, 'telemetry.log');
          if (existsSync(logFilePath)) {
            const content = readFileSync(logFilePath, 'utf-8');
            console.log(` Telemetry Log Content:\n`, content);
          } else {
            console.log(` Telemetry log file not found at: ${logFilePath}`);
          }
        } catch (e) {
          // Best effort only: a failed debug dump must not fail the scenario.
          console.error(` Failed to read telemetry log:`, e);
        }

        const memoryMetric = rig.readMetric('memory.usage');
        const cpuMetric = rig.readMetric('cpu.usage');
        const toolLatencyMetric = rig.readMetric('tool.call.latency');
        const eventLoopMetric = rig.readMetric('event_loop.delay');

        if (memoryMetric) {
          console.log(
            ` CLI Memory Metric found:`,
            JSON.stringify(memoryMetric),
          );
        }
        if (cpuMetric) {
          console.log(` CLI CPU Metric found:`, JSON.stringify(cpuMetric));
        }
        if (toolLatencyMetric) {
          console.log(
            ` CLI Tool Latency Metric found:`,
            JSON.stringify(toolLatencyMetric),
          );
        }

        // Debug: report every event-loop related metric name seen in the logs.
        const logs = rig.readTelemetryLogs();
        console.log(` Total telemetry log entries: ${logs.length}`);
        for (const logData of logs) {
          if (logData.scopeMetrics) {
            for (const scopeMetric of logData.scopeMetrics) {
              for (const metric of scopeMetric.metrics) {
                if (metric.descriptor.name.includes('event_loop')) {
                  console.log(
                    ` Found event_loop metric in log:`,
                    metric.descriptor.name,
                  );
                }
              }
            }
          }
        }

        if (eventLoopMetric) {
          console.log(
            ` CLI Event Loop Metric found:`,
            JSON.stringify(eventLoopMetric),
          );

          // NOTE(review): this reads `value.min`, whereas the scrolling
          // scenarios in this file read `value.sum` from the same metric —
          // confirm which field is the intended one.
          const findValue = (percentile: string) => {
            const dp = eventLoopMetric.dataPoints.find(
              (p) => p.attributes?.['percentile'] === percentile,
            );
            return dp?.value?.min;
          };

          // Attach the child process's own event-loop figures to the snapshot.
          snapshot.childEventLoopDelayP50Ms = findValue('p50');
          snapshot.childEventLoopDelayP95Ms = findValue('p95');
          snapshot.childEventLoopDelayMaxMs = findValue('max');
        }

        return snapshot;
      } finally {
        await rig.cleanup();
      }
    });
  });

  describe('long-conversation', () => {
    let rig: TestRig;
    const identifier = 'perf-long-conversation';
    const SESSION_ID =
      'anonymous_unique_id_577296e0eee5afecdcec05d11838e0cd1a851cd97a28119a4a876b11';
    const LARGE_CHAT_SOURCE = join(
      __dirname,
      '..',
      'memory-tests',
      'large-chat-session.json',
    );

    beforeAll(async () => {
      if (!existsSync(LARGE_CHAT_SOURCE)) {
        throw new Error(
          `Performance test fixture missing: ${LARGE_CHAT_SOURCE}.`,
        );
      }

      rig = new TestRig();
      rig.setup(identifier, {
        fakeResponsesPath: join(__dirname, 'perf.long-chat.responses'),
      });

      // Seed the rig's home directory with the large chat session so that
      // `--resume latest` has a session to pick up.
      const geminiDir = join(rig.homeDir!, '.gemini');
      const projectTempDir = join(geminiDir, 'tmp', identifier);
      const targetChatsDir = join(projectTempDir, 'chats');
      mkdirSync(targetChatsDir, { recursive: true });

      writeFileSync(
        join(geminiDir, 'projects.json'),
        JSON.stringify({
          projects: { [rig.testDir!]: identifier },
        }),
      );
      writeFileSync(join(projectTempDir, '.project_root'), rig.testDir!);
      copyFileSync(
        LARGE_CHAT_SOURCE,
        join(targetChatsDir, `session-${SESSION_ID}.json`),
      );
    });

    afterAll(async () => {
      // `rig` is still unassigned when beforeAll failed before creating it
      // (e.g. the fixture is missing); skip cleanup instead of raising a
      // second, misleading error.
      await rig?.cleanup();
    });

    /** Starts an interactive CLI that resumes the latest session. */
    const resumeSession = () =>
      rig.runInteractive({
        args: ['--resume', 'latest'],
        env: { ...TELEMETRY_ENV },
      });

    /**
     * Builds a snapshot from the `event_loop.delay` and `cpu.usage` metrics
     * read through the rig. Missing metrics or data points contribute 0.
     *
     * @returns A snapshot labelled 'scrolling'.
     */
    const readScrollSnapshot = (): PerfSnapshot => {
      const eventLoopMetric = rig.readMetric('event_loop.delay');
      const cpuMetric = rig.readMetric('cpu.usage');

      // `||` on purpose: any falsy sum (missing, 0, NaN) is reported as 0.
      const percentileSum = (percentile: string): number => {
        const point = eventLoopMetric?.dataPoints.find(
          (dp) => dp.attributes?.['percentile'] === percentile,
        );
        return point?.value?.sum || 0;
      };

      const p50Ms = percentileSum('p50');
      const p95Ms = percentileSum('p95');
      const maxMs = percentileSum('max');

      // Only strictly positive sums are added to the CPU total.
      let cpuTotalUs = 0;
      if (cpuMetric) {
        for (const dp of cpuMetric.dataPoints) {
          const sum = dp.value?.sum;
          if (sum && sum > 0) {
            cpuTotalUs += sum;
          }
        }
      }

      return {
        timestamp: Date.now(),
        label: 'scrolling',
        // wallClockMs is populated from the p50 event-loop delay, rounded to
        // one decimal place.
        wallClockMs: Math.round(p50Ms * 10) / 10,
        cpuTotalUs,
        // The whole total is reported as user time; system time is fixed at 0.
        cpuUserUs: cpuTotalUs,
        cpuSystemUs: 0,
        eventLoopDelayP50Ms: p50Ms,
        eventLoopDelayP95Ms: p95Ms,
        eventLoopDelayMaxMs: maxMs,
      };
    };

    /**
     * Writes settings.json with the given `ui` section, resumes the session,
     * scrolls up, jumps to the top, scrolls back down, and returns a snapshot
     * built from the CLI's telemetry metrics.
     *
     * @param uiSettings Value stored under `ui` in settings.json.
     * @returns Snapshot produced by `readScrollSnapshot`.
     */
    const measureScrolling = async (
      uiSettings: Record<string, boolean>,
    ): Promise<PerfSnapshot> => {
      const settingsPath = join(rig.homeDir!, '.gemini', 'settings.json');
      writeFileSync(
        settingsPath,
        JSON.stringify({
          security: { folderTrust: { enabled: false } },
          ui: uiSettings,
        }),
      );

      const run = await resumeSession();
      try {
        await run.expectText('Type your message');

        for (let i = 0; i < SCROLL_PAGE_COUNT; i++) {
          await run.sendKeys(KEY_PAGE_UP); // PageUp
        }

        // Scroll to the very top
        await run.sendKeys(KEY_HOME); // Home

        // Verify top line of chat is visible.
        await run.expectText('Authenticated with');

        for (let i = 0; i < SCROLL_PAGE_COUNT; i++) {
          await run.sendKeys(KEY_PAGE_DOWN); // PageDown
        }

        await rig.waitForTelemetryReady();
      } finally {
        // Always stop the CLI, even when an expectation above failed, so a
        // stray process cannot skew the scenarios that run afterwards.
        await run.kill();
      }

      return readScrollSnapshot();
    };

    it('session-load: resume a 60MB chat history', async () => {
      await runAndCheck('long-conversation-resume', async () => {
        return await harness.measureWithEventLoop('resume', async () => {
          const run = await resumeSession();
          await run.kill();
        });
      });
    });

    it('typing: latency when typing into a large session', async () => {
      await runAndCheck('long-conversation-typing', async () => {
        // NOTE(review): unlike the execution and scrolling scenarios, this one
        // does not wait for 'Type your message' before measuring — confirm
        // that is intended.
        const run = await resumeSession();
        try {
          return await harness.measureWithEventLoop('typing', async () => {
            // On average, the expected latency per key is under 30ms.
            for (const char of 'Hello') {
              await run.type(char);
            }
          });
        } finally {
          // Stop the CLI whether or not the measurement succeeded.
          await run.kill();
        }
      });
    });

    it('execution: response latency for a simple shell command', async () => {
      await runAndCheck('long-conversation-execution', async () => {
        const run = await resumeSession();
        try {
          await run.expectText('Type your message');

          return await harness.measureWithEventLoop('execution', async () => {
            await run.sendKeys('!echo hi\r');
            await run.expectText('hi');
          });
        } finally {
          // Stop the CLI whether or not the measurement succeeded.
          await run.kill();
        }
      });
    });

    it('terminal-scrolling: latency when scrolling a large terminal buffer', async () => {
      await runAndCheck('long-conversation-terminal-scrolling', async () => {
        // Enable terminalBuffer to intentionally test CLI scrolling logic
        return await measureScrolling({ terminalBuffer: true });
      });
    });

    it('alternate-scrolling: latency when scrolling a large alternate buffer', async () => {
      await runAndCheck('long-conversation-alternate-scrolling', async () => {
        // Enable useAlternateBuffer to intentionally test CLI scrolling logic
        return await measureScrolling({ useAlternateBuffer: true });
      });
    });
  });
});