mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-17 08:41:19 -07:00
661 lines
20 KiB
TypeScript
661 lines
20 KiB
TypeScript
/**
|
|
* @license
|
|
* Copyright 2026 Google LLC
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
import { describe, it, beforeAll, afterAll } from 'vitest';
|
|
import {
|
|
TestRig,
|
|
PerfTestHarness,
|
|
type PerfSnapshot,
|
|
} from '@google/gemini-cli-test-utils';
|
|
import { join, dirname } from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
import {
|
|
existsSync,
|
|
readFileSync,
|
|
mkdirSync,
|
|
copyFileSync,
|
|
writeFileSync,
|
|
} from 'node:fs';
|
|
|
|
// ES modules have no built-in __dirname, so derive it from this module's URL.
const thisModulePath = fileURLToPath(import.meta.url);
const __dirname = dirname(thisModulePath);

// Location of baselines.json next to this file; handed to the harness as
// its `baselinesPath`.
const BASELINES_PATH = join(__dirname, 'baselines.json');
|
|
// True only when UPDATE_PERF_BASELINES is exactly the string 'true'. Each
// scenario then records a new baseline instead of asserting against one.
const UPDATE_BASELINES = process.env['UPDATE_PERF_BASELINES'] === 'true';

// Handed to the harness as `defaultTolerancePercent`.
const TOLERANCE_PERCENT = 15;

// Use fewer samples locally for faster iteration, more in CI
const CI_SAMPLE_COUNT = 5;
const LOCAL_SAMPLE_COUNT = 3;
const SAMPLE_COUNT = process.env['CI'] ? CI_SAMPLE_COUNT : LOCAL_SAMPLE_COUNT;

// Handed to the harness as `warmupCount`.
const WARMUP_COUNT = 1;
|
|
|
|
describe('CPU Performance Tests', () => {
|
|
// Shared by every scenario in this suite; assigned once in beforeAll.
let harness: PerfTestHarness;

beforeAll(() => {
  // Collect the suite-wide settings first so the constructor call stays short.
  const harnessOptions = {
    baselinesPath: BASELINES_PATH,
    defaultTolerancePercent: TOLERANCE_PERCENT,
    sampleCount: SAMPLE_COUNT,
    warmupCount: WARMUP_COUNT,
  };
  harness = new PerfTestHarness(harnessOptions);
});
|
|
|
|
// Hook timeout, in milliseconds, allowed for writing the summary report.
const REPORT_HOOK_TIMEOUT_MS = 30000;

afterAll(async () => {
  // Generate the summary report after all tests
  await harness.generateReport();
}, REPORT_HOOK_TIMEOUT_MS);
|
|
|
|
it('cold-startup-time: startup completes within baseline', async () => {
  // Produces one sample: a fresh rig, one CLI invocation wrapped in
  // harness.measure, and cleanup of the rig whether or not the run succeeded.
  const sampleColdStartup = async () => {
    const rig = new TestRig();
    try {
      rig.setup('perf-cold-startup', {
        fakeResponsesPath: join(__dirname, 'perf.cold-startup.responses'),
      });

      const launchCli = () =>
        rig.run({
          args: ['hello'],
          timeout: 120000,
          env: { GEMINI_API_KEY: 'fake-perf-test-key' },
        });

      return await harness.measure('cold-startup', async () => {
        await launchCli();
      });
    } finally {
      await rig.cleanup();
    }
  };

  const result = await harness.runScenario(
    'cold-startup-time',
    sampleColdStartup,
  );

  // Default path: compare against the stored baseline.
  if (!UPDATE_BASELINES) {
    harness.assertWithinBaseline(result);
    return;
  }
  // Opt-in path: overwrite the stored baseline with this result.
  harness.updateScenarioBaseline(result);
});
|
|
|
|
/**
 * Scenario: run one prompt through the CLI, then observe the test process
 * for IDLE_OBSERVATION_MS while nothing is happening, and compare the
 * measurement with the stored baseline (or record a new one when
 * UPDATE_BASELINES is set).
 */
it('idle-cpu-usage: CPU stays low when idle', async () => {
  const IDLE_OBSERVATION_MS = 5000;

  const result = await harness.runScenario('idle-cpu-usage', async () => {
    const rig = new TestRig();
    try {
      rig.setup('perf-idle-cpu', {
        fakeResponsesPath: join(__dirname, 'perf.idle-cpu.responses'),
      });

      // Resolve the sleep helper BEFORE the measured window opens. Importing
      // it inside the measured callback would count module resolution as
      // "idle" work.
      const { setTimeout: sleep } = await import('node:timers/promises');

      // First, run a prompt to get the CLI into idle state
      await rig.run({
        args: ['hello'],
        timeout: 120000,
        env: { GEMINI_API_KEY: 'fake-perf-test-key' },
      });

      // Now measure CPU during idle period in the test process
      return await harness.measureWithEventLoop('idle-cpu', async () => {
        // Simulate idle period — just wait
        await sleep(IDLE_OBSERVATION_MS);
      });
    } finally {
      // Always release the rig, even when the run or measurement throws.
      await rig.cleanup();
    }
  });

  if (UPDATE_BASELINES) {
    harness.updateScenarioBaseline(result);
  } else {
    harness.assertWithinBaseline(result);
  }
});
|
|
|
|
it('asian-language-conv: verify perf is acceptable ', async () => {
  // Canned responses replayed for this scenario.
  const fakeResponsesPath = join(__dirname, 'perf.asian-language.responses');

  const result = await harness.runScenario('asian-language-conv', async () => {
    const rig = new TestRig();
    try {
      rig.setup('perf-asian-language', { fakeResponsesPath });

      // The measured work is one CLI run whose prompt is a single CJK
      // character.
      const snapshot = await harness.measure('asian-language', async () => {
        await rig.run({
          args: ['嗨'],
          timeout: 120000,
          env: { GEMINI_API_KEY: 'fake-perf-test-key' },
        });
      });
      return snapshot;
    } finally {
      await rig.cleanup();
    }
  });

  // Either refresh the stored baseline or check this result against it.
  if (UPDATE_BASELINES) {
    harness.updateScenarioBaseline(result);
  } else {
    harness.assertWithinBaseline(result);
  }
});
|
|
|
|
it('skill-loading-time: startup with many skills within baseline', async () => {
  const SKILL_COUNT = 20;

  // Builds the SKILL.md text (front matter followed by a short body) for the
  // skill with the given index.
  const renderSkillMarkdown = (index: number): string => {
    const lines = [
      '---',
      `name: perf-skill-${index}`,
      `description: Performance test skill number ${index}`,
      `activation: manual`,
      '---',
      '',
      `# Performance Test Skill ${index}`,
      '',
      `This is a test skill for measuring skill loading performance.`,
      `It contains some content to simulate real-world skill files.`,
      '',
      `## Usage`,
      '',
      `Use this skill by activating it with @perf-skill-${index}.`,
    ];
    return lines.join('\n');
  };

  const result = await harness.runScenario('skill-loading-time', async () => {
    const rig = new TestRig();
    try {
      rig.setup('perf-skill-loading', {
        fakeResponsesPath: join(__dirname, 'perf.skill-loading.responses'),
      });

      // Create many skill directories with SKILL.md files
      for (let index = 0; index < SKILL_COUNT; index += 1) {
        const skillDir = `.gemini/skills/perf-skill-${index}`;
        rig.mkdir(skillDir);
        rig.createFile(`${skillDir}/SKILL.md`, renderSkillMarkdown(index));
      }

      // Only the CLI run is measured; the skill files above are written
      // before harness.measure is called.
      const snapshot = await harness.measure('skill-loading', async () => {
        await rig.run({
          args: ['hello'],
          timeout: 120000,
          env: { GEMINI_API_KEY: 'fake-perf-test-key' },
        });
      });
      return snapshot;
    } finally {
      await rig.cleanup();
    }
  });

  if (!UPDATE_BASELINES) {
    harness.assertWithinBaseline(result);
    return;
  }
  harness.updateScenarioBaseline(result);
});
|
|
|
|
it('high-volume-shell-output: handles large output efficiently', async () => {
  // Environment for the measured CLI run; beyond the fake API key it sets the
  // telemetry, memory-monitor, event-loop-monitor and DEBUG variables.
  const cliEnv = {
    GEMINI_API_KEY: 'fake-perf-test-key',
    GEMINI_TELEMETRY_ENABLED: 'true',
    GEMINI_MEMORY_MONITOR_INTERVAL: '500',
    GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true',
    DEBUG: 'true',
  };

  // Produces one sample: measures the CLI run, then enriches the snapshot with
  // the event-loop figures found in the CLI's own telemetry.
  const sampleHighVolume = async () => {
    const rig = new TestRig();
    try {
      rig.setup('perf-high-volume-output', {
        fakeResponsesPath: join(__dirname, 'perf.high-volume.responses'),
      });

      const snapshot = await harness.measureWithEventLoop(
        'high-volume-output',
        async () => {
          await rig.run({
            args: ['Generate 1M lines of output'],
            timeout: 120000,
            env: cliEnv,
          });
        },
      );

      // Query CLI's own performance metrics from telemetry logs
      await rig.waitForTelemetryReady();

      // Debug: Read and log the telemetry file content
      try {
        const telemetryLogPath = join(rig.homeDir!, 'telemetry.log');
        if (!existsSync(telemetryLogPath)) {
          console.log(` Telemetry log file not found at: ${telemetryLogPath}`);
        } else {
          console.log(
            ` Telemetry Log Content:\n`,
            readFileSync(telemetryLogPath, 'utf-8'),
          );
        }
      } catch (readError) {
        // Best-effort diagnostics only: report the problem and carry on.
        console.error(` Failed to read telemetry log:`, readError);
      }

      const memoryMetric = rig.readMetric('memory.usage');
      const cpuMetric = rig.readMetric('cpu.usage');
      const toolLatencyMetric = rig.readMetric('tool.call.latency');
      const eventLoopMetric = rig.readMetric('event_loop.delay');

      // Print whichever of these metrics were found, in a fixed order.
      const foundMetricLogs = [
        [` CLI Memory Metric found:`, memoryMetric],
        [` CLI CPU Metric found:`, cpuMetric],
        [` CLI Tool Latency Metric found:`, toolLatencyMetric],
      ] as const;
      for (const [label, metric] of foundMetricLogs) {
        if (metric) {
          console.log(label, JSON.stringify(metric));
        }
      }

      // List every metric in the raw telemetry whose name mentions
      // "event_loop".
      const telemetryEntries = rig.readTelemetryLogs();
      console.log(` Total telemetry log entries: ${telemetryEntries.length}`);
      for (const entry of telemetryEntries) {
        if (!entry.scopeMetrics) {
          continue;
        }
        for (const scope of entry.scopeMetrics) {
          for (const metric of scope.metrics) {
            if (metric.descriptor.name.includes('event_loop')) {
              console.log(
                ` Found event_loop metric in log:`,
                metric.descriptor.name,
              );
            }
          }
        }
      }

      if (eventLoopMetric) {
        console.log(
          ` CLI Event Loop Metric found:`,
          JSON.stringify(eventLoopMetric),
        );

        // Returns `value.min` of the data point tagged with the given
        // percentile, or undefined when there is no such data point.
        // NOTE(review): the scrolling scenarios in this file read `value.sum`
        // from the same metric — confirm which field is intended.
        const percentileMin = (percentile: string) =>
          eventLoopMetric.dataPoints.find(
            (point) => point.attributes?.['percentile'] === percentile,
          )?.value?.min;

        snapshot.childEventLoopDelayP50Ms = percentileMin('p50');
        snapshot.childEventLoopDelayP95Ms = percentileMin('p95');
        snapshot.childEventLoopDelayMaxMs = percentileMin('max');
      }

      return snapshot;
    } finally {
      await rig.cleanup();
    }
  };

  const result = await harness.runScenario(
    'high-volume-shell-output',
    sampleHighVolume,
  );

  if (UPDATE_BASELINES) {
    harness.updateScenarioBaseline(result);
  } else {
    harness.assertWithinBaseline(result);
  }
});
|
|
|
|
describe('long-conversation', () => {
|
|
// One rig is shared by every long-conversation scenario; assigned in
// beforeAll.
let rig: TestRig;
const identifier = 'perf-long-conversation';
const SESSION_ID =
  'anonymous_unique_id_577296e0eee5afecdcec05d11838e0cd1a851cd97a28119a4a876b11';
// Large chat fixture that lives with the memory tests.
const LARGE_CHAT_SOURCE = join(
  __dirname,
  '..',
  'memory-tests',
  'large-chat-session.json',
);

beforeAll(async () => {
  // Fail fast with a clear message when the fixture is absent.
  const fixtureIsMissing = !existsSync(LARGE_CHAT_SOURCE);
  if (fixtureIsMissing) {
    throw new Error(`Performance test fixture missing: ${LARGE_CHAT_SOURCE}.`);
  }

  rig = new TestRig();
  rig.setup(identifier, {
    fakeResponsesPath: join(__dirname, 'perf.long-chat.responses'),
  });

  // Lay out <home>/.gemini/tmp/<identifier>/chats and seed it with the
  // fixture. Presumably this is what lets `--resume latest` find the session
  // — confirm against the CLI's session lookup.
  const geminiHome = join(rig.homeDir!, '.gemini');
  const projectTmp = join(geminiHome, 'tmp', identifier);
  const chatsDir = join(projectTmp, 'chats');
  mkdirSync(chatsDir, { recursive: true });

  // projects.json maps the rig's test directory to this scenario's
  // identifier.
  const projectsRegistry = { projects: { [rig.testDir!]: identifier } };
  writeFileSync(
    join(geminiHome, 'projects.json'),
    JSON.stringify(projectsRegistry),
  );
  writeFileSync(join(projectTmp, '.project_root'), rig.testDir!);

  const sessionFileName = `session-${SESSION_ID}.json`;
  copyFileSync(LARGE_CHAT_SOURCE, join(chatsDir, sessionFileName));
});
|
|
|
|
/**
 * Releases the rig shared by the long-conversation scenarios.
 *
 * The beforeAll hook in this suite throws on a missing fixture before `rig`
 * is assigned. Without the guard, this hook would then raise a TypeError on
 * top of the real failure.
 */
afterAll(async () => {
  if (rig) {
    await rig.cleanup();
  }
});
|
|
|
|
it('session-load: resume a 60MB chat history', async () => {
  // Environment for the resumed CLI session: fake API key plus the telemetry,
  // memory-monitor, event-loop-monitor and DEBUG variables.
  const resumeEnv = {
    GEMINI_API_KEY: 'fake-perf-test-key',
    GEMINI_TELEMETRY_ENABLED: 'true',
    GEMINI_MEMORY_MONITOR_INTERVAL: '500',
    GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true',
    DEBUG: 'true',
  };

  // The measured region starts the interactive session with
  // `--resume latest` and kills it as soon as runInteractive resolves.
  const result = await harness.runScenario(
    'long-conversation-resume',
    async () =>
      harness.measureWithEventLoop('resume', async () => {
        const session = await rig.runInteractive({
          args: ['--resume', 'latest'],
          env: resumeEnv,
        });
        await session.kill();
      }),
  );

  if (!UPDATE_BASELINES) {
    harness.assertWithinBaseline(result);
    return;
  }
  harness.updateScenarioBaseline(result);
});
|
|
|
|
/**
 * Scenario: resume the large session, then measure typing the characters of
 * "Hello" one at a time. The result is compared with the stored baseline (or
 * recorded as the new baseline when UPDATE_BASELINES is set).
 */
it('typing: latency when typing into a large session', async () => {
  const result = await harness.runScenario(
    'long-conversation-typing',
    async () => {
      const run = await rig.runInteractive({
        args: ['--resume', 'latest'],
        env: {
          GEMINI_API_KEY: 'fake-perf-test-key',
          GEMINI_TELEMETRY_ENABLED: 'true',
          GEMINI_MEMORY_MONITOR_INTERVAL: '500',
          GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true',
          DEBUG: 'true',
        },
      });

      // try/finally guarantees run.kill() is reached even when typing or the
      // measurement throws; previously a failure skipped the kill entirely.
      try {
        return await harness.measureWithEventLoop('typing', async () => {
          // On average, the expected latency per key is under 30ms.
          for (const char of 'Hello') {
            await run.type(char);
          }
        });
      } finally {
        await run.kill();
      }
    },
  );

  if (UPDATE_BASELINES) {
    harness.updateScenarioBaseline(result);
  } else {
    harness.assertWithinBaseline(result);
  }
});
|
|
|
|
/**
 * Scenario: resume the large session, wait for the prompt text, then measure
 * sending `!echo hi` and waiting for "hi" to appear. The result is compared
 * with the stored baseline (or recorded as the new baseline when
 * UPDATE_BASELINES is set).
 */
it('execution: response latency for a simple shell command', async () => {
  const result = await harness.runScenario(
    'long-conversation-execution',
    async () => {
      const run = await rig.runInteractive({
        args: ['--resume', 'latest'],
        env: {
          GEMINI_API_KEY: 'fake-perf-test-key',
          GEMINI_TELEMETRY_ENABLED: 'true',
          GEMINI_MEMORY_MONITOR_INTERVAL: '500',
          GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true',
          DEBUG: 'true',
        },
      });

      // try/finally guarantees run.kill() is reached even when an expectText
      // call or the measurement throws; previously a failure skipped the kill
      // entirely.
      try {
        // Not part of the measurement: wait until the prompt text is shown.
        await run.expectText('Type your message');

        return await harness.measureWithEventLoop('execution', async () => {
          await run.sendKeys('!echo hi\r');
          await run.expectText('hi');
        });
      } finally {
        await run.kill();
      }
    },
  );

  if (UPDATE_BASELINES) {
    harness.updateScenarioBaseline(result);
  } else {
    harness.assertWithinBaseline(result);
  }
});
|
|
|
|
// Escape sequences sent to the interactive session while scrolling.
const PAGE_UP_KEY = '\u001b[5~';
const PAGE_DOWN_KEY = '\u001b[6~';
const HOME_KEY = '\u001b[H';
// How many times PageUp, and later PageDown, is sent per sample.
const SCROLL_PAGE_PRESSES = 5;

/**
 * Runs one scrolling sample against the resumed large session and builds a
 * PerfSnapshot from the telemetry the CLI wrote. Shared by the
 * terminal-buffer and alternate-buffer scenarios, which differ only in the
 * `ui` settings they enable.
 *
 * @param uiSettings Object written under the `ui` key of
 *   <home>/.gemini/settings.json before the session starts.
 * @returns A snapshot labelled 'scrolling'. Event-loop fields hold `value.sum`
 *   of the 'p50', 'p95' and 'max' data points of `event_loop.delay` (0 when
 *   absent); `cpuTotalUs` is the total of the positive `value.sum` entries of
 *   `cpu.usage`.
 */
const measureScrolling = async (
  uiSettings: Record<string, boolean>,
): Promise<PerfSnapshot> => {
  const settingsPath = join(rig.homeDir!, '.gemini', 'settings.json');
  writeFileSync(
    settingsPath,
    JSON.stringify({
      security: { folderTrust: { enabled: false } },
      ui: uiSettings,
    }),
  );

  const run = await rig.runInteractive({
    args: ['--resume', 'latest'],
    env: {
      GEMINI_API_KEY: 'fake-perf-test-key',
      GEMINI_TELEMETRY_ENABLED: 'true',
      GEMINI_MEMORY_MONITOR_INTERVAL: '500',
      GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true',
      DEBUG: 'true',
    },
  });

  // try/finally guarantees run.kill() is reached even when an expectText or
  // sendKeys call throws; previously a failure skipped the kill entirely. On
  // success the order is unchanged: telemetry ready, kill, then read metrics.
  try {
    await run.expectText('Type your message');

    for (let press = 0; press < SCROLL_PAGE_PRESSES; press++) {
      await run.sendKeys(PAGE_UP_KEY);
    }

    // Scroll to the very top
    await run.sendKeys(HOME_KEY);
    // Verify top line of chat is visible.
    await run.expectText('Authenticated with');

    for (let press = 0; press < SCROLL_PAGE_PRESSES; press++) {
      await run.sendKeys(PAGE_DOWN_KEY);
    }

    await rig.waitForTelemetryReady();
  } finally {
    await run.kill();
  }

  const eventLoopMetric = rig.readMetric('event_loop.delay');
  const cpuMetric = rig.readMetric('cpu.usage');

  // `value.sum` of the data point tagged with the given percentile, or 0 when
  // the metric or the data point is missing.
  const percentileSum = (percentile: string): number =>
    eventLoopMetric?.dataPoints.find(
      (dp) => dp.attributes?.['percentile'] === percentile,
    )?.value?.sum ?? 0;

  const p50Ms = percentileSum('p50');
  const p95Ms = percentileSum('p95');
  const maxMs = percentileSum('max');

  // Add up every positive CPU data point.
  let cpuTotalUs = 0;
  for (const dp of cpuMetric?.dataPoints ?? []) {
    const sum = dp.value?.sum;
    if (sum && sum > 0) {
      cpuTotalUs += sum;
    }
  }

  return {
    timestamp: Date.now(),
    label: 'scrolling',
    // NOTE(review): wallClockMs is filled from the p50 event-loop delay
    // (rounded to one decimal), not from an elapsed-time measurement —
    // confirm this is intended.
    wallClockMs: Math.round(p50Ms * 10) / 10,
    cpuTotalUs,
    // No user/system split is computed here: the whole total is reported as
    // user time and system time is reported as 0.
    cpuUserUs: cpuTotalUs,
    cpuSystemUs: 0,
    eventLoopDelayP50Ms: p50Ms,
    eventLoopDelayP95Ms: p95Ms,
    eventLoopDelayMaxMs: maxMs,
  };
};

it('terminal-scrolling: latency when scrolling a large terminal buffer', async () => {
  const result = await harness.runScenario(
    'long-conversation-terminal-scrolling',
    // Enable terminalBuffer to intentionally test CLI scrolling logic
    async () => measureScrolling({ terminalBuffer: true }),
  );

  if (UPDATE_BASELINES) {
    harness.updateScenarioBaseline(result);
  } else {
    harness.assertWithinBaseline(result);
  }
});

it('alternate-scrolling: latency when scrolling a large alternate buffer', async () => {
  const result = await harness.runScenario(
    'long-conversation-alternate-scrolling',
    // Enable useAlternateBuffer to intentionally test CLI scrolling logic
    async () => measureScrolling({ useAlternateBuffer: true }),
  );

  if (UPDATE_BASELINES) {
    harness.updateScenarioBaseline(result);
  } else {
    harness.assertWithinBaseline(result);
  }
});
|
|
});
|
|
});
|