mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-15 06:12:50 -07:00
feat(test): refactor the memory usage test to use metrics from CLI process instead of test runner (#25708)
This commit is contained in:
+36
-36
@@ -1,55 +1,55 @@
|
|||||||
{
|
{
|
||||||
"version": 1,
|
"version": 1,
|
||||||
"updatedAt": "2026-04-10T15:36:04.547Z",
|
"updatedAt": "2026-04-20T18:04:59.671Z",
|
||||||
"scenarios": {
|
"scenarios": {
|
||||||
"multi-turn-conversation": {
|
"multi-turn-conversation": {
|
||||||
"heapUsedBytes": 120082704,
|
"heapUsedMB": 68.8,
|
||||||
"heapTotalBytes": 177586176,
|
"heapTotalMB": 91.2,
|
||||||
"rssBytes": 269172736,
|
"rssMB": 215.4,
|
||||||
"externalBytes": 4304053,
|
"externalMB": 93.8,
|
||||||
"timestamp": "2026-04-10T15:35:17.603Z"
|
"timestamp": "2026-04-20T18:02:40.101Z"
|
||||||
},
|
},
|
||||||
"multi-function-call-repo-search": {
|
"multi-function-call-repo-search": {
|
||||||
"heapUsedBytes": 104644984,
|
"heapUsedMB": 73.5,
|
||||||
"heapTotalBytes": 111575040,
|
"heapTotalMB": 93.1,
|
||||||
"rssBytes": 204079104,
|
"rssMB": 223.6,
|
||||||
"externalBytes": 4304053,
|
"externalMB": 97.7,
|
||||||
"timestamp": "2026-04-10T15:35:22.480Z"
|
"timestamp": "2026-04-20T18:02:42.032Z"
|
||||||
},
|
},
|
||||||
"idle-session-startup": {
|
"idle-session-startup": {
|
||||||
"heapUsedBytes": 119813672,
|
"heapUsedMB": 69.8,
|
||||||
"heapTotalBytes": 177061888,
|
"heapTotalMB": 92.4,
|
||||||
"rssBytes": 267943936,
|
"rssMB": 217.4,
|
||||||
"externalBytes": 4304053,
|
"externalMB": 93.8,
|
||||||
"timestamp": "2026-04-10T15:35:08.035Z"
|
"timestamp": "2026-04-20T18:02:36.294Z"
|
||||||
},
|
},
|
||||||
"simple-prompt-response": {
|
"simple-prompt-response": {
|
||||||
"heapUsedBytes": 119722064,
|
"heapUsedMB": 69.5,
|
||||||
"heapTotalBytes": 177324032,
|
"heapTotalMB": 92.4,
|
||||||
"rssBytes": 268812288,
|
"rssMB": 216.1,
|
||||||
"externalBytes": 4304053,
|
"externalMB": 93.8,
|
||||||
"timestamp": "2026-04-10T15:35:12.770Z"
|
"timestamp": "2026-04-20T18:02:38.198Z"
|
||||||
},
|
},
|
||||||
"resume-large-chat-with-messages": {
|
"resume-large-chat-with-messages": {
|
||||||
"heapUsedBytes": 106545568,
|
"heapUsedMB": 887.1,
|
||||||
"heapTotalBytes": 111509504,
|
"heapTotalMB": 954.3,
|
||||||
"rssBytes": 202596352,
|
"rssMB": 1109.6,
|
||||||
"externalBytes": 4306101,
|
"externalMB": 103.2,
|
||||||
"timestamp": "2026-04-10T15:36:04.547Z"
|
"timestamp": "2026-04-20T18:04:59.671Z"
|
||||||
},
|
},
|
||||||
"resume-large-chat": {
|
"resume-large-chat": {
|
||||||
"heapUsedBytes": 106513760,
|
"heapUsedMB": 885.6,
|
||||||
"heapTotalBytes": 111509504,
|
"heapTotalMB": 955.6,
|
||||||
"rssBytes": 202596352,
|
"rssMB": 1107.8,
|
||||||
"externalBytes": 4306101,
|
"externalMB": 110.5,
|
||||||
"timestamp": "2026-04-10T15:35:59.528Z"
|
"timestamp": "2026-04-20T18:04:06.526Z"
|
||||||
},
|
},
|
||||||
"large-chat": {
|
"large-chat": {
|
||||||
"heapUsedBytes": 106471568,
|
"heapUsedMB": 158.5,
|
||||||
"heapTotalBytes": 111509504,
|
"heapTotalMB": 193,
|
||||||
"rssBytes": 202596352,
|
"rssMB": 787.9,
|
||||||
"externalBytes": 4306101,
|
"externalMB": 104,
|
||||||
"timestamp": "2026-04-10T15:35:53.180Z"
|
"timestamp": "2026-04-20T18:03:12.486Z"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,15 +16,21 @@ import {
|
|||||||
mkdirSync,
|
mkdirSync,
|
||||||
rmSync,
|
rmSync,
|
||||||
} from 'node:fs';
|
} from 'node:fs';
|
||||||
import { randomUUID } from 'node:crypto';
|
import { randomUUID, createHash } from 'node:crypto';
|
||||||
|
|
||||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||||
const BASELINES_PATH = join(__dirname, 'baselines.json');
|
const BASELINES_PATH = join(__dirname, 'baselines.json');
|
||||||
const UPDATE_BASELINES = process.env['UPDATE_MEMORY_BASELINES'] === 'true';
|
const UPDATE_BASELINES = process.env['UPDATE_MEMORY_BASELINES'] === 'true';
|
||||||
|
function getProjectHash(projectRoot: string): string {
|
||||||
|
return createHash('sha256').update(projectRoot).digest('hex');
|
||||||
|
}
|
||||||
const TOLERANCE_PERCENT = 10;
|
const TOLERANCE_PERCENT = 10;
|
||||||
|
|
||||||
// Fake API key for tests using fake responses
|
// Fake API key for tests using fake responses
|
||||||
const TEST_ENV = { GEMINI_API_KEY: 'fake-memory-test-key' };
|
const TEST_ENV = {
|
||||||
|
GEMINI_API_KEY: 'fake-memory-test-key',
|
||||||
|
GEMINI_MEMORY_MONITOR_INTERVAL: '100',
|
||||||
|
};
|
||||||
|
|
||||||
describe('Memory Usage Tests', () => {
|
describe('Memory Usage Tests', () => {
|
||||||
let harness: MemoryTestHarness;
|
let harness: MemoryTestHarness;
|
||||||
@@ -56,6 +62,7 @@ describe('Memory Usage Tests', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
const result = await harness.runScenario(
|
const result = await harness.runScenario(
|
||||||
|
rig,
|
||||||
'idle-session-startup',
|
'idle-session-startup',
|
||||||
async (recordSnapshot) => {
|
async (recordSnapshot) => {
|
||||||
await rig.run({
|
await rig.run({
|
||||||
@@ -85,6 +92,7 @@ describe('Memory Usage Tests', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
const result = await harness.runScenario(
|
const result = await harness.runScenario(
|
||||||
|
rig,
|
||||||
'simple-prompt-response',
|
'simple-prompt-response',
|
||||||
async (recordSnapshot) => {
|
async (recordSnapshot) => {
|
||||||
await rig.run({
|
await rig.run({
|
||||||
@@ -122,6 +130,7 @@ describe('Memory Usage Tests', () => {
|
|||||||
];
|
];
|
||||||
|
|
||||||
const result = await harness.runScenario(
|
const result = await harness.runScenario(
|
||||||
|
rig,
|
||||||
'multi-turn-conversation',
|
'multi-turn-conversation',
|
||||||
async (recordSnapshot) => {
|
async (recordSnapshot) => {
|
||||||
// Run through all turns as a piped sequence
|
// Run through all turns as a piped sequence
|
||||||
@@ -144,6 +153,9 @@ describe('Memory Usage Tests', () => {
|
|||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
harness.assertWithinBaseline(result);
|
harness.assertWithinBaseline(result);
|
||||||
|
harness.assertMemoryReturnsToBaseline(result.snapshots, 20);
|
||||||
|
const { leaked, message } = harness.analyzeSnapshots(result.snapshots);
|
||||||
|
if (leaked) console.warn(`⚠ ${message}`);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -168,6 +180,7 @@ describe('Memory Usage Tests', () => {
|
|||||||
);
|
);
|
||||||
|
|
||||||
const result = await harness.runScenario(
|
const result = await harness.runScenario(
|
||||||
|
rig,
|
||||||
'multi-function-call-repo-search',
|
'multi-function-call-repo-search',
|
||||||
async (recordSnapshot) => {
|
async (recordSnapshot) => {
|
||||||
await rig.run({
|
await rig.run({
|
||||||
@@ -189,6 +202,7 @@ describe('Memory Usage Tests', () => {
|
|||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
harness.assertWithinBaseline(result);
|
harness.assertWithinBaseline(result);
|
||||||
|
harness.assertMemoryReturnsToBaseline(result.snapshots, 20);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -228,6 +242,7 @@ describe('Memory Usage Tests', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
const result = await harness.runScenario(
|
const result = await harness.runScenario(
|
||||||
|
rig,
|
||||||
'large-chat',
|
'large-chat',
|
||||||
async (recordSnapshot) => {
|
async (recordSnapshot) => {
|
||||||
await rig.run({
|
await rig.run({
|
||||||
@@ -257,19 +272,21 @@ describe('Memory Usage Tests', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
const result = await harness.runScenario(
|
const result = await harness.runScenario(
|
||||||
|
rig,
|
||||||
'resume-large-chat',
|
'resume-large-chat',
|
||||||
async (recordSnapshot) => {
|
async (recordSnapshot) => {
|
||||||
// Ensure the history file is linked
|
// Ensure the history file is linked
|
||||||
const targetChatsDir = join(
|
const targetChatsDir = join(
|
||||||
rig.testDir!,
|
rig.homeDir!,
|
||||||
|
'.gemini',
|
||||||
'tmp',
|
'tmp',
|
||||||
'test-project-hash',
|
getProjectHash(rig.testDir!),
|
||||||
'chats',
|
'chats',
|
||||||
);
|
);
|
||||||
mkdirSync(targetChatsDir, { recursive: true });
|
mkdirSync(targetChatsDir, { recursive: true });
|
||||||
const targetHistoryPath = join(
|
const targetHistoryPath = join(
|
||||||
targetChatsDir,
|
targetChatsDir,
|
||||||
'large-chat-session.json',
|
'session-large-chat.json',
|
||||||
);
|
);
|
||||||
if (existsSync(targetHistoryPath)) rmSync(targetHistoryPath);
|
if (existsSync(targetHistoryPath)) rmSync(targetHistoryPath);
|
||||||
copyFileSync(sharedHistoryPath, targetHistoryPath);
|
copyFileSync(sharedHistoryPath, targetHistoryPath);
|
||||||
@@ -302,19 +319,21 @@ describe('Memory Usage Tests', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
const result = await harness.runScenario(
|
const result = await harness.runScenario(
|
||||||
|
rig,
|
||||||
'resume-large-chat-with-messages',
|
'resume-large-chat-with-messages',
|
||||||
async (recordSnapshot) => {
|
async (recordSnapshot) => {
|
||||||
// Ensure the history file is linked
|
// Ensure the history file is linked
|
||||||
const targetChatsDir = join(
|
const targetChatsDir = join(
|
||||||
rig.testDir!,
|
rig.homeDir!,
|
||||||
|
'.gemini',
|
||||||
'tmp',
|
'tmp',
|
||||||
'test-project-hash',
|
getProjectHash(rig.testDir!),
|
||||||
'chats',
|
'chats',
|
||||||
);
|
);
|
||||||
mkdirSync(targetChatsDir, { recursive: true });
|
mkdirSync(targetChatsDir, { recursive: true });
|
||||||
const targetHistoryPath = join(
|
const targetHistoryPath = join(
|
||||||
targetChatsDir,
|
targetChatsDir,
|
||||||
'large-chat-session.json',
|
'session-large-chat.json',
|
||||||
);
|
);
|
||||||
if (existsSync(targetHistoryPath)) rmSync(targetHistoryPath);
|
if (existsSync(targetHistoryPath)) rmSync(targetHistoryPath);
|
||||||
copyFileSync(sharedHistoryPath, targetHistoryPath);
|
copyFileSync(sharedHistoryPath, targetHistoryPath);
|
||||||
@@ -457,6 +476,9 @@ async function generateSharedLargeChatData(tempDir: string) {
|
|||||||
// Generate responses for resumed chat
|
// Generate responses for resumed chat
|
||||||
const resumeResponsesStream = createWriteStream(resumeResponsesPath);
|
const resumeResponsesStream = createWriteStream(resumeResponsesPath);
|
||||||
for (let i = 0; i < 5; i++) {
|
for (let i = 0; i < 5; i++) {
|
||||||
|
// Doubling up on non-streaming responses to satisfy classifier and complexity checks
|
||||||
|
resumeResponsesStream.write(JSON.stringify(complexityResponse) + '\n');
|
||||||
|
resumeResponsesStream.write(JSON.stringify(summaryResponse) + '\n');
|
||||||
resumeResponsesStream.write(JSON.stringify(complexityResponse) + '\n');
|
resumeResponsesStream.write(JSON.stringify(complexityResponse) + '\n');
|
||||||
resumeResponsesStream.write(
|
resumeResponsesStream.write(
|
||||||
JSON.stringify({
|
JSON.stringify({
|
||||||
|
|||||||
@@ -10,10 +10,10 @@ import { readFileSync, writeFileSync, existsSync } from 'node:fs';
|
|||||||
* Baseline entry for a single memory test scenario.
|
* Baseline entry for a single memory test scenario.
|
||||||
*/
|
*/
|
||||||
export interface MemoryBaseline {
|
export interface MemoryBaseline {
|
||||||
heapUsedBytes: number;
|
heapUsedMB: number;
|
||||||
heapTotalBytes: number;
|
heapTotalMB: number;
|
||||||
rssBytes: number;
|
rssMB: number;
|
||||||
externalBytes: number;
|
externalMB: number;
|
||||||
timestamp: string;
|
timestamp: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -61,18 +61,18 @@ export function updateBaseline(
|
|||||||
path: string,
|
path: string,
|
||||||
scenarioName: string,
|
scenarioName: string,
|
||||||
measured: {
|
measured: {
|
||||||
heapUsedBytes: number;
|
heapUsedMB: number;
|
||||||
heapTotalBytes: number;
|
heapTotalMB: number;
|
||||||
rssBytes: number;
|
rssMB: number;
|
||||||
externalBytes: number;
|
externalMB: number;
|
||||||
},
|
},
|
||||||
): void {
|
): void {
|
||||||
const baselines = loadBaselines(path);
|
const baselines = loadBaselines(path);
|
||||||
baselines.scenarios[scenarioName] = {
|
baselines.scenarios[scenarioName] = {
|
||||||
heapUsedBytes: measured.heapUsedBytes,
|
heapUsedMB: measured.heapUsedMB,
|
||||||
heapTotalBytes: measured.heapTotalBytes,
|
heapTotalMB: measured.heapTotalMB,
|
||||||
rssBytes: measured.rssBytes,
|
rssMB: measured.rssMB,
|
||||||
externalBytes: measured.externalBytes,
|
externalMB: measured.externalMB,
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
};
|
};
|
||||||
saveBaselines(path, baselines);
|
saveBaselines(path, baselines);
|
||||||
|
|||||||
@@ -4,10 +4,9 @@
|
|||||||
* SPDX-License-Identifier: Apache-2.0
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import v8 from 'node:v8';
|
|
||||||
import { setTimeout as sleep } from 'node:timers/promises';
|
|
||||||
import { loadBaselines, updateBaseline } from './memory-baselines.js';
|
import { loadBaselines, updateBaseline } from './memory-baselines.js';
|
||||||
import type { MemoryBaseline, MemoryBaselineFile } from './memory-baselines.js';
|
import type { MemoryBaseline, MemoryBaselineFile } from './memory-baselines.js';
|
||||||
|
import type { TestRig } from './test-rig.js';
|
||||||
|
|
||||||
/** Configuration for asciichart plot function. */
|
/** Configuration for asciichart plot function. */
|
||||||
interface PlotConfig {
|
interface PlotConfig {
|
||||||
@@ -28,9 +27,6 @@ export interface MemorySnapshot {
|
|||||||
heapTotal: number;
|
heapTotal: number;
|
||||||
rss: number;
|
rss: number;
|
||||||
external: number;
|
external: number;
|
||||||
arrayBuffers: number;
|
|
||||||
heapSizeLimit: number;
|
|
||||||
heapSpaces: any[];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -64,16 +60,13 @@ export interface MemoryTestHarnessOptions {
|
|||||||
gcDelayMs?: number;
|
gcDelayMs?: number;
|
||||||
/** Number of samples to take for median calculation. Default: 3 */
|
/** Number of samples to take for median calculation. Default: 3 */
|
||||||
sampleCount?: number;
|
sampleCount?: number;
|
||||||
/** Pause in ms between samples. Default: 50 */
|
|
||||||
samplePauseMs?: number;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* MemoryTestHarness provides infrastructure for running memory usage tests.
|
* MemoryTestHarness provides infrastructure for running memory usage tests.
|
||||||
*
|
*
|
||||||
* It handles:
|
* It handles:
|
||||||
* - Forcing V8 garbage collection to reduce noise
|
* - Extracting memory metrics from CLI process telemetry
|
||||||
* - Taking V8 heap snapshots for accurate memory measurement
|
|
||||||
* - Comparing against baselines with configurable tolerance
|
* - Comparing against baselines with configurable tolerance
|
||||||
* - Generating ASCII chart reports of memory trends
|
* - Generating ASCII chart reports of memory trends
|
||||||
*/
|
*/
|
||||||
@@ -81,88 +74,45 @@ export class MemoryTestHarness {
|
|||||||
private baselines: MemoryBaselineFile;
|
private baselines: MemoryBaselineFile;
|
||||||
private readonly baselinesPath: string;
|
private readonly baselinesPath: string;
|
||||||
private readonly defaultTolerancePercent: number;
|
private readonly defaultTolerancePercent: number;
|
||||||
private readonly gcCycles: number;
|
|
||||||
private readonly gcDelayMs: number;
|
|
||||||
private readonly sampleCount: number;
|
|
||||||
private readonly samplePauseMs: number;
|
|
||||||
private allResults: MemoryTestResult[] = [];
|
private allResults: MemoryTestResult[] = [];
|
||||||
|
|
||||||
constructor(options: MemoryTestHarnessOptions) {
|
constructor(options: MemoryTestHarnessOptions) {
|
||||||
this.baselinesPath = options.baselinesPath;
|
this.baselinesPath = options.baselinesPath;
|
||||||
this.defaultTolerancePercent = options.defaultTolerancePercent ?? 10;
|
this.defaultTolerancePercent = options.defaultTolerancePercent ?? 10;
|
||||||
this.gcCycles = options.gcCycles ?? 3;
|
|
||||||
this.gcDelayMs = options.gcDelayMs ?? 100;
|
|
||||||
this.sampleCount = options.sampleCount ?? 3;
|
|
||||||
this.samplePauseMs = options.samplePauseMs ?? 50;
|
|
||||||
this.baselines = loadBaselines(this.baselinesPath);
|
this.baselines = loadBaselines(this.baselinesPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Force garbage collection multiple times and take a V8 heap snapshot.
|
* Extract memory snapshot from TestRig telemetry.
|
||||||
* Forces GC multiple times with delays to allow weak references and
|
|
||||||
* FinalizationRegistry callbacks to run, reducing measurement noise.
|
|
||||||
*/
|
*/
|
||||||
async takeSnapshot(label: string = 'snapshot'): Promise<MemorySnapshot> {
|
async takeSnapshot(
|
||||||
await this.forceGC();
|
rig: TestRig,
|
||||||
|
label: string = 'snapshot',
|
||||||
const memUsage = process.memoryUsage();
|
strategy: 'peak' | 'last' = 'last',
|
||||||
const heapStats = v8.getHeapStatistics();
|
|
||||||
|
|
||||||
return {
|
|
||||||
timestamp: Date.now(),
|
|
||||||
label,
|
|
||||||
heapUsed: memUsage.heapUsed,
|
|
||||||
heapTotal: memUsage.heapTotal,
|
|
||||||
rss: memUsage.rss,
|
|
||||||
external: memUsage.external,
|
|
||||||
arrayBuffers: memUsage.arrayBuffers,
|
|
||||||
heapSizeLimit: heapStats.heap_size_limit,
|
|
||||||
heapSpaces: v8.getHeapSpaceStatistics(),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Take multiple snapshot samples and return the median to reduce noise.
|
|
||||||
*/
|
|
||||||
async takeMedianSnapshot(
|
|
||||||
label: string = 'median',
|
|
||||||
count?: number,
|
|
||||||
): Promise<MemorySnapshot> {
|
): Promise<MemorySnapshot> {
|
||||||
const samples: MemorySnapshot[] = [];
|
const metrics = rig.readMemoryMetrics(strategy);
|
||||||
const numSamples = count ?? this.sampleCount;
|
|
||||||
|
|
||||||
for (let i = 0; i < numSamples; i++) {
|
|
||||||
samples.push(await this.takeSnapshot(`${label}_sample_${i}`));
|
|
||||||
if (i < numSamples - 1) {
|
|
||||||
await sleep(this.samplePauseMs);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sort by heapUsed and take the median
|
|
||||||
samples.sort((a, b) => a.heapUsed - b.heapUsed);
|
|
||||||
const medianIdx = Math.floor(samples.length / 2);
|
|
||||||
const median = samples[medianIdx]!;
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
...median,
|
timestamp: metrics.timestamp,
|
||||||
label,
|
label,
|
||||||
timestamp: Date.now(),
|
heapUsed: metrics.heapUsed,
|
||||||
|
heapTotal: metrics.heapTotal,
|
||||||
|
rss: metrics.rss,
|
||||||
|
external: metrics.external,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Run a memory test scenario.
|
* Run a memory test scenario.
|
||||||
*
|
*
|
||||||
* Takes before/after snapshots around the scenario function, collects
|
* @param rig - The TestRig instance running the CLI
|
||||||
* intermediate snapshots if the scenario provides them, and compares
|
|
||||||
* the result against the stored baseline.
|
|
||||||
*
|
|
||||||
* @param name - Scenario name (must match baseline key)
|
* @param name - Scenario name (must match baseline key)
|
||||||
* @param fn - Async function that executes the scenario. Receives a
|
* @param fn - Async function that executes the scenario. Receives a
|
||||||
* `recordSnapshot` callback for recording intermediate snapshots.
|
* `recordSnapshot` callback for recording intermediate snapshots.
|
||||||
* @param tolerancePercent - Override default tolerance for this scenario
|
* @param tolerancePercent - Override default tolerance for this scenario
|
||||||
*/
|
*/
|
||||||
async runScenario(
|
async runScenario(
|
||||||
|
rig: TestRig,
|
||||||
name: string,
|
name: string,
|
||||||
fn: (
|
fn: (
|
||||||
recordSnapshot: (label: string) => Promise<MemorySnapshot>,
|
recordSnapshot: (label: string) => Promise<MemorySnapshot>,
|
||||||
@@ -172,27 +122,49 @@ export class MemoryTestHarness {
|
|||||||
const tolerance = tolerancePercent ?? this.defaultTolerancePercent;
|
const tolerance = tolerancePercent ?? this.defaultTolerancePercent;
|
||||||
const snapshots: MemorySnapshot[] = [];
|
const snapshots: MemorySnapshot[] = [];
|
||||||
|
|
||||||
|
// Record initial snapshot
|
||||||
|
const beforeSnap = await this.takeSnapshot(rig, 'before');
|
||||||
|
snapshots.push(beforeSnap);
|
||||||
|
|
||||||
// Record a callback for intermediate snapshots
|
// Record a callback for intermediate snapshots
|
||||||
const recordSnapshot = async (label: string): Promise<MemorySnapshot> => {
|
const recordSnapshot = async (label: string): Promise<MemorySnapshot> => {
|
||||||
const snap = await this.takeMedianSnapshot(label);
|
// Small delay to allow telemetry to flush if needed
|
||||||
|
await rig.waitForTelemetryReady();
|
||||||
|
const snap = await this.takeSnapshot(rig, label);
|
||||||
snapshots.push(snap);
|
snapshots.push(snap);
|
||||||
return snap;
|
return snap;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Before snapshot
|
|
||||||
const beforeSnap = await this.takeMedianSnapshot('before');
|
|
||||||
snapshots.push(beforeSnap);
|
|
||||||
|
|
||||||
// Run the scenario
|
// Run the scenario
|
||||||
await fn(recordSnapshot);
|
await fn(recordSnapshot);
|
||||||
|
|
||||||
// After snapshot (median of multiple samples)
|
// Final wait for telemetry to ensure everything is flushed
|
||||||
const afterSnap = await this.takeMedianSnapshot('after');
|
await rig.waitForTelemetryReady();
|
||||||
|
|
||||||
|
// After snapshot
|
||||||
|
const afterSnap = await this.takeSnapshot(rig, 'after');
|
||||||
snapshots.push(afterSnap);
|
snapshots.push(afterSnap);
|
||||||
|
|
||||||
// Calculate peak values
|
// Calculate peak values from ALL snapshots seen during the scenario
|
||||||
const peakHeapUsed = Math.max(...snapshots.map((s) => s.heapUsed));
|
const allSnapshots = rig.readAllMemorySnapshots();
|
||||||
const peakRss = Math.max(...snapshots.map((s) => s.rss));
|
const scenarioSnapshots = allSnapshots.filter(
|
||||||
|
(s) =>
|
||||||
|
s.timestamp >= beforeSnap.timestamp &&
|
||||||
|
s.timestamp <= afterSnap.timestamp,
|
||||||
|
);
|
||||||
|
|
||||||
|
const peakHeapUsed = Math.max(
|
||||||
|
...scenarioSnapshots.map((s) => s.heapUsed),
|
||||||
|
...snapshots.map((s) => s.heapUsed),
|
||||||
|
);
|
||||||
|
const peakRss = Math.max(
|
||||||
|
...scenarioSnapshots.map((s) => s.rss),
|
||||||
|
...snapshots.map((s) => s.rss),
|
||||||
|
);
|
||||||
|
const peakExternal = Math.max(
|
||||||
|
...scenarioSnapshots.map((s) => s.external),
|
||||||
|
...snapshots.map((s) => s.external),
|
||||||
|
);
|
||||||
|
|
||||||
// Get baseline
|
// Get baseline
|
||||||
const baseline = this.baselines.scenarios[name];
|
const baseline = this.baselines.scenarios[name];
|
||||||
@@ -202,15 +174,12 @@ export class MemoryTestHarness {
|
|||||||
let withinTolerance = true;
|
let withinTolerance = true;
|
||||||
|
|
||||||
if (baseline) {
|
if (baseline) {
|
||||||
|
const measuredMB = afterSnap.heapUsed / (1024 * 1024);
|
||||||
deltaPercent =
|
deltaPercent =
|
||||||
((afterSnap.heapUsed - baseline.heapUsedBytes) /
|
((measuredMB - baseline.heapUsedMB) / baseline.heapUsedMB) * 100;
|
||||||
baseline.heapUsedBytes) *
|
|
||||||
100;
|
|
||||||
withinTolerance = deltaPercent <= tolerance;
|
withinTolerance = deltaPercent <= tolerance;
|
||||||
}
|
}
|
||||||
|
|
||||||
const peakExternal = Math.max(...snapshots.map((s) => s.external));
|
|
||||||
|
|
||||||
const result: MemoryTestResult = {
|
const result: MemoryTestResult = {
|
||||||
scenarioName: name,
|
scenarioName: name,
|
||||||
snapshots,
|
snapshots,
|
||||||
@@ -248,16 +217,16 @@ export class MemoryTestHarness {
|
|||||||
return; // Don't fail if no baseline exists yet
|
return; // Don't fail if no baseline exists yet
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const measuredMB = result.finalHeapUsed / (1024 * 1024);
|
||||||
const deltaPercent =
|
const deltaPercent =
|
||||||
((result.finalHeapUsed - result.baseline.heapUsedBytes) /
|
((measuredMB - result.baseline.heapUsedMB) / result.baseline.heapUsedMB) *
|
||||||
result.baseline.heapUsedBytes) *
|
|
||||||
100;
|
100;
|
||||||
|
|
||||||
if (deltaPercent > tolerance) {
|
if (deltaPercent > tolerance) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
`Memory regression detected for "${result.scenarioName}"!\n` +
|
`Memory regression detected for "${result.scenarioName}"!\n` +
|
||||||
` Measured: ${formatMB(result.finalHeapUsed)} heap used\n` +
|
` Measured: ${formatMB(result.finalHeapUsed)} heap used\n` +
|
||||||
` Baseline: ${formatMB(result.baseline.heapUsedBytes)} heap used\n` +
|
` Baseline: ${result.baseline.heapUsedMB.toFixed(1)} MB heap used\n` +
|
||||||
` Delta: ${deltaPercent.toFixed(1)}% (tolerance: ${tolerance}%)\n` +
|
` Delta: ${deltaPercent.toFixed(1)}% (tolerance: ${tolerance}%)\n` +
|
||||||
` Peak heap: ${formatMB(result.peakHeapUsed)}\n` +
|
` Peak heap: ${formatMB(result.peakHeapUsed)}\n` +
|
||||||
` Peak RSS: ${formatMB(result.peakRss)}\n` +
|
` Peak RSS: ${formatMB(result.peakRss)}\n` +
|
||||||
@@ -270,20 +239,22 @@ export class MemoryTestHarness {
|
|||||||
* Update the baseline for a scenario with the current measured values.
|
* Update the baseline for a scenario with the current measured values.
|
||||||
*/
|
*/
|
||||||
updateScenarioBaseline(result: MemoryTestResult): void {
|
updateScenarioBaseline(result: MemoryTestResult): void {
|
||||||
|
const lastSnapshot = result.snapshots[result.snapshots.length - 1];
|
||||||
updateBaseline(this.baselinesPath, result.scenarioName, {
|
updateBaseline(this.baselinesPath, result.scenarioName, {
|
||||||
heapUsedBytes: result.finalHeapUsed,
|
heapUsedMB: Number((result.finalHeapUsed / (1024 * 1024)).toFixed(1)),
|
||||||
heapTotalBytes:
|
heapTotalMB: Number(
|
||||||
result.snapshots[result.snapshots.length - 1]?.heapTotal ?? 0,
|
((lastSnapshot?.heapTotal ?? 0) / (1024 * 1024)).toFixed(1),
|
||||||
rssBytes: result.finalRss,
|
),
|
||||||
externalBytes: result.finalExternal,
|
rssMB: Number((result.finalRss / (1024 * 1024)).toFixed(1)),
|
||||||
|
externalMB: Number((result.finalExternal / (1024 * 1024)).toFixed(1)),
|
||||||
});
|
});
|
||||||
// Reload baselines after update
|
// Reload baselines after update
|
||||||
this.baselines = loadBaselines(this.baselinesPath);
|
this.baselines = loadBaselines(this.baselinesPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Analyze snapshots to detect sustained leaks across 3 snapshots.
|
* Analyze snapshots to detect sustained leaks.
|
||||||
* A leak is flagged if growth is observed in both phases for any heap space.
|
* A leak is flagged if growth is observed in both phases.
|
||||||
*/
|
*/
|
||||||
analyzeSnapshots(
|
analyzeSnapshots(
|
||||||
snapshots: MemorySnapshot[],
|
snapshots: MemorySnapshot[],
|
||||||
@@ -297,55 +268,20 @@ export class MemoryTestHarness {
|
|||||||
const snap2 = snapshots[snapshots.length - 2];
|
const snap2 = snapshots[snapshots.length - 2];
|
||||||
const snap3 = snapshots[snapshots.length - 1];
|
const snap3 = snapshots[snapshots.length - 1];
|
||||||
|
|
||||||
if (!snap1 || !snap2 || !snap3) {
|
const growth1 = snap2.heapUsed - snap1.heapUsed;
|
||||||
return { leaked: false, message: 'Missing snapshots' };
|
const growth2 = snap3.heapUsed - snap2.heapUsed;
|
||||||
}
|
|
||||||
|
|
||||||
const spaceNames = new Set<string>();
|
const leaked = growth1 > thresholdBytes && growth2 > thresholdBytes;
|
||||||
snap1.heapSpaces.forEach((s: any) => spaceNames.add(s.space_name));
|
let message = leaked
|
||||||
snap2.heapSpaces.forEach((s: any) => spaceNames.add(s.space_name));
|
? `Memory bloat detected: sustained growth (${formatMB(growth1)} -> ${formatMB(growth2)})`
|
||||||
snap3.heapSpaces.forEach((s: any) => spaceNames.add(s.space_name));
|
: `No sustained growth detected above threshold.`;
|
||||||
|
|
||||||
let hasSustainedGrowth = false;
|
return { leaked, message };
|
||||||
const growthDetails: string[] = [];
|
|
||||||
|
|
||||||
for (const name of spaceNames) {
|
|
||||||
const size1 =
|
|
||||||
snap1.heapSpaces.find((s: any) => s.space_name === name)
|
|
||||||
?.space_used_size ?? 0;
|
|
||||||
const size2 =
|
|
||||||
snap2.heapSpaces.find((s: any) => s.space_name === name)
|
|
||||||
?.space_used_size ?? 0;
|
|
||||||
const size3 =
|
|
||||||
snap3.heapSpaces.find((s: any) => s.space_name === name)
|
|
||||||
?.space_used_size ?? 0;
|
|
||||||
|
|
||||||
const growth1 = size2 - size1;
|
|
||||||
const growth2 = size3 - size2;
|
|
||||||
|
|
||||||
if (growth1 > thresholdBytes && growth2 > thresholdBytes) {
|
|
||||||
hasSustainedGrowth = true;
|
|
||||||
growthDetails.push(
|
|
||||||
`${name}: sustained growth (${formatMB(growth1)} -> ${formatMB(growth2)})`,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let message = '';
|
|
||||||
if (hasSustainedGrowth) {
|
|
||||||
message =
|
|
||||||
`Memory bloat detected in heap spaces:\n ` +
|
|
||||||
growthDetails.join('\n ');
|
|
||||||
} else {
|
|
||||||
message = `No sustained growth detected in any heap space above threshold.`;
|
|
||||||
}
|
|
||||||
|
|
||||||
return { leaked: hasSustainedGrowth, message };
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Assert that memory returns to a baseline level after a peak.
|
* Assert that memory returns to a baseline level after a peak.
|
||||||
* Useful for verifying that large tool outputs are not retained.
|
* Useful for verifying that large tool outputs or history are not retained.
|
||||||
*/
|
*/
|
||||||
assertMemoryReturnsToBaseline(
|
assertMemoryReturnsToBaseline(
|
||||||
snapshots: MemorySnapshot[],
|
snapshots: MemorySnapshot[],
|
||||||
@@ -355,26 +291,22 @@ export class MemoryTestHarness {
|
|||||||
throw new Error('Need at least 3 snapshots to check return to baseline');
|
throw new Error('Need at least 3 snapshots to check return to baseline');
|
||||||
}
|
}
|
||||||
|
|
||||||
const baseline = snapshots[0]; // Assume first is baseline
|
// Find the first non-zero snapshot as baseline
|
||||||
const peak = snapshots.reduce(
|
const baseline = snapshots.find((s) => s.heapUsed > 0);
|
||||||
(max, s) => (s.heapUsed > max.heapUsed ? s : max),
|
if (!baseline) {
|
||||||
snapshots[0],
|
return; // No memory reported yet
|
||||||
);
|
|
||||||
const final = snapshots[snapshots.length - 1];
|
|
||||||
|
|
||||||
if (!baseline || !peak || !final) {
|
|
||||||
throw new Error('Missing snapshots for return to baseline check');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const final = snapshots[snapshots.length - 1]!;
|
||||||
|
|
||||||
const tolerance = baseline.heapUsed * (tolerancePercent / 100);
|
const tolerance = baseline.heapUsed * (tolerancePercent / 100);
|
||||||
const delta = final.heapUsed - baseline.heapUsed;
|
const delta = final.heapUsed - baseline.heapUsed;
|
||||||
|
|
||||||
if (delta > tolerance) {
|
if (delta > tolerance) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
`Memory did not return to baseline!\n` +
|
`Memory did not return to baseline!\n` +
|
||||||
` Baseline: ${formatMB(baseline.heapUsed)}\n` +
|
` Baseline: ${formatMB(baseline.heapUsed)} (${baseline.label})\n` +
|
||||||
` Peak: ${formatMB(peak.heapUsed)}\n` +
|
` Final: ${formatMB(final.heapUsed)} (${final.label})\n` +
|
||||||
` Final: ${formatMB(final.heapUsed)}\n` +
|
|
||||||
` Delta: ${formatMB(delta)} (tolerance: ${formatMB(tolerance)})`,
|
` Delta: ${formatMB(delta)} (tolerance: ${formatMB(tolerance)})`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -397,7 +329,7 @@ export class MemoryTestHarness {
|
|||||||
for (const result of resultsToReport) {
|
for (const result of resultsToReport) {
|
||||||
const measured = formatMB(result.finalHeapUsed);
|
const measured = formatMB(result.finalHeapUsed);
|
||||||
const baseline = result.baseline
|
const baseline = result.baseline
|
||||||
? formatMB(result.baseline.heapUsedBytes)
|
? `${result.baseline.heapUsedMB.toFixed(1)} MB`
|
||||||
: 'N/A';
|
: 'N/A';
|
||||||
const delta = result.baseline
|
const delta = result.baseline
|
||||||
? `${result.deltaPercent >= 0 ? '+' : ''}${result.deltaPercent.toFixed(1)}%`
|
? `${result.deltaPercent >= 0 ? '+' : ''}${result.deltaPercent.toFixed(1)}%`
|
||||||
@@ -461,26 +393,6 @@ export class MemoryTestHarness {
|
|||||||
console.log(report);
|
console.log(report);
|
||||||
return report;
|
return report;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Triggers V8 garbage collection `this.gcCycles` times in a row.
 *
 * A pause of `this.gcDelayMs` is inserted between consecutive cycles (but not
 * after the final one) so weak references and FinalizationRegistry callbacks
 * get a chance to run.
 *
 * @throws Error when `globalThis.gc` is not a function, i.e. the process was
 *   not started with `--expose-gc`.
 */
private async forceGC(): Promise<void> {
  const gcExposed = typeof globalThis.gc === 'function';
  if (!gcExposed) {
    throw new Error(
      'global.gc() not available. Run with --expose-gc for accurate measurements.',
    );
  }

  // Count cycles from 1 so "is this the last cycle?" reads naturally.
  for (let cycle = 1; cycle <= this.gcCycles; cycle++) {
    globalThis.gc();
    const moreCyclesPending = cycle < this.gcCycles;
    if (moreCyclesPending) {
      await sleep(this.gcDelayMs);
    }
  }
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -1475,7 +1475,7 @@ export class TestRig {
|
|||||||
readMetric(metricName: string): TelemetryMetric | null {
|
readMetric(metricName: string): TelemetryMetric | null {
|
||||||
const logs = this._readAndParseTelemetryLog();
|
const logs = this._readAndParseTelemetryLog();
|
||||||
for (const logData of logs) {
|
for (const logData of logs) {
|
||||||
if (logData.scopeMetrics) {
|
if (logData && logData.scopeMetrics) {
|
||||||
for (const scopeMetric of logData.scopeMetrics) {
|
for (const scopeMetric of logData.scopeMetrics) {
|
||||||
for (const metric of scopeMetric.metrics) {
|
for (const metric of scopeMetric.metrics) {
|
||||||
if (metric.descriptor.name === `gemini_cli.${metricName}`) {
|
if (metric.descriptor.name === `gemini_cli.${metricName}`) {
|
||||||
@@ -1488,6 +1488,133 @@ export class TestRig {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Collapses the memory snapshots reconstructed from telemetry into a single
 * reading.
 *
 * @param strategy `'last'` selects the final snapshot in the list. `'peak'`
 *   (the default) selects the snapshot with the highest RSS; when that
 *   highest RSS is 0, the snapshot with the highest `heapUsed` is selected
 *   instead.
 * @returns A fresh object holding the selected snapshot's fields. When there
 *   are no snapshots at all, every memory field is 0 and `timestamp` is the
 *   current time.
 */
readMemoryMetrics(strategy: 'peak' | 'last' = 'peak'): {
  timestamp: number;
  heapUsed: number;
  heapTotal: number;
  rss: number;
  external: number;
} {
  const all = this._getMemorySnapshots();

  // Nothing recorded: report an all-zero reading stamped with "now".
  if (all.length === 0) {
    return {
      timestamp: Date.now(),
      heapUsed: 0,
      heapTotal: 0,
      rss: 0,
      external: 0,
    };
  }

  // Always return a copy, with the fields in a fixed order, rather than the
  // snapshot object itself.
  const copyOf = (snapshot: (typeof all)[number]) => ({
    timestamp: snapshot.timestamp,
    heapUsed: snapshot.heapUsed,
    heapTotal: snapshot.heapTotal,
    rss: snapshot.rss,
    external: snapshot.external,
  });

  if (strategy === 'last') {
    return copyOf(all[all.length - 1]);
  }

  // Highest RSS wins; strict `>` keeps the earliest snapshot on ties.
  let chosen = all.reduce((best, candidate) =>
    candidate.rss > best.rss ? candidate : best,
  );

  // Fallback: no RSS was found, so rank by heap usage instead, starting from
  // the snapshot picked above.
  if (chosen.rss === 0) {
    chosen = all.reduce(
      (best, candidate) =>
        candidate.heapUsed > best.heapUsed ? candidate : best,
      chosen,
    );
  }

  return copyOf(chosen);
}
|
||||||
|
|
||||||
|
/**
 * Public accessor for the full list of memory snapshots.
 *
 * @returns Exactly what `_getMemorySnapshots()` returns, with no filtering
 *   or copying applied here.
 */
readAllMemorySnapshots(): {
  timestamp: number;
  heapUsed: number;
  heapTotal: number;
  rss: number;
  external: number;
}[] {
  const everySnapshot = this._getMemorySnapshots();
  return everySnapshot;
}
|
||||||
|
|
||||||
|
/**
 * Rebuilds memory snapshots from the `gemini_cli.memory.usage` metric found
 * in the parsed telemetry log.
 *
 * Data points that share the same `session.id` attribute, `component`
 * attribute and start time are merged into one snapshot. Each data point
 * fills the field named by its `memory_type` attribute (`heap_used`,
 * `heap_total`, `rss` or `external`); other values of `memory_type` are
 * ignored. A field that is never reported stays 0.
 *
 * @returns The merged snapshots in ascending `timestamp` order, where
 *   `timestamp` is the data point start time converted to milliseconds.
 */
private _getMemorySnapshots(): {
  timestamp: number;
  heapUsed: number;
  heapTotal: number;
  rss: number;
  external: number;
}[] {
  type Snapshot = {
    timestamp: number;
    heapUsed: number;
    heapTotal: number;
    rss: number;
    external: number;
  };

  // Keyed by "<session>-<component>-<seconds>-<nanos>"; a Map iterates in
  // insertion order, which keeps ties in the final sort in first-seen order.
  const merged = new Map<string, Snapshot>();

  for (const entry of this._readAndParseTelemetryLog()) {
    if (!entry?.scopeMetrics) {
      continue;
    }

    for (const scope of entry.scopeMetrics) {
      for (const metric of scope.metrics) {
        if (metric.descriptor.name !== 'gemini_cli.memory.usage') {
          continue;
        }

        for (const point of metric.dataPoints) {
          const session =
            (point.attributes?.['session.id'] as string) || 'unknown';
          const source =
            (point.attributes?.['component'] as string) || 'unknown';
          // startTime is read as a [seconds, nanoseconds] pair.
          const secs = point.startTime?.[0] || 0;
          const nsecs = point.startTime?.[1] || 0;
          const key = `${session}-${source}-${secs}-${nsecs}`;

          let snapshot = merged.get(key);
          if (!snapshot) {
            snapshot = {
              timestamp: secs * 1000 + Math.floor(nsecs / 1000000),
              rss: 0,
              heapUsed: 0,
              heapTotal: 0,
              external: 0,
            };
            merged.set(key, snapshot);
          }

          // Prefer the recorded maximum, then the sum, then 0.
          const reading = point.value?.max ?? point.value?.sum ?? 0;

          switch (point.attributes?.['memory_type']) {
            case 'heap_used':
              snapshot.heapUsed = reading;
              break;
            case 'heap_total':
              snapshot.heapTotal = reading;
              break;
            case 'rss':
              snapshot.rss = reading;
              break;
            case 'external':
              snapshot.external = reading;
              break;
            default:
              break;
          }
        }
      }
    }
  }

  const ordered = Array.from(merged.values());
  ordered.sort((earlier, later) => earlier.timestamp - later.timestamp);
  return ordered;
}
|
||||||
|
|
||||||
async runInteractive(options?: {
|
async runInteractive(options?: {
|
||||||
args?: string | string[];
|
args?: string | string[];
|
||||||
approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
|
approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
|
||||||
|
|||||||
Reference in New Issue
Block a user