feat(test): refactor the memory usage test to use metrics from CLI process instead of test runner (#25708)

2026-05-15 06:12:50 -07:00 · 2026-04-21 10:06:22 -07:00
parent dd4c41742b
commit 64645b6ff1
5 changed files with 284 additions and 223 deletions
@@ -1,55 +1,55 @@
 {
  "version": 1,
-  "updatedAt": "2026-04-10T15:36:04.547Z",
+  "updatedAt": "2026-04-20T18:04:59.671Z",
  "scenarios": {
    "multi-turn-conversation": {
-      "heapUsedBytes": 120082704,
+      "heapUsedMB": 68.8,
-      "heapTotalBytes": 177586176,
+      "heapTotalMB": 91.2,
-      "rssBytes": 269172736,
+      "rssMB": 215.4,
-      "externalBytes": 4304053,
+      "externalMB": 93.8,
-      "timestamp": "2026-04-10T15:35:17.603Z"
+      "timestamp": "2026-04-20T18:02:40.101Z"
    },
    "multi-function-call-repo-search": {
-      "heapUsedBytes": 104644984,
+      "heapUsedMB": 73.5,
-      "heapTotalBytes": 111575040,
+      "heapTotalMB": 93.1,
-      "rssBytes": 204079104,
+      "rssMB": 223.6,
-      "externalBytes": 4304053,
+      "externalMB": 97.7,
-      "timestamp": "2026-04-10T15:35:22.480Z"
+      "timestamp": "2026-04-20T18:02:42.032Z"
    },
    "idle-session-startup": {
-      "heapUsedBytes": 119813672,
+      "heapUsedMB": 69.8,
-      "heapTotalBytes": 177061888,
+      "heapTotalMB": 92.4,
-      "rssBytes": 267943936,
+      "rssMB": 217.4,
-      "externalBytes": 4304053,
+      "externalMB": 93.8,
-      "timestamp": "2026-04-10T15:35:08.035Z"
+      "timestamp": "2026-04-20T18:02:36.294Z"
    },
    "simple-prompt-response": {
-      "heapUsedBytes": 119722064,
+      "heapUsedMB": 69.5,
-      "heapTotalBytes": 177324032,
+      "heapTotalMB": 92.4,
-      "rssBytes": 268812288,
+      "rssMB": 216.1,
-      "externalBytes": 4304053,
+      "externalMB": 93.8,
-      "timestamp": "2026-04-10T15:35:12.770Z"
+      "timestamp": "2026-04-20T18:02:38.198Z"
    },
    "resume-large-chat-with-messages": {
-      "heapUsedBytes": 106545568,
+      "heapUsedMB": 887.1,
-      "heapTotalBytes": 111509504,
+      "heapTotalMB": 954.3,
-      "rssBytes": 202596352,
+      "rssMB": 1109.6,
-      "externalBytes": 4306101,
+      "externalMB": 103.2,
-      "timestamp": "2026-04-10T15:36:04.547Z"
+      "timestamp": "2026-04-20T18:04:59.671Z"
    },
    "resume-large-chat": {
-      "heapUsedBytes": 106513760,
+      "heapUsedMB": 885.6,
-      "heapTotalBytes": 111509504,
+      "heapTotalMB": 955.6,
-      "rssBytes": 202596352,
+      "rssMB": 1107.8,
-      "externalBytes": 4306101,
+      "externalMB": 110.5,
-      "timestamp": "2026-04-10T15:35:59.528Z"
+      "timestamp": "2026-04-20T18:04:06.526Z"
    },
    "large-chat": {
-      "heapUsedBytes": 106471568,
+      "heapUsedMB": 158.5,
-      "heapTotalBytes": 111509504,
+      "heapTotalMB": 193,
-      "rssBytes": 202596352,
+      "rssMB": 787.9,
-      "externalBytes": 4306101,
+      "externalMB": 104,
-      "timestamp": "2026-04-10T15:35:53.180Z"
+      "timestamp": "2026-04-20T18:03:12.486Z"
    }
  }
 }
@@ -16,15 +16,21 @@ import {
  mkdirSync,
  rmSync,
 } from 'node:fs';
-import { randomUUID } from 'node:crypto';
+import { randomUUID, createHash } from 'node:crypto';
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const BASELINES_PATH = join(__dirname, 'baselines.json');
 const UPDATE_BASELINES = process.env['UPDATE_MEMORY_BASELINES'] === 'true';
 /**
  * Compute the hex-encoded SHA-256 digest of a project root path.
  *
  * @param projectRoot - Path string to hash; it is hashed exactly as given.
  * @returns The 64-character lowercase hexadecimal digest.
  */
 function getProjectHash(projectRoot: string): string {
  const hasher = createHash('sha256');
  hasher.update(projectRoot);
  return hasher.digest('hex');
 }
 const TOLERANCE_PERCENT = 10;
 // Fake API key for tests using fake responses
-const TEST_ENV = { GEMINI_API_KEY: 'fake-memory-test-key' };
+const TEST_ENV = {
  GEMINI_API_KEY: 'fake-memory-test-key',
  GEMINI_MEMORY_MONITOR_INTERVAL: '100',
 };
 describe('Memory Usage Tests', () => {
  let harness: MemoryTestHarness;
@@ -56,6 +62,7 @@ describe('Memory Usage Tests', () => {
    });
    const result = await harness.runScenario(
      rig,
      'idle-session-startup',
      async (recordSnapshot) => {
        await rig.run({
@@ -85,6 +92,7 @@ describe('Memory Usage Tests', () => {
    });
    const result = await harness.runScenario(
      rig,
      'simple-prompt-response',
      async (recordSnapshot) => {
        await rig.run({
@@ -122,6 +130,7 @@ describe('Memory Usage Tests', () => {
    ];
    const result = await harness.runScenario(
      rig,
      'multi-turn-conversation',
      async (recordSnapshot) => {
        // Run through all turns as a piped sequence
@@ -144,6 +153,9 @@ describe('Memory Usage Tests', () => {
      );
    } else {
      harness.assertWithinBaseline(result);
      harness.assertMemoryReturnsToBaseline(result.snapshots, 20);
      const { leaked, message } = harness.analyzeSnapshots(result.snapshots);
      if (leaked) console.warn(`⚠ ${message}`);
    }
  });
@@ -168,6 +180,7 @@ describe('Memory Usage Tests', () => {
    );
    const result = await harness.runScenario(
      rig,
      'multi-function-call-repo-search',
      async (recordSnapshot) => {
        await rig.run({
@@ -189,6 +202,7 @@ describe('Memory Usage Tests', () => {
      );
    } else {
      harness.assertWithinBaseline(result);
      harness.assertMemoryReturnsToBaseline(result.snapshots, 20);
    }
  });
@@ -228,6 +242,7 @@ describe('Memory Usage Tests', () => {
      });
      const result = await harness.runScenario(
        rig,
        'large-chat',
        async (recordSnapshot) => {
          await rig.run({
@@ -257,19 +272,21 @@ describe('Memory Usage Tests', () => {
      });
      const result = await harness.runScenario(
        rig,
        'resume-large-chat',
        async (recordSnapshot) => {
          // Ensure the history file is linked
          const targetChatsDir = join(
-            rig.testDir!,
+            rig.homeDir!,
            '.gemini',
            'tmp',
-            'test-project-hash',
+            getProjectHash(rig.testDir!),
            'chats',
          );
          mkdirSync(targetChatsDir, { recursive: true });
          const targetHistoryPath = join(
            targetChatsDir,
-            'large-chat-session.json',
+            'session-large-chat.json',
          );
          if (existsSync(targetHistoryPath)) rmSync(targetHistoryPath);
          copyFileSync(sharedHistoryPath, targetHistoryPath);
@@ -302,19 +319,21 @@ describe('Memory Usage Tests', () => {
      });
      const result = await harness.runScenario(
        rig,
        'resume-large-chat-with-messages',
        async (recordSnapshot) => {
          // Ensure the history file is linked
          const targetChatsDir = join(
-            rig.testDir!,
+            rig.homeDir!,
            '.gemini',
            'tmp',
-            'test-project-hash',
+            getProjectHash(rig.testDir!),
            'chats',
          );
          mkdirSync(targetChatsDir, { recursive: true });
          const targetHistoryPath = join(
            targetChatsDir,
-            'large-chat-session.json',
+            'session-large-chat.json',
          );
          if (existsSync(targetHistoryPath)) rmSync(targetHistoryPath);
          copyFileSync(sharedHistoryPath, targetHistoryPath);
@@ -457,6 +476,9 @@ async function generateSharedLargeChatData(tempDir: string) {
  // Generate responses for resumed chat
  const resumeResponsesStream = createWriteStream(resumeResponsesPath);
  for (let i = 0; i < 5; i++) {
    // Doubling up on non-streaming responses to satisfy classifier and complexity checks
    resumeResponsesStream.write(JSON.stringify(complexityResponse) + '\n');
    resumeResponsesStream.write(JSON.stringify(summaryResponse) + '\n');
    resumeResponsesStream.write(JSON.stringify(complexityResponse) + '\n');
    resumeResponsesStream.write(
      JSON.stringify({
@@ -10,10 +10,10 @@ import { readFileSync, writeFileSync, existsSync } from 'node:fs';
 * Baseline entry for a single memory test scenario.
 */
 export interface MemoryBaseline {
-  heapUsedBytes: number;
+  heapUsedMB: number;
-  heapTotalBytes: number;
+  heapTotalMB: number;
-  rssBytes: number;
+  rssMB: number;
-  externalBytes: number;
+  externalMB: number;
  timestamp: string;
 }
@@ -61,18 +61,18 @@ export function updateBaseline(
  path: string,
  scenarioName: string,
  measured: {
-    heapUsedBytes: number;
+    heapUsedMB: number;
-    heapTotalBytes: number;
+    heapTotalMB: number;
-    rssBytes: number;
+    rssMB: number;
-    externalBytes: number;
+    externalMB: number;
  },
 ): void {
  const baselines = loadBaselines(path);
  baselines.scenarios[scenarioName] = {
-    heapUsedBytes: measured.heapUsedBytes,
+    heapUsedMB: measured.heapUsedMB,
-    heapTotalBytes: measured.heapTotalBytes,
+    heapTotalMB: measured.heapTotalMB,
-    rssBytes: measured.rssBytes,
+    rssMB: measured.rssMB,
-    externalBytes: measured.externalBytes,
+    externalMB: measured.externalMB,
    timestamp: new Date().toISOString(),
  };
  saveBaselines(path, baselines);
@@ -4,10 +4,9 @@
 * SPDX-License-Identifier: Apache-2.0
 */
 import v8 from 'node:v8';
 import { setTimeout as sleep } from 'node:timers/promises';
 import { loadBaselines, updateBaseline } from './memory-baselines.js';
 import type { MemoryBaseline, MemoryBaselineFile } from './memory-baselines.js';
 import type { TestRig } from './test-rig.js';
 /** Configuration for asciichart plot function. */
 interface PlotConfig {
@@ -28,9 +27,6 @@ export interface MemorySnapshot {
  heapTotal: number;
  rss: number;
  external: number;
  arrayBuffers: number;
  heapSizeLimit: number;
  heapSpaces: any[];
 }
 /**
@@ -64,16 +60,13 @@ export interface MemoryTestHarnessOptions {
  gcDelayMs?: number;
  /** Number of samples to take for median calculation. Default: 3 */
  sampleCount?: number;
  /** Pause in ms between samples. Default: 50 */
  samplePauseMs?: number;
 }
 /**
 * MemoryTestHarness provides infrastructure for running memory usage tests.
 *
 * It handles:
- * - Forcing V8 garbage collection to reduce noise
+ * - Extracting memory metrics from CLI process telemetry
 * - Taking V8 heap snapshots for accurate memory measurement
 * - Comparing against baselines with configurable tolerance
 * - Generating ASCII chart reports of memory trends
 */
@@ -81,88 +74,45 @@ export class MemoryTestHarness {
  private baselines: MemoryBaselineFile;
  private readonly baselinesPath: string;
  private readonly defaultTolerancePercent: number;
  private readonly gcCycles: number;
  private readonly gcDelayMs: number;
  private readonly sampleCount: number;
  private readonly samplePauseMs: number;
  private allResults: MemoryTestResult[] = [];
  constructor(options: MemoryTestHarnessOptions) {
    this.baselinesPath = options.baselinesPath;
    this.defaultTolerancePercent = options.defaultTolerancePercent ?? 10;
    this.gcCycles = options.gcCycles ?? 3;
    this.gcDelayMs = options.gcDelayMs ?? 100;
    this.sampleCount = options.sampleCount ?? 3;
    this.samplePauseMs = options.samplePauseMs ?? 50;
    this.baselines = loadBaselines(this.baselinesPath);
  }
  /**
-   * Force garbage collection multiple times and take a V8 heap snapshot.
+   * Extract memory snapshot from TestRig telemetry.
   * Forces GC multiple times with delays to allow weak references and
   * FinalizationRegistry callbacks to run, reducing measurement noise.
   */
-  async takeSnapshot(label: string = 'snapshot'): Promise<MemorySnapshot> {
+  async takeSnapshot(
-    await this.forceGC();
+    rig: TestRig,
-
+    label: string = 'snapshot',
-    const memUsage = process.memoryUsage();
+    strategy: 'peak' | 'last' = 'last',
    const heapStats = v8.getHeapStatistics();
    return {
      timestamp: Date.now(),
      label,
      heapUsed: memUsage.heapUsed,
      heapTotal: memUsage.heapTotal,
      rss: memUsage.rss,
      external: memUsage.external,
      arrayBuffers: memUsage.arrayBuffers,
      heapSizeLimit: heapStats.heap_size_limit,
      heapSpaces: v8.getHeapSpaceStatistics(),
    };
  }
  /**
   * Take multiple snapshot samples and return the median to reduce noise.
   */
  async takeMedianSnapshot(
    label: string = 'median',
    count?: number,
  ): Promise<MemorySnapshot> {
-    const samples: MemorySnapshot[] = [];
+    const metrics = rig.readMemoryMetrics(strategy);
    const numSamples = count ?? this.sampleCount;
    for (let i = 0; i < numSamples; i++) {
      samples.push(await this.takeSnapshot(`${label}_sample_${i}`));
      if (i < numSamples - 1) {
        await sleep(this.samplePauseMs);
      }
    }
    // Sort by heapUsed and take the median
    samples.sort((a, b) => a.heapUsed - b.heapUsed);
    const medianIdx = Math.floor(samples.length / 2);
    const median = samples[medianIdx]!;
    return {
-      ...median,
+      timestamp: metrics.timestamp,
      label,
-      timestamp: Date.now(),
+      heapUsed: metrics.heapUsed,
      heapTotal: metrics.heapTotal,
      rss: metrics.rss,
      external: metrics.external,
    };
  }
  /**
   * Run a memory test scenario.
   *
-   * Takes before/after snapshots around the scenario function, collects
+   * @param rig - The TestRig instance running the CLI
   * intermediate snapshots if the scenario provides them, and compares
   * the result against the stored baseline.
   *
   * @param name - Scenario name (must match baseline key)
   * @param fn - Async function that executes the scenario. Receives a
   *   `recordSnapshot` callback for recording intermediate snapshots.
   * @param tolerancePercent - Override default tolerance for this scenario
   */
  async runScenario(
    rig: TestRig,
    name: string,
    fn: (
      recordSnapshot: (label: string) => Promise<MemorySnapshot>,
@@ -172,27 +122,49 @@ export class MemoryTestHarness {
    const tolerance = tolerancePercent ?? this.defaultTolerancePercent;
    const snapshots: MemorySnapshot[] = [];
    // Record initial snapshot
    const beforeSnap = await this.takeSnapshot(rig, 'before');
    snapshots.push(beforeSnap);
    // Record a callback for intermediate snapshots
    const recordSnapshot = async (label: string): Promise<MemorySnapshot> => {
-      const snap = await this.takeMedianSnapshot(label);
+      // Small delay to allow telemetry to flush if needed
      await rig.waitForTelemetryReady();
      const snap = await this.takeSnapshot(rig, label);
      snapshots.push(snap);
      return snap;
    };
    // Before snapshot
    const beforeSnap = await this.takeMedianSnapshot('before');
    snapshots.push(beforeSnap);
    // Run the scenario
    await fn(recordSnapshot);
-    // After snapshot (median of multiple samples)
+    // Final wait for telemetry to ensure everything is flushed
-    const afterSnap = await this.takeMedianSnapshot('after');
+    await rig.waitForTelemetryReady();
    // After snapshot
    const afterSnap = await this.takeSnapshot(rig, 'after');
    snapshots.push(afterSnap);
-    // Calculate peak values
+    // Calculate peak values from ALL snapshots seen during the scenario
-    const peakHeapUsed = Math.max(...snapshots.map((s) => s.heapUsed));
+    const allSnapshots = rig.readAllMemorySnapshots();
-    const peakRss = Math.max(...snapshots.map((s) => s.rss));
+    const scenarioSnapshots = allSnapshots.filter(
      (s) =>
        s.timestamp >= beforeSnap.timestamp &&
        s.timestamp <= afterSnap.timestamp,
    );
    const peakHeapUsed = Math.max(
      ...scenarioSnapshots.map((s) => s.heapUsed),
      ...snapshots.map((s) => s.heapUsed),
    );
    const peakRss = Math.max(
      ...scenarioSnapshots.map((s) => s.rss),
      ...snapshots.map((s) => s.rss),
    );
    const peakExternal = Math.max(
      ...scenarioSnapshots.map((s) => s.external),
      ...snapshots.map((s) => s.external),
    );
    // Get baseline
    const baseline = this.baselines.scenarios[name];
@@ -202,15 +174,12 @@ export class MemoryTestHarness {
    let withinTolerance = true;
    if (baseline) {
      const measuredMB = afterSnap.heapUsed / (1024 * 1024);
      deltaPercent =
-        ((afterSnap.heapUsed - baseline.heapUsedBytes) /
+        ((measuredMB - baseline.heapUsedMB) / baseline.heapUsedMB) * 100;
          baseline.heapUsedBytes) *
        100;
      withinTolerance = deltaPercent <= tolerance;
    }
    const peakExternal = Math.max(...snapshots.map((s) => s.external));
    const result: MemoryTestResult = {
      scenarioName: name,
      snapshots,
@@ -248,16 +217,16 @@ export class MemoryTestHarness {
      return; // Don't fail if no baseline exists yet
    }
    const measuredMB = result.finalHeapUsed / (1024 * 1024);
    const deltaPercent =
-      ((result.finalHeapUsed - result.baseline.heapUsedBytes) /
+      ((measuredMB - result.baseline.heapUsedMB) / result.baseline.heapUsedMB) *
        result.baseline.heapUsedBytes) *
      100;
    if (deltaPercent > tolerance) {
      throw new Error(
        `Memory regression detected for "${result.scenarioName}"!\n` +
          `  Measured:  ${formatMB(result.finalHeapUsed)} heap used\n` +
-          `  Baseline:  ${formatMB(result.baseline.heapUsedBytes)} heap used\n` +
+          `  Baseline:  ${result.baseline.heapUsedMB.toFixed(1)} MB heap used\n` +
          `  Delta:     ${deltaPercent.toFixed(1)}% (tolerance: ${tolerance}%)\n` +
          `  Peak heap: ${formatMB(result.peakHeapUsed)}\n` +
          `  Peak RSS:  ${formatMB(result.peakRss)}\n` +
@@ -270,20 +239,22 @@ export class MemoryTestHarness {
   * Update the baseline for a scenario with the current measured values.
   */
  updateScenarioBaseline(result: MemoryTestResult): void {
    const lastSnapshot = result.snapshots[result.snapshots.length - 1];
    updateBaseline(this.baselinesPath, result.scenarioName, {
-      heapUsedBytes: result.finalHeapUsed,
+      heapUsedMB: Number((result.finalHeapUsed / (1024 * 1024)).toFixed(1)),
-      heapTotalBytes:
+      heapTotalMB: Number(
-        result.snapshots[result.snapshots.length - 1]?.heapTotal ?? 0,
+        ((lastSnapshot?.heapTotal ?? 0) / (1024 * 1024)).toFixed(1),
-      rssBytes: result.finalRss,
+      ),
-      externalBytes: result.finalExternal,
+      rssMB: Number((result.finalRss / (1024 * 1024)).toFixed(1)),
      externalMB: Number((result.finalExternal / (1024 * 1024)).toFixed(1)),
    });
    // Reload baselines after update
    this.baselines = loadBaselines(this.baselinesPath);
  }
  /**
-   * Analyze snapshots to detect sustained leaks across 3 snapshots.
+   * Analyze snapshots to detect sustained leaks.
-   * A leak is flagged if growth is observed in both phases for any heap space.
+   * A leak is flagged if growth is observed in both phases.
   */
  analyzeSnapshots(
    snapshots: MemorySnapshot[],
@@ -297,55 +268,20 @@ export class MemoryTestHarness {
    const snap2 = snapshots[snapshots.length - 2];
    const snap3 = snapshots[snapshots.length - 1];
-    if (!snap1 || !snap2 || !snap3) {
+    const growth1 = snap2.heapUsed - snap1.heapUsed;
-      return { leaked: false, message: 'Missing snapshots' };
+    const growth2 = snap3.heapUsed - snap2.heapUsed;
    }
-    const spaceNames = new Set<string>();
+    const leaked = growth1 > thresholdBytes && growth2 > thresholdBytes;
-    snap1.heapSpaces.forEach((s: any) => spaceNames.add(s.space_name));
+    let message = leaked
-    snap2.heapSpaces.forEach((s: any) => spaceNames.add(s.space_name));
+      ? `Memory bloat detected: sustained growth (${formatMB(growth1)} -> ${formatMB(growth2)})`
-    snap3.heapSpaces.forEach((s: any) => spaceNames.add(s.space_name));
+      : `No sustained growth detected above threshold.`;
-    let hasSustainedGrowth = false;
+    return { leaked, message };
    const growthDetails: string[] = [];
    for (const name of spaceNames) {
      const size1 =
        snap1.heapSpaces.find((s: any) => s.space_name === name)
          ?.space_used_size ?? 0;
      const size2 =
        snap2.heapSpaces.find((s: any) => s.space_name === name)
          ?.space_used_size ?? 0;
      const size3 =
        snap3.heapSpaces.find((s: any) => s.space_name === name)
          ?.space_used_size ?? 0;
      const growth1 = size2 - size1;
      const growth2 = size3 - size2;
      if (growth1 > thresholdBytes && growth2 > thresholdBytes) {
        hasSustainedGrowth = true;
        growthDetails.push(
          `${name}: sustained growth (${formatMB(growth1)} -> ${formatMB(growth2)})`,
        );
      }
    }
    let message = '';
    if (hasSustainedGrowth) {
      message =
        `Memory bloat detected in heap spaces:\n  ` +
        growthDetails.join('\n  ');
    } else {
      message = `No sustained growth detected in any heap space above threshold.`;
    }
    return { leaked: hasSustainedGrowth, message };
  }
  /**
   * Assert that memory returns to a baseline level after a peak.
-   * Useful for verifying that large tool outputs are not retained.
+   * Useful for verifying that large tool outputs or history are not retained.
   */
  assertMemoryReturnsToBaseline(
    snapshots: MemorySnapshot[],
@@ -355,26 +291,22 @@ export class MemoryTestHarness {
      throw new Error('Need at least 3 snapshots to check return to baseline');
    }
-    const baseline = snapshots[0]; // Assume first is baseline
+    // Find the first non-zero snapshot as baseline
-    const peak = snapshots.reduce(
+    const baseline = snapshots.find((s) => s.heapUsed > 0);
-      (max, s) => (s.heapUsed > max.heapUsed ? s : max),
+    if (!baseline) {
-      snapshots[0],
+      return; // No memory reported yet
    );
    const final = snapshots[snapshots.length - 1];
    if (!baseline || !peak || !final) {
      throw new Error('Missing snapshots for return to baseline check');
    }
    const final = snapshots[snapshots.length - 1]!;
    const tolerance = baseline.heapUsed * (tolerancePercent / 100);
    const delta = final.heapUsed - baseline.heapUsed;
    if (delta > tolerance) {
      throw new Error(
        `Memory did not return to baseline!\n` +
-          `  Baseline: ${formatMB(baseline.heapUsed)}\n` +
+          `  Baseline: ${formatMB(baseline.heapUsed)} (${baseline.label})\n` +
-          `  Peak:     ${formatMB(peak.heapUsed)}\n` +
+          `  Final:    ${formatMB(final.heapUsed)} (${final.label})\n` +
          `  Final:    ${formatMB(final.heapUsed)}\n` +
          `  Delta:    ${formatMB(delta)} (tolerance: ${formatMB(tolerance)})`,
      );
    }
@@ -397,7 +329,7 @@ export class MemoryTestHarness {
    for (const result of resultsToReport) {
      const measured = formatMB(result.finalHeapUsed);
      const baseline = result.baseline
-        ? formatMB(result.baseline.heapUsedBytes)
+        ? `${result.baseline.heapUsedMB.toFixed(1)} MB`
        : 'N/A';
      const delta = result.baseline
        ? `${result.deltaPercent >= 0 ? '+' : ''}${result.deltaPercent.toFixed(1)}%`
@@ -461,26 +393,6 @@ export class MemoryTestHarness {
    console.log(report);
    return report;
  }
  /**
   * Force V8 garbage collection.
   * Runs multiple GC cycles with delays to allow weak references
   * and FinalizationRegistry callbacks to run.
   */
  private async forceGC(): Promise<void> {
    if (typeof globalThis.gc !== 'function') {
      throw new Error(
        'global.gc() not available. Run with --expose-gc for accurate measurements.',
      );
    }
    for (let i = 0; i < this.gcCycles; i++) {
      globalThis.gc();
      if (i < this.gcCycles - 1) {
        await sleep(this.gcDelayMs);
      }
    }
  }
 }
 /**
@@ -1475,7 +1475,7 @@ export class TestRig {
  readMetric(metricName: string): TelemetryMetric | null {
    const logs = this._readAndParseTelemetryLog();
    for (const logData of logs) {
-      if (logData.scopeMetrics) {
+      if (logData && logData.scopeMetrics) {
        for (const scopeMetric of logData.scopeMetrics) {
          for (const metric of scopeMetric.metrics) {
            if (metric.descriptor.name === `gemini_cli.${metricName}`) {
@@ -1488,6 +1488,133 @@ export class TestRig {
    return null;
  }
  /**
   * Reduce the memory snapshots parsed from the telemetry log to one reading.
   *
   * @param strategy - `'last'` picks the final snapshot in timestamp order.
   *   `'peak'` (the default) picks the snapshot with the highest RSS; if that
   *   RSS is 0, it picks the snapshot with the highest heap usage instead.
   * @returns A fresh object holding the chosen snapshot's fields. When no
   *   snapshots are available, every memory field is 0 and `timestamp` is
   *   the current time.
   */
  readMemoryMetrics(strategy: 'peak' | 'last' = 'peak'): {
    timestamp: number;
    heapUsed: number;
    heapTotal: number;
    rss: number;
    external: number;
  } {
    const all = this._getMemorySnapshots();

    // Nothing recorded: report zeros stamped with "now".
    if (all.length === 0) {
      return {
        timestamp: Date.now(),
        heapUsed: 0,
        heapTotal: 0,
        rss: 0,
        external: 0,
      };
    }

    let chosen = all[0];
    if (strategy === 'last') {
      chosen = all[all.length - 1];
    } else {
      // Strict `>` keeps the earliest snapshot when several share the maximum.
      chosen = all.reduce(
        (best, candidate) => (candidate.rss > best.rss ? candidate : best),
        chosen,
      );
      // No RSS was reported at all: rank by heap usage instead.
      if (chosen.rss === 0) {
        chosen = all.reduce(
          (best, candidate) =>
            candidate.heapUsed > best.heapUsed ? candidate : best,
          chosen,
        );
      }
    }

    const { timestamp, heapUsed, heapTotal, rss, external } = chosen;
    return { timestamp, heapUsed, heapTotal, rss, external };
  }
  /**
   * Return every memory snapshot parsed from the telemetry log.
   *
   * @returns The snapshots produced by `_getMemorySnapshots()`, which sorts
   *   them by ascending `timestamp`.
   */
  readAllMemorySnapshots(): {
    timestamp: number;
    heapUsed: number;
    heapTotal: number;
    rss: number;
    external: number;
  }[] {
    const everySnapshot = this._getMemorySnapshots();
    return everySnapshot;
  }
  /**
   * Build memory snapshots from the parsed telemetry log.
   *
   * Only data points of the `gemini_cli.memory.usage` metric are used. Points
   * that share a session id, component and start time are merged into a
   * single snapshot, and each point's `memory_type` attribute (`heap_used`,
   * `heap_total`, `rss` or `external`) selects the field it fills. The
   * recorded value is the point's `max`, falling back to `sum`, then to 0.
   *
   * @returns Snapshots sorted by ascending `timestamp`, where the timestamp
   *   is the point's start time converted to milliseconds.
   */
  private _getMemorySnapshots(): {
    timestamp: number;
    heapUsed: number;
    heapTotal: number;
    rss: number;
    external: number;
  }[] {
    type Reading = {
      timestamp: number;
      heapUsed: number;
      heapTotal: number;
      rss: number;
      external: number;
    };
    // Keyed by session/component/start time; Map keeps first-seen order.
    const grouped = new Map<string, Reading>();

    for (const entry of this._readAndParseTelemetryLog()) {
      if (!entry || !entry.scopeMetrics) continue;

      for (const scope of entry.scopeMetrics) {
        for (const metric of scope.metrics) {
          if (metric.descriptor.name !== 'gemini_cli.memory.usage') continue;

          for (const point of metric.dataPoints) {
            const sessionId =
              (point.attributes?.['session.id'] as string) || 'unknown';
            const component =
              (point.attributes?.['component'] as string) || 'unknown';
            // startTime is read as a [seconds, nanoseconds] pair.
            const seconds = point.startTime?.[0] || 0;
            const nanos = point.startTime?.[1] || 0;
            const groupKey = `${sessionId}-${component}-${seconds}-${nanos}`;

            let reading = grouped.get(groupKey);
            if (!reading) {
              reading = {
                timestamp: seconds * 1000 + Math.floor(nanos / 1000000),
                rss: 0,
                heapUsed: 0,
                heapTotal: 0,
                external: 0,
              };
              grouped.set(groupKey, reading);
            }

            const value = point.value?.max ?? point.value?.sum ?? 0;
            switch (point.attributes?.['memory_type']) {
              case 'heap_used':
                reading.heapUsed = value;
                break;
              case 'heap_total':
                reading.heapTotal = value;
                break;
              case 'rss':
                reading.rss = value;
                break;
              case 'external':
                reading.external = value;
                break;
              default:
                // Other memory types are not tracked.
                break;
            }
          }
        }
      }
    }

    return Array.from(grouped.values()).sort(
      (left, right) => left.timestamp - right.timestamp,
    );
  }
  async runInteractive(options?: {
    args?: string | string[];
    approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';