feat(test-utils): add memory usage integration test harness (#24876)

2026-04-11 22:00:41 -07:00 · 2026-04-08 10:42:18 -07:00
parent 34b4f1c6e4
commit 4ebc43bc66
18 changed files with 1021 additions and 3 deletions
--- a/memory-tests/baselines.json
+++ b/memory-tests/baselines.json
@@ -0,0 +1,30 @@
+{
+  "version": 1,
+  "updatedAt": "2026-04-08T01:21:58.770Z",
+  "scenarios": {
+    "multi-turn-conversation": {
+      "heapUsedBytes": 120082704,
+      "heapTotalBytes": 177586176,
+      "rssBytes": 269172736,
+      "timestamp": "2026-04-08T01:21:57.127Z"
+    },
+    "multi-function-call-repo-search": {
+      "heapUsedBytes": 104644984,
+      "heapTotalBytes": 111575040,
+      "rssBytes": 204079104,
+      "timestamp": "2026-04-08T01:21:58.770Z"
+    },
+    "idle-session-startup": {
+      "heapUsedBytes": 119813672,
+      "heapTotalBytes": 177061888,
+      "rssBytes": 267943936,
+      "timestamp": "2026-04-08T01:21:53.855Z"
+    },
+    "simple-prompt-response": {
+      "heapUsedBytes": 119722064,
+      "heapTotalBytes": 177324032,
+      "rssBytes": 268812288,
+      "timestamp": "2026-04-08T01:21:55.491Z"
+    }
+  }
+}
--- a/memory-tests/globalSetup.ts
+++ b/memory-tests/globalSetup.ts
@@ -0,0 +1,71 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { mkdir, readdir, rm } from 'node:fs/promises';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { canUseRipgrep } from '../packages/core/src/tools/ripGrep.js';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const rootDir = join(__dirname, '..');
+const memoryTestsDir = join(rootDir, '.memory-tests');
+let runDir = '';
+
+export async function setup() {
+  runDir = join(memoryTestsDir, `${Date.now()}`);
+  await mkdir(runDir, { recursive: true });
+
+  // Point HOME (and USERPROFILE on Windows) at the test run directory to
+  // avoid conflicts with the user's local config.
+  process.env['HOME'] = runDir;
+  if (process.platform === 'win32') {
+    process.env['USERPROFILE'] = runDir;
+  }
+  process.env['GEMINI_CONFIG_DIR'] = join(runDir, '.gemini');
+
+  // Ensure ripgrep is downloaded up front to avoid race conditions; fail fast otherwise
+  const available = await canUseRipgrep();
+  if (!available) {
+    throw new Error('Failed to download ripgrep binary');
+  }
+
+  // Best-effort cleanup of old runs: keep the three newest directories (by sorted name) for debugging
+  try {
+    const testRuns = await readdir(memoryTestsDir);
+    if (testRuns.length > 3) {
+      const oldRuns = testRuns.sort().slice(0, testRuns.length - 3);
+      await Promise.all(
+        oldRuns.map((oldRun) =>
+          rm(join(memoryTestsDir, oldRun), {
+            recursive: true,
+            force: true,
+          }),
+        ),
+      );
+    }
+  } catch (e) {
+    console.error('Error cleaning up old memory test runs:', e);
+  }
+
+  process.env['INTEGRATION_TEST_FILE_DIR'] = runDir;
+  process.env['GEMINI_CLI_INTEGRATION_TEST'] = 'true';
+  process.env['GEMINI_FORCE_FILE_STORAGE'] = 'true';
+  process.env['TELEMETRY_LOG_FILE'] = join(runDir, 'telemetry.log');
+  process.env['VERBOSE'] = process.env['VERBOSE'] ?? 'false';
+
+  console.log(`\nMemory test output directory: ${runDir}`);
+}
+
+export async function teardown() {
+  // Remove this run's directory unless KEEP_OUTPUT is exactly 'true' (or setup never assigned runDir)
+  if (process.env['KEEP_OUTPUT'] !== 'true' && runDir) {
+    try {
+      await rm(runDir, { recursive: true, force: true });
+    } catch (e) {
+      console.warn('Failed to clean up memory test directory:', e);
+    }
+  }
+}
--- a/memory-tests/memory-usage.test.ts
+++ b/memory-tests/memory-usage.test.ts
@@ -0,0 +1,185 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, beforeAll, afterAll, afterEach } from 'vitest';
+import { TestRig, MemoryTestHarness } from '@google/gemini-cli-test-utils';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const BASELINES_PATH = join(__dirname, 'baselines.json');
+const UPDATE_BASELINES = process.env['UPDATE_MEMORY_BASELINES'] === 'true';
+const TOLERANCE_PERCENT = 10;
+
+// Fake API key for tests using fake responses
+const TEST_ENV = { GEMINI_API_KEY: 'fake-memory-test-key' };
+
+describe('Memory Usage Tests', () => {
+  let harness: MemoryTestHarness;
+  let rig: TestRig;
+
+  beforeAll(() => {
+    harness = new MemoryTestHarness({
+      baselinesPath: BASELINES_PATH,
+      defaultTolerancePercent: TOLERANCE_PERCENT,
+      gcCycles: 3,
+      gcDelayMs: 100,
+      sampleCount: 3,
+    });
+  });
+
+  afterEach(async () => {
+    await rig.cleanup();
+  });
+
+  afterAll(async () => {
+    // Generate the harness summary report once every scenario has run
+    await harness.generateReport();
+  });
+
+  it('idle-session-startup: memory usage within baseline', async () => {
+    rig = new TestRig();
+    rig.setup('memory-idle-startup', {
+      fakeResponsesPath: join(__dirname, 'memory.idle-startup.responses'),
+    });
+
+    const result = await harness.runScenario(
+      'idle-session-startup',
+      async (recordSnapshot) => {
+        await rig.run({
+          args: ['hello'],
+          timeout: 120000,
+          env: TEST_ENV,
+        });
+
+        await recordSnapshot('after-startup');
+      },
+    );
+
+    if (UPDATE_BASELINES) {
+      harness.updateScenarioBaseline(result);
+      console.log(
+        `Updated baseline for idle-session-startup: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
+      );
+    } else {
+      harness.assertWithinBaseline(result);
+    }
+  });
+
+  it('simple-prompt-response: memory usage within baseline', async () => {
+    rig = new TestRig();
+    rig.setup('memory-simple-prompt', {
+      fakeResponsesPath: join(__dirname, 'memory.simple-prompt.responses'),
+    });
+
+    const result = await harness.runScenario(
+      'simple-prompt-response',
+      async (recordSnapshot) => {
+        await rig.run({
+          args: ['What is the capital of France?'],
+          timeout: 120000,
+          env: TEST_ENV,
+        });
+
+        await recordSnapshot('after-response');
+      },
+    );
+
+    if (UPDATE_BASELINES) {
+      harness.updateScenarioBaseline(result);
+      console.log(
+        `Updated baseline for simple-prompt-response: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
+      );
+    } else {
+      harness.assertWithinBaseline(result);
+    }
+  });
+
+  it('multi-turn-conversation: memory remains stable over turns', async () => {
+    rig = new TestRig();
+    rig.setup('memory-multi-turn', {
+      fakeResponsesPath: join(__dirname, 'memory.multi-turn.responses'),
+    });
+
+    const prompts = [
+      'Hello, what can you help me with?',
+      'Tell me about JavaScript',
+      'How is TypeScript different?',
+      'Can you write a simple TypeScript function?',
+      'What are some TypeScript best practices?',
+    ];
+
+    const result = await harness.runScenario(
+      'multi-turn-conversation',
+      async (recordSnapshot) => {
+        // Feed all prompts to a single rig.run() via stdin, one prompt per line
+        const stdinContent = prompts.join('\n');
+        await rig.run({
+          stdin: stdinContent,
+          timeout: 120000,
+          env: TEST_ENV,
+        });
+
+        // Record a single snapshot once the whole run has completed
+        await recordSnapshot('after-all-turns');
+      },
+    );
+
+    if (UPDATE_BASELINES) {
+      harness.updateScenarioBaseline(result);
+      console.log(
+        `Updated baseline for multi-turn-conversation: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
+      );
+    } else {
+      harness.assertWithinBaseline(result);
+    }
+  });
+
+  it('multi-function-call-repo-search: memory after tool use', async () => {
+    rig = new TestRig();
+    rig.setup('memory-multi-func-call', {
+      fakeResponsesPath: join(
+        __dirname,
+        'memory.multi-function-call.responses',
+      ),
+    });
+
+    // Create the directory, then the files, so the tool calls in the fake responses have real targets
+    rig.mkdir('packages/core/src/telemetry');
+    rig.createFile(
+      'packages/core/src/telemetry/memory-monitor.ts',
+      'export class MemoryMonitor { constructor() {} }',
+    );
+    rig.createFile(
+      'packages/core/src/telemetry/metrics.ts',
+      'export function recordMemoryUsage() {}',
+    );
+
+    const result = await harness.runScenario(
+      'multi-function-call-repo-search',
+      async (recordSnapshot) => {
+        await rig.run({
+          args: [
+            'Search this repository for MemoryMonitor and tell me what it does',
+          ],
+          timeout: 120000,
+          env: TEST_ENV,
+        });
+
+        await recordSnapshot('after-tool-calls');
+      },
+    );
+
+    if (UPDATE_BASELINES) {
+      harness.updateScenarioBaseline(result);
+      console.log(
+        `Updated baseline for multi-function-call-repo-search: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
+      );
+    } else {
+      harness.assertWithinBaseline(result);
+    }
+  });
+});
--- a/memory-tests/memory.idle-startup.responses
+++ b/memory-tests/memory.idle-startup.responses
@@ -0,0 +1,2 @@
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help. What would you like to work on?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":12,"totalTokenCount":17,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]}
--- a/memory-tests/memory.multi-function-call.responses
+++ b/memory-tests/memory.multi-function-call.responses
@@ -0,0 +1,4 @@
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll search for MemoryMonitor in the repository and analyze what it does."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":30,"candidatesTokenCount":15,"totalTokenCount":45,"promptTokensDetails":[{"modality":"TEXT","tokenCount":30}]}}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"grep_search","args":{"pattern":"MemoryMonitor","path":".","include_pattern":"*.ts"}}},{"functionCall":{"name":"list_directory","args":{"path":"packages/core/src/telemetry"}}},{"functionCall":{"name":"read_file","args":{"file_path":"packages/core/src/telemetry/memory-monitor.ts"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":30,"candidatesTokenCount":80,"totalTokenCount":110,"promptTokensDetails":[{"modality":"TEXT","tokenCount":30}]}}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I found the memory monitoring code. Here's a summary:\n\nThe `MemoryMonitor` class in `packages/core/src/telemetry/memory-monitor.ts` provides:\n\n1. **Continuous monitoring** via `start()`/`stop()` with configurable intervals\n2. **V8 heap snapshots** using `v8.getHeapStatistics()` and `process.memoryUsage()`\n3. **High-water mark tracking** to detect significant memory growth\n4. **Rate-limited recording** to avoid metric flood\n5. **Activity detection** — only records when user is active\n\nThe class uses a singleton pattern via `initializeMemoryMonitor()` for global access."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":500,"candidatesTokenCount":120,"totalTokenCount":620,"promptTokensDetails":[{"modality":"TEXT","tokenCount":500}]}}]}
--- a/memory-tests/memory.multi-turn.responses
+++ b/memory-tests/memory.multi-turn.responses
@@ -0,0 +1,10 @@
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help you with your coding tasks. What would you like to work on today?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":18,"totalTokenCount":23,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]}
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"JavaScript is a high-level, interpreted programming language. It was originally designed for adding interactivity to web pages."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":25,"candidatesTokenCount":60,"totalTokenCount":85,"promptTokensDetails":[{"modality":"TEXT","tokenCount":25}]}}]}
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"TypeScript is a typed superset of JavaScript developed by Microsoft. The main differences from JavaScript are static typing and better tooling."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":45,"candidatesTokenCount":80,"totalTokenCount":125,"promptTokensDetails":[{"modality":"TEXT","tokenCount":45}]}}]}
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Here is a simple TypeScript function:\n\nfunction greet(name: string): string { return `Hello, ${name}!`; }"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":60,"candidatesTokenCount":55,"totalTokenCount":115,"promptTokensDetails":[{"modality":"TEXT","tokenCount":60}]}}]}
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Here are 5 key TypeScript best practices: Enable strict mode, prefer interfaces, use union types, leverage type inference, and use readonly."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":75,"candidatesTokenCount":70,"totalTokenCount":145,"promptTokensDetails":[{"modality":"TEXT","tokenCount":75}]}}]}
--- a/memory-tests/memory.simple-prompt.responses
+++ b/memory-tests/memory.simple-prompt.responses
@@ -0,0 +1,2 @@
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The capital of France is Paris. It has been the capital since the 10th century and is known for iconic landmarks like the Eiffel Tower, the Louvre Museum, and Notre-Dame Cathedral. Paris is also the most populous city in France, with a metropolitan area population of over 12 million people."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":7,"candidatesTokenCount":55,"totalTokenCount":62,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7}]}}]}
--- a/memory-tests/tsconfig.json
+++ b/memory-tests/tsconfig.json
@@ -0,0 +1,12 @@
+{
+  "extends": "../tsconfig.json",
+  "compilerOptions": {
+    "noEmit": true,
+    "allowJs": true
+  },
+  "include": ["**/*.ts"],
+  "references": [
+    { "path": "../packages/core" },
+    { "path": "../packages/test-utils" }
+  ]
+}
--- a/memory-tests/vitest.config.ts
+++ b/memory-tests/vitest.config.ts
@@ -0,0 +1,28 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { defineConfig } from 'vitest/config';
+
+export default defineConfig({
+  test: {
+    testTimeout: 600000, // 10 minutes — memory profiling is slow
+    globalSetup: './globalSetup.ts',
+    reporters: ['default'],
+    include: ['**/*.test.ts'],
+    retry: 0, // No retries for memory tests — noise is handled by tolerance
+    fileParallelism: false, // Must run serially to avoid memory interference
+    pool: 'forks', // Forks pool so execArgv below can pass --expose-gc
+    poolOptions: {
+      forks: {
+        singleFork: true, // Single process for accurate per-test memory readings
+        execArgv: ['--expose-gc'], // Exposes global.gc() so GC can be forced
+      },
+    },
+    env: {
+      GEMINI_TEST_TYPE: 'memory',
+    },
+  },
+});