diff --git a/.github/workflows/memory-nightly.yml b/.github/workflows/memory-nightly.yml new file mode 100644 index 0000000000..ee4e5e589c --- /dev/null +++ b/.github/workflows/memory-nightly.yml @@ -0,0 +1,33 @@ +name: 'Memory Tests: Nightly' + +on: + schedule: + - cron: '0 2 * * *' # Runs at 2 AM every day + workflow_dispatch: # Allow manual trigger + +permissions: + contents: 'read' + +jobs: + memory-test: + name: 'Run Memory Usage Tests' + runs-on: 'gemini-cli-ubuntu-16-core' + if: "github.repository == 'google-gemini/gemini-cli'" + steps: + - name: 'Checkout' + uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5 + + - name: 'Set up Node.js' + uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4 + with: + node-version-file: '.nvmrc' + cache: 'npm' + + - name: 'Install dependencies' + run: 'npm ci' + + - name: 'Build project' + run: 'npm run build' + + - name: 'Run Memory Tests' + run: 'npm run test:memory' diff --git a/GEMINI.md b/GEMINI.md index c08e486b22..60824972d3 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -44,6 +44,8 @@ powerful tool for developers. - **Test Commands:** - **Unit (All):** `npm run test` - **Integration (E2E):** `npm run test:e2e` + - **Memory (Nightly):** `npm run test:memory` (Runs memory regression tests + against baselines. Excluded from `preflight`, run nightly.) - **Workspace-Specific:** `npm test -w <pkg> -- <path>` (Note: `<path>` must be relative to the workspace root, e.g., `-w @google/gemini-cli-core -- src/routing/modelRouterService.test.ts`) diff --git a/docs/integration-tests.md b/docs/integration-tests.md index f5784c344b..bfed813ebc 100644 --- a/docs/integration-tests.md +++ b/docs/integration-tests.md @@ -117,6 +117,46 @@ npm run test:integration:sandbox:docker npm run test:integration:sandbox:podman ``` +## Memory regression tests + +Memory regression tests are designed to detect heap growth and leaks across key +CLI scenarios. 
They are located in the `memory-tests` directory. + +These tests are distinct from standard integration tests because they measure +memory usage and compare it against committed baselines. + +### Running memory tests + +Memory tests are not run as part of the default `npm run test` or +`npm run test:e2e` commands. They are run nightly in CI but can be run manually: + +```bash +npm run test:memory +``` + +### Updating baselines + +If you intentionally change behavior that affects memory usage, you may need to +update the baselines. Run the dedicated script, which sets +`UPDATE_MEMORY_BASELINES=true` via `cross-env` on every platform: + +```bash +npm run test:memory:update-baselines +``` + +This will run the tests, take median snapshots, and overwrite +`memory-tests/baselines.json`. You should review the changes and commit the +updated baseline file. + +### How it works + +The harness (`MemoryTestHarness` in `packages/test-utils`): + +- Forces garbage collection multiple times to reduce noise. +- Takes median snapshots to filter spikes. +- Compares against baselines with a 10% tolerance. +- Can analyze sustained leaks across 3 snapshots using `analyzeSnapshots()`. 
+ ## Diagnostics The integration test runner provides several options for diagnostics to help diff --git a/memory-tests/baselines.json b/memory-tests/baselines.json new file mode 100644 index 0000000000..0fcab5dc02 --- /dev/null +++ b/memory-tests/baselines.json @@ -0,0 +1,30 @@ +{ + "version": 1, + "updatedAt": "2026-04-08T01:21:58.770Z", + "scenarios": { + "multi-turn-conversation": { + "heapUsedBytes": 120082704, + "heapTotalBytes": 177586176, + "rssBytes": 269172736, + "timestamp": "2026-04-08T01:21:57.127Z" + }, + "multi-function-call-repo-search": { + "heapUsedBytes": 104644984, + "heapTotalBytes": 111575040, + "rssBytes": 204079104, + "timestamp": "2026-04-08T01:21:58.770Z" + }, + "idle-session-startup": { + "heapUsedBytes": 119813672, + "heapTotalBytes": 177061888, + "rssBytes": 267943936, + "timestamp": "2026-04-08T01:21:53.855Z" + }, + "simple-prompt-response": { + "heapUsedBytes": 119722064, + "heapTotalBytes": 177324032, + "rssBytes": 268812288, + "timestamp": "2026-04-08T01:21:55.491Z" + } + } +} diff --git a/memory-tests/globalSetup.ts b/memory-tests/globalSetup.ts new file mode 100644 index 0000000000..3f52501838 --- /dev/null +++ b/memory-tests/globalSetup.ts @@ -0,0 +1,71 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { mkdir, readdir, rm } from 'node:fs/promises'; +import { join, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { canUseRipgrep } from '../packages/core/src/tools/ripGrep.js'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const rootDir = join(__dirname, '..'); +const memoryTestsDir = join(rootDir, '.memory-tests'); +let runDir = ''; + +export async function setup() { + runDir = join(memoryTestsDir, `${Date.now()}`); + await mkdir(runDir, { recursive: true }); + + // Set the home directory to the test run directory to avoid conflicts + // with the user's local config. 
+ process.env['HOME'] = runDir; + if (process.platform === 'win32') { + process.env['USERPROFILE'] = runDir; + } + process.env['GEMINI_CONFIG_DIR'] = join(runDir, '.gemini'); + + // Download ripgrep to avoid race conditions + const available = await canUseRipgrep(); + if (!available) { + throw new Error('Failed to download ripgrep binary'); + } + + // Clean up old test runs, keeping the latest few for debugging + try { + const testRuns = await readdir(memoryTestsDir); + if (testRuns.length > 3) { + const oldRuns = testRuns.sort().slice(0, testRuns.length - 3); + await Promise.all( + oldRuns.map((oldRun) => + rm(join(memoryTestsDir, oldRun), { + recursive: true, + force: true, + }), + ), + ); + } + } catch (e) { + console.error('Error cleaning up old memory test runs:', e); + } + + process.env['INTEGRATION_TEST_FILE_DIR'] = runDir; + process.env['GEMINI_CLI_INTEGRATION_TEST'] = 'true'; + process.env['GEMINI_FORCE_FILE_STORAGE'] = 'true'; + process.env['TELEMETRY_LOG_FILE'] = join(runDir, 'telemetry.log'); + process.env['VERBOSE'] = process.env['VERBOSE'] ?? 
'false'; + + console.log(`\nMemory test output directory: ${runDir}`); +} + +export async function teardown() { + // Cleanup unless KEEP_OUTPUT is set + if (process.env['KEEP_OUTPUT'] !== 'true' && runDir) { + try { + await rm(runDir, { recursive: true, force: true }); + } catch (e) { + console.warn('Failed to clean up memory test directory:', e); + } + } +} diff --git a/memory-tests/memory-usage.test.ts b/memory-tests/memory-usage.test.ts new file mode 100644 index 0000000000..6455eec632 --- /dev/null +++ b/memory-tests/memory-usage.test.ts @@ -0,0 +1,185 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, beforeAll, afterAll, afterEach } from 'vitest'; +import { TestRig, MemoryTestHarness } from '@google/gemini-cli-test-utils'; +import { join, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const BASELINES_PATH = join(__dirname, 'baselines.json'); +const UPDATE_BASELINES = process.env['UPDATE_MEMORY_BASELINES'] === 'true'; +const TOLERANCE_PERCENT = 10; + +// Fake API key for tests using fake responses +const TEST_ENV = { GEMINI_API_KEY: 'fake-memory-test-key' }; + +describe('Memory Usage Tests', () => { + let harness: MemoryTestHarness; + let rig: TestRig; + + beforeAll(() => { + harness = new MemoryTestHarness({ + baselinesPath: BASELINES_PATH, + defaultTolerancePercent: TOLERANCE_PERCENT, + gcCycles: 3, + gcDelayMs: 100, + sampleCount: 3, + }); + }); + + afterEach(async () => { + await rig.cleanup(); + }); + + afterAll(async () => { + // Generate the summary report after all tests + await harness.generateReport(); + }); + + it('idle-session-startup: memory usage within baseline', async () => { + rig = new TestRig(); + rig.setup('memory-idle-startup', { + fakeResponsesPath: join(__dirname, 'memory.idle-startup.responses'), + }); + + const result = await harness.runScenario( + 'idle-session-startup', + async 
(recordSnapshot) => { + await rig.run({ + args: ['hello'], + timeout: 120000, + env: TEST_ENV, + }); + + await recordSnapshot('after-startup'); + }, + ); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + console.log( + `Updated baseline for idle-session-startup: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`, + ); + } else { + harness.assertWithinBaseline(result); + } + }); + + it('simple-prompt-response: memory usage within baseline', async () => { + rig = new TestRig(); + rig.setup('memory-simple-prompt', { + fakeResponsesPath: join(__dirname, 'memory.simple-prompt.responses'), + }); + + const result = await harness.runScenario( + 'simple-prompt-response', + async (recordSnapshot) => { + await rig.run({ + args: ['What is the capital of France?'], + timeout: 120000, + env: TEST_ENV, + }); + + await recordSnapshot('after-response'); + }, + ); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + console.log( + `Updated baseline for simple-prompt-response: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`, + ); + } else { + harness.assertWithinBaseline(result); + } + }); + + it('multi-turn-conversation: memory remains stable over turns', async () => { + rig = new TestRig(); + rig.setup('memory-multi-turn', { + fakeResponsesPath: join(__dirname, 'memory.multi-turn.responses'), + }); + + const prompts = [ + 'Hello, what can you help me with?', + 'Tell me about JavaScript', + 'How is TypeScript different?', + 'Can you write a simple TypeScript function?', + 'What are some TypeScript best practices?', + ]; + + const result = await harness.runScenario( + 'multi-turn-conversation', + async (recordSnapshot) => { + // Run through all turns as a piped sequence + const stdinContent = prompts.join('\n'); + await rig.run({ + stdin: stdinContent, + timeout: 120000, + env: TEST_ENV, + }); + + // Take snapshots after the conversation completes + await recordSnapshot('after-all-turns'); + }, + ); + + if (UPDATE_BASELINES) { + 
harness.updateScenarioBaseline(result); + console.log( + `Updated baseline for multi-turn-conversation: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`, + ); + } else { + harness.assertWithinBaseline(result); + } + }); + + it('multi-function-call-repo-search: memory after tool use', async () => { + rig = new TestRig(); + rig.setup('memory-multi-func-call', { + fakeResponsesPath: join( + __dirname, + 'memory.multi-function-call.responses', + ), + }); + + // Create directories first, then files in the workspace so the tools have targets + rig.mkdir('packages/core/src/telemetry'); + rig.createFile( + 'packages/core/src/telemetry/memory-monitor.ts', + 'export class MemoryMonitor { constructor() {} }', + ); + rig.createFile( + 'packages/core/src/telemetry/metrics.ts', + 'export function recordMemoryUsage() {}', + ); + + const result = await harness.runScenario( + 'multi-function-call-repo-search', + async (recordSnapshot) => { + await rig.run({ + args: [ + 'Search this repository for MemoryMonitor and tell me what it does', + ], + timeout: 120000, + env: TEST_ENV, + }); + + await recordSnapshot('after-tool-calls'); + }, + ); + + if (UPDATE_BASELINES) { + harness.updateScenarioBaseline(result); + console.log( + `Updated baseline for multi-function-call-repo-search: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`, + ); + } else { + harness.assertWithinBaseline(result); + } + }); +}); diff --git a/memory-tests/memory.idle-startup.responses b/memory-tests/memory.idle-startup.responses new file mode 100644 index 0000000000..7a5703e3d2 --- /dev/null +++ b/memory-tests/memory.idle-startup.responses @@ -0,0 +1,2 @@ +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help. 
What would you like to work on?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":12,"totalTokenCount":17,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]} diff --git a/memory-tests/memory.multi-function-call.responses b/memory-tests/memory.multi-function-call.responses new file mode 100644 index 0000000000..8bdf75afc9 --- /dev/null +++ b/memory-tests/memory.multi-function-call.responses @@ -0,0 +1,4 @@ +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll search for MemoryMonitor in the repository and analyze what it does."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":30,"candidatesTokenCount":15,"totalTokenCount":45,"promptTokensDetails":[{"modality":"TEXT","tokenCount":30}]}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"grep_search","args":{"pattern":"MemoryMonitor","path":".","include_pattern":"*.ts"}}},{"functionCall":{"name":"list_directory","args":{"path":"packages/core/src/telemetry"}}},{"functionCall":{"name":"read_file","args":{"file_path":"packages/core/src/telemetry/memory-monitor.ts"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":30,"candidatesTokenCount":80,"totalTokenCount":110,"promptTokensDetails":[{"modality":"TEXT","tokenCount":30}]}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I found the memory monitoring code. Here's a summary:\n\nThe `MemoryMonitor` class in `packages/core/src/telemetry/memory-monitor.ts` provides:\n\n1. **Continuous monitoring** via `start()`/`stop()` with configurable intervals\n2. **V8 heap snapshots** using `v8.getHeapStatistics()` and `process.memoryUsage()`\n3. 
**High-water mark tracking** to detect significant memory growth\n4. **Rate-limited recording** to avoid metric flood\n5. **Activity detection** — only records when user is active\n\nThe class uses a singleton pattern via `initializeMemoryMonitor()` for global access."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":500,"candidatesTokenCount":120,"totalTokenCount":620,"promptTokensDetails":[{"modality":"TEXT","tokenCount":500}]}}]} diff --git a/memory-tests/memory.multi-turn.responses b/memory-tests/memory.multi-turn.responses new file mode 100644 index 0000000000..df428b56db --- /dev/null +++ b/memory-tests/memory.multi-turn.responses @@ -0,0 +1,10 @@ +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help you with your coding tasks. What would you like to work on today?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":18,"totalTokenCount":23,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]} +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"JavaScript is a high-level, interpreted programming language. 
It was originally designed for adding interactivity to web pages."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":25,"candidatesTokenCount":60,"totalTokenCount":85,"promptTokensDetails":[{"modality":"TEXT","tokenCount":25}]}}]} +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"TypeScript is a typed superset of JavaScript developed by Microsoft. The main differences from JavaScript are static typing and better tooling."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":45,"candidatesTokenCount":80,"totalTokenCount":125,"promptTokensDetails":[{"modality":"TEXT","tokenCount":45}]}}]} +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Here is a simple TypeScript function:\n\nfunction greet(name: string): string { return `Hello, ${name}!`; }"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":60,"candidatesTokenCount":55,"totalTokenCount":115,"promptTokensDetails":[{"modality":"TEXT","tokenCount":60}]}}]} +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Here are 5 key TypeScript best practices: Enable strict mode, prefer interfaces, use union types, leverage type inference, and use readonly."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":75,"candidatesTokenCount":70,"totalTokenCount":145,"promptTokensDetails":[{"modality":"TEXT","tokenCount":75}]}}]} diff --git 
a/memory-tests/memory.simple-prompt.responses b/memory-tests/memory.simple-prompt.responses new file mode 100644 index 0000000000..ad3f20c9a1 --- /dev/null +++ b/memory-tests/memory.simple-prompt.responses @@ -0,0 +1,2 @@ +{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The capital of France is Paris. It has been the capital since the 10th century and is known for iconic landmarks like the Eiffel Tower, the Louvre Museum, and Notre-Dame Cathedral. Paris is also the most populous city in France, with a metropolitan area population of over 12 million people."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":7,"candidatesTokenCount":55,"totalTokenCount":62,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7}]}}]} diff --git a/memory-tests/tsconfig.json b/memory-tests/tsconfig.json new file mode 100644 index 0000000000..7f2c199703 --- /dev/null +++ b/memory-tests/tsconfig.json @@ -0,0 +1,12 @@ +{ + "extends": "../tsconfig.json", + "compilerOptions": { + "noEmit": true, + "allowJs": true + }, + "include": ["**/*.ts"], + "references": [ + { "path": "../packages/core" }, + { "path": "../packages/test-utils" } + ] +} diff --git a/memory-tests/vitest.config.ts b/memory-tests/vitest.config.ts new file mode 100644 index 0000000000..c69af28826 --- /dev/null +++ b/memory-tests/vitest.config.ts @@ -0,0 +1,28 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + testTimeout: 600000, // 10 minutes — memory profiling is slow + globalSetup: './globalSetup.ts', + reporters: ['default'], + include: ['**/*.test.ts'], + retry: 0, // No retries for memory tests — noise is handled by tolerance + fileParallelism: false, // Must run 
serially to avoid memory interference + pool: 'forks', // Use forks pool for --expose-gc support + poolOptions: { + forks: { + singleFork: true, // Single process for accurate per-test memory readings + execArgv: ['--expose-gc'], // Enable global.gc() for forced GC + }, + }, + env: { + GEMINI_TEST_TYPE: 'memory', + }, + }, +}); diff --git a/package-lock.json b/package-lock.json index 2c8a4b64b8..7ec397323e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -446,7 +446,8 @@ "version": "2.11.0", "resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.11.0.tgz", "integrity": "sha512-sBXGT13cpmPR5BMgHE6UEEfEaShh5Ror6rfN3yEK5si7QVrtZg8LEPQb0VVhiLRUslD2yLnXtnRzG035J/mZXQ==", - "license": "(Apache-2.0 AND BSD-3-Clause)" + "license": "(Apache-2.0 AND BSD-3-Clause)", + "peer": true }, "node_modules/@bundled-es-modules/cookie": { "version": "2.0.1", @@ -1449,6 +1450,7 @@ "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.13.4.tgz", "integrity": "sha512-GsFaMXCkMqkKIvwCQjCrwH+GHbPKBjhwo/8ZuUkWHqbI73Kky9I+pQltrlT0+MWpedCoosda53lgjYfyEPgxBg==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@grpc/proto-loader": "^0.7.13", "@js-sdsl/ordered-map": "^4.4.2" @@ -2155,6 +2157,7 @@ "integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@octokit/auth-token": "^6.0.0", "@octokit/graphql": "^9.0.2", @@ -2335,6 +2338,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz", "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", "license": "Apache-2.0", + "peer": true, "engines": { "node": ">=8.0.0" } @@ -2384,6 +2388,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.5.0.tgz", "integrity": "sha512-ka4H8OM6+DlUhSAZpONu0cPBtPPTQKxbxVzC4CzVx5+K4JnroJVBtDzLAMx4/3CDTJXRvVFhpFjtl4SaiTNoyQ==", "license": "Apache-2.0", + 
"peer": true, "dependencies": { "@opentelemetry/semantic-conventions": "^1.29.0" }, @@ -2758,6 +2763,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.5.0.tgz", "integrity": "sha512-F8W52ApePshpoSrfsSk1H2yJn9aKjCrbpQF1M9Qii0GHzbfVeFUB+rc3X4aggyZD8x9Gu3Slua+s6krmq6Dt8g==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/semantic-conventions": "^1.29.0" @@ -2791,6 +2797,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.5.0.tgz", "integrity": "sha512-BeJLtU+f5Gf905cJX9vXFQorAr6TAfK3SPvTFqP+scfIpDQEJfRaGJWta7sJgP+m4dNtBf9y3yvBKVAZZtJQVA==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/resources": "2.5.0" @@ -2845,6 +2852,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.5.0.tgz", "integrity": "sha512-VzRf8LzotASEyNDUxTdaJ9IRJ1/h692WyArDBInf5puLCjxbICD6XkHgpuudis56EndyS7LYFmtTMny6UABNdQ==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/resources": "2.5.0", @@ -4081,6 +4089,7 @@ "integrity": "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.0.2" } @@ -4355,6 +4364,7 @@ "integrity": "sha512-6sMvZePQrnZH2/cJkwRpkT7DxoAWh+g6+GFRK6bV3YQo7ogi3SX5rgF6099r5Q53Ma5qeT7LGmOmuIutF4t3lA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.35.0", "@typescript-eslint/types": "8.35.0", @@ -5228,6 +5238,7 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -5569,6 +5580,12 @@ "dev": true, "license": "MIT" }, + 
"node_modules/asciichart": { + "version": "1.5.25", + "resolved": "https://registry.npmjs.org/asciichart/-/asciichart-1.5.25.tgz", + "integrity": "sha512-PNxzXIPPOtWq8T7bgzBtk9cI2lgS4SJZthUHEiQ1aoIc3lNzGfUvIvo9LiAnq26TACo9t1/4qP6KTGAUbzX9Xg==", + "license": "MIT" + }, "node_modules/assertion-error": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-2.0.1.tgz", @@ -7362,7 +7379,8 @@ "version": "0.0.1581282", "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz", "integrity": "sha512-nv7iKtNZQshSW2hKzYNr46nM/Cfh5SEvE2oV0/SEGgc9XupIY5ggf84Cz8eJIkBce7S3bmTAauFD6aysMpnqsQ==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "peer": true }, "node_modules/dezalgo": { "version": "1.0.4", @@ -7946,6 +7964,7 @@ "integrity": "sha512-GsGizj2Y1rCWDu6XoEekL3RLilp0voSePurjZIkxL3wlm5o5EC9VpgaP7lrCvjnkuLvzFBQWB3vWB3K5KQTveQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.12.1", @@ -8463,6 +8482,7 @@ "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", "license": "MIT", + "peer": true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", @@ -9775,6 +9795,7 @@ "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.7.tgz", "integrity": "sha512-jq9l1DM0zVIvsm3lv9Nw9nlJnMNPOcAtsbsgiUhWcFzPE99Gvo6yRTlszSLLYacMeQ6quHD6hMfId8crVHvexw==", "license": "MIT", + "peer": true, "engines": { "node": ">=16.9.0" } @@ -10053,6 +10074,7 @@ "resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.6.7.tgz", "integrity": "sha512-bDzQLpLzK/dn9Ur/Ku88ZZR9totVcMGrGYAgPHidsAAbe9NKztU1fggj/iu0wRp5g1kBeALb3cfagFGdDxAU1w==", "license": "MIT", + "peer": true, "dependencies": { "ansi-escapes": "^7.0.0", "ansi-styles": "^6.2.3", @@ -13826,6 +13848,7 @@ "resolved": 
"https://registry.npmjs.org/react/-/react-19.2.4.tgz", "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -13836,6 +13859,7 @@ "integrity": "sha512-ePrwPfxAnB+7hgnEr8vpKxL9cmnp7F322t8oqcPshbIQQhDKgFDW4tjhF2wjVbdXF9O/nyuy3sQWd9JGpiLPvA==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "shell-quote": "^1.6.1", "ws": "^7" @@ -15985,6 +16009,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -16207,7 +16232,8 @@ "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD" + "license": "0BSD", + "peer": true }, "node_modules/tsx": { "version": "4.20.3", @@ -16215,6 +16241,7 @@ "integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "~0.25.0", "get-tsconfig": "^4.7.5" @@ -16380,6 +16407,7 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "devOptional": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -16602,6 +16630,7 @@ "resolved": "https://registry.npmjs.org/vite/-/vite-7.2.2.tgz", "integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==", "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.5.0", @@ -16715,6 +16744,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": 
"sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -16727,6 +16757,7 @@ "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz", "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==", "license": "MIT", + "peer": true, "dependencies": { "@types/chai": "^5.2.2", "@vitest/expect": "3.2.4", @@ -17374,6 +17405,7 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } @@ -17817,6 +17849,7 @@ "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.14.3.tgz", "integrity": "sha512-Iq8QQQ/7X3Sac15oB6p0FmUg/klxQvXLeileoqrTRGJYLV+/9tubbr9ipz0GKHjmXVsgFPo/+W+2cA8eNcR+XA==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@grpc/proto-loader": "^0.8.0", "@js-sdsl/ordered-map": "^4.4.2" @@ -17920,6 +17953,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -17979,6 +18013,7 @@ "dependencies": { "@google/gemini-cli-core": "file:../core", "@lydell/node-pty": "1.1.0", + "asciichart": "^1.5.25", "strip-ansi": "^7.1.2", "vitest": "^3.2.4" }, diff --git a/package.json b/package.json index e24f6a20b5..9f67253ccc 100644 --- a/package.json +++ b/package.json @@ -51,6 +51,8 @@ "test:integration:all": "npm run test:integration:sandbox:none && npm run test:integration:sandbox:docker && npm run test:integration:sandbox:podman", "test:integration:flaky": "cross-env RUN_FLAKY_INTEGRATION=1 npm run test:integration:sandbox:none", "test:integration:sandbox:none": "cross-env 
GEMINI_SANDBOX=false vitest run --root ./integration-tests", + "test:memory": "vitest run --root ./memory-tests", + "test:memory:update-baselines": "cross-env UPDATE_MEMORY_BASELINES=true vitest run --root ./memory-tests", "test:integration:sandbox:docker": "cross-env GEMINI_SANDBOX=docker npm run build:sandbox && cross-env GEMINI_SANDBOX=docker vitest run --root ./integration-tests", "test:integration:sandbox:podman": "cross-env GEMINI_SANDBOX=podman vitest run --root ./integration-tests", "lint": "cross-env NODE_OPTIONS=\"--max-old-space-size=8192\" eslint . --cache --max-warnings 0", diff --git a/packages/test-utils/package.json b/packages/test-utils/package.json index caedd907e4..b16497da3c 100644 --- a/packages/test-utils/package.json +++ b/packages/test-utils/package.json @@ -12,6 +12,7 @@ "dependencies": { "@google/gemini-cli-core": "file:../core", "@lydell/node-pty": "1.1.0", + "asciichart": "^1.5.25", "strip-ansi": "^7.1.2", "vitest": "^3.2.4" }, diff --git a/packages/test-utils/src/index.ts b/packages/test-utils/src/index.ts index 7bae818040..49eaec66d3 100644 --- a/packages/test-utils/src/index.ts +++ b/packages/test-utils/src/index.ts @@ -6,6 +6,8 @@ export * from './file-system-test-helpers.js'; export * from './fixtures/agents.js'; +export * from './memory-baselines.js'; +export * from './memory-test-harness.js'; export * from './mock-utils.js'; export * from './test-mcp-server.js'; export * from './test-rig.js'; diff --git a/packages/test-utils/src/memory-baselines.ts b/packages/test-utils/src/memory-baselines.ts new file mode 100644 index 0000000000..295e80f61b --- /dev/null +++ b/packages/test-utils/src/memory-baselines.ts @@ -0,0 +1,76 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { readFileSync, writeFileSync, existsSync } from 'node:fs'; + +/** + * Baseline entry for a single memory test scenario. 
+ */ +export interface MemoryBaseline { + heapUsedBytes: number; + heapTotalBytes: number; + rssBytes: number; + timestamp: string; +} + +/** + * Top-level structure of the baselines JSON file. + */ +export interface MemoryBaselineFile { + version: number; + updatedAt: string; + scenarios: Record<string, MemoryBaseline>; +} + +/** + * Load baselines from a JSON file. + * Returns an empty baseline file if the file does not exist yet. + */ +export function loadBaselines(path: string): MemoryBaselineFile { + if (!existsSync(path)) { + return { + version: 1, + updatedAt: new Date().toISOString(), + scenarios: {}, + }; + } + + const content = readFileSync(path, 'utf-8'); + return JSON.parse(content) as MemoryBaselineFile; +} + +/** + * Save baselines to a JSON file. + */ +export function saveBaselines( + path: string, + baselines: MemoryBaselineFile, +): void { + baselines.updatedAt = new Date().toISOString(); + writeFileSync(path, JSON.stringify(baselines, null, 2) + '\n'); +} + +/** + * Update (or create) a single scenario baseline in the file. 
/**
 * Update (or create) a single scenario baseline in the file.
 *
 * Performs a full read-modify-write on every call: the file is loaded with
 * `loadBaselines`, the entry for `scenarioName` is replaced with the
 * measured values plus a fresh ISO timestamp, and the result is written
 * back with `saveBaselines`.
 *
 * @param path - Location of the baselines JSON file.
 * @param scenarioName - Key of the scenario to create or overwrite.
 * @param measured - Values to store for the scenario.
 */
export function updateBaseline(
  path: string,
  scenarioName: string,
  measured: {
    heapUsedBytes: number;
    heapTotalBytes: number;
    rssBytes: number;
  },
): void {
  const { heapUsedBytes, heapTotalBytes, rssBytes } = measured;
  const baselines = loadBaselines(path);
  baselines.scenarios[scenarioName] = {
    heapUsedBytes,
    heapTotalBytes,
    rssBytes,
    timestamp: new Date().toISOString(),
  };
  saveBaselines(path, baselines);
}

// ---------------------------------------------------------------------------
// Type declarations from packages/test-utils/src/memory-test-harness.ts
// ---------------------------------------------------------------------------

/** Configuration for asciichart plot function. */
interface PlotConfig {
  /** Chart height passed through to the plot function. */
  height?: number;
  /** Formatter for the y-axis labels. */
  format?: (x: number) => string;
}

/** Type for the asciichart plot function. */
type PlotFn = (series: number[], config?: PlotConfig) => string;

/**
 * A single memory snapshot at a point in time.
 */
export interface MemorySnapshot {
  /** `Date.now()` value at the time the snapshot was produced. */
  timestamp: number;
  /** Caller-supplied name for this measurement point. */
  label: string;
  /** `process.memoryUsage().heapUsed`. */
  heapUsed: number;
  /** `process.memoryUsage().heapTotal`. */
  heapTotal: number;
  /** `process.memoryUsage().rss`. */
  rss: number;
  /** `process.memoryUsage().external`. */
  external: number;
  /** `process.memoryUsage().arrayBuffers`. */
  arrayBuffers: number;
  /** `heap_size_limit` from `v8.getHeapStatistics()`. */
  heapSizeLimit: number;
  /**
   * Per-space statistics from `v8.getHeapSpaceStatistics()`. Typed with
   * Node's own `HeapSpaceInfo` (via an inline import type, so no extra import
   * statement is required) instead of `any[]`, so `space_name` /
   * `space_used_size` accesses are checked by the compiler.
   */
  heapSpaces: import('node:v8').HeapSpaceInfo[];
}

/**
 * Result from running a memory test scenario.
 */
export interface MemoryTestResult {
  /** Scenario name; also the key used to look up the baseline. */
  scenarioName: string;
  /** Every snapshot recorded for the scenario, in recording order. */
  snapshots: MemorySnapshot[];
  /** Largest `heapUsed` across `snapshots`. */
  peakHeapUsed: number;
  /** Largest `rss` across `snapshots`. */
  peakRss: number;
  /** `heapUsed` of the final ("after") snapshot. */
  finalHeapUsed: number;
  /** `rss` of the final ("after") snapshot. */
  finalRss: number;
  /** Stored baseline for the scenario, or `undefined` if none exists yet. */
  baseline: MemoryBaseline | undefined;
  /** Whether `deltaPercent` is within tolerance; `true` when no baseline. */
  withinTolerance: boolean;
  /** Percent change of `finalHeapUsed` vs. the baseline; 0 when no baseline. */
  deltaPercent: number;
}
/**
 * Options for the MemoryTestHarness.
 */
export interface MemoryTestHarnessOptions {
  /** Path to the baselines JSON file */
  baselinesPath: string;
  /** Default tolerance percentage (0-100). Default: 10 */
  defaultTolerancePercent?: number;
  /** Number of GC cycles to run before each snapshot. Default: 3 */
  gcCycles?: number;
  /** Delay in ms between GC cycles. Default: 100 */
  gcDelayMs?: number;
  /** Number of samples to take for median calculation. Default: 3 */
  sampleCount?: number;
  /** Pause in ms between samples. Default: 50 */
  samplePauseMs?: number;
}

/**
 * MemoryTestHarness provides infrastructure for running memory usage tests.
 *
 * It handles:
 * - Forcing V8 garbage collection to reduce noise
 * - Sampling `process.memoryUsage()` and V8 heap statistics (lightweight
 *   counters, not full heap-snapshot dumps)
 * - Comparing against baselines with configurable tolerance
 * - Generating ASCII chart reports of memory trends
 */
export class MemoryTestHarness {
  private baselines: MemoryBaselineFile;
  private readonly baselinesPath: string;
  private readonly defaultTolerancePercent: number;
  private readonly gcCycles: number;
  private readonly gcDelayMs: number;
  private readonly sampleCount: number;
  private readonly samplePauseMs: number;
  /** Every result produced by `runScenario`, used as the default report input. */
  private readonly allResults: MemoryTestResult[] = [];

  /**
   * @param options - Harness configuration; the baselines file at
   *   `options.baselinesPath` is loaded immediately.
   */
  constructor(options: MemoryTestHarnessOptions) {
    this.baselinesPath = options.baselinesPath;
    this.defaultTolerancePercent = options.defaultTolerancePercent ?? 10;
    this.gcCycles = options.gcCycles ?? 3;
    this.gcDelayMs = options.gcDelayMs ?? 100;
    this.sampleCount = options.sampleCount ?? 3;
    this.samplePauseMs = options.samplePauseMs ?? 50;
    this.baselines = loadBaselines(this.baselinesPath);
  }

  /**
   * Force garbage collection multiple times, then sample process and V8
   * memory statistics.
   * Forces GC multiple times with delays to allow weak references and
   * FinalizationRegistry callbacks to run, reducing measurement noise.
   *
   * @param label - Name stored on the returned snapshot.
   * @throws Error if `globalThis.gc` is unavailable (see `forceGC`).
   */
  async takeSnapshot(label: string = 'snapshot'): Promise<MemorySnapshot> {
    await this.forceGC();

    const memUsage = process.memoryUsage();
    const heapStats = v8.getHeapStatistics();

    return {
      timestamp: Date.now(),
      label,
      heapUsed: memUsage.heapUsed,
      heapTotal: memUsage.heapTotal,
      rss: memUsage.rss,
      external: memUsage.external,
      arrayBuffers: memUsage.arrayBuffers,
      heapSizeLimit: heapStats.heap_size_limit,
      heapSpaces: v8.getHeapSpaceStatistics(),
    };
  }

  /**
   * Take multiple snapshot samples and return the median to reduce noise.
   *
   * @param label - Label for the returned snapshot; individual samples are
   *   labelled `${label}_sample_${i}`.
   * @param count - Number of samples; defaults to the configured
   *   `sampleCount`. Values below 1 (or non-finite values) are treated as 1
   *   so that a complete snapshot is always returned.
   * @returns The sample with the median `heapUsed`, relabelled and
   *   re-timestamped.
   */
  async takeMedianSnapshot(
    label: string = 'median',
    count?: number,
  ): Promise<MemorySnapshot> {
    const requested = count ?? this.sampleCount;
    // Math.ceil keeps the number of loop iterations a fractional count
    // produced before; the lower bound prevents an empty sample set.
    const numSamples = Number.isFinite(requested)
      ? Math.max(1, Math.ceil(requested))
      : 1;

    const samples: MemorySnapshot[] = [];
    for (let i = 0; i < numSamples; i++) {
      samples.push(await this.takeSnapshot(`${label}_sample_${i}`));
      if (i < numSamples - 1) {
        await sleep(this.samplePauseMs);
      }
    }

    // Sort by heapUsed and take the median
    samples.sort((a, b) => a.heapUsed - b.heapUsed);
    const median = samples[Math.floor(samples.length / 2)];
    if (!median) {
      throw new Error('No memory samples were collected');
    }

    return {
      ...median,
      label,
      timestamp: Date.now(),
    };
  }

  /**
   * Run a memory test scenario.
   *
   * Takes before/after snapshots around the scenario function, collects
   * intermediate snapshots if the scenario provides them, and compares
   * the result against the stored baseline. When no baseline exists the
   * result reports `deltaPercent: 0` and `withinTolerance: true`.
   *
   * @param name - Scenario name (must match baseline key)
   * @param fn - Async function that executes the scenario. Receives a
   *   `recordSnapshot` callback for recording intermediate snapshots.
   * @param tolerancePercent - Override default tolerance for this scenario
   * @returns The result, which is also retained for `generateReport`.
   */
  async runScenario(
    name: string,
    fn: (
      recordSnapshot: (label: string) => Promise<MemorySnapshot>,
    ) => Promise<void>,
    tolerancePercent?: number,
  ): Promise<MemoryTestResult> {
    const tolerance = tolerancePercent ?? this.defaultTolerancePercent;
    const snapshots: MemorySnapshot[] = [];

    // Callback handed to the scenario for intermediate snapshots
    const recordSnapshot = async (label: string): Promise<MemorySnapshot> => {
      const snap = await this.takeMedianSnapshot(label);
      snapshots.push(snap);
      return snap;
    };

    // Before snapshot
    snapshots.push(await this.takeMedianSnapshot('before'));

    // Run the scenario
    await fn(recordSnapshot);

    // After snapshot (median of multiple samples)
    const afterSnap = await this.takeMedianSnapshot('after');
    snapshots.push(afterSnap);

    // Calculate peak values
    const peakHeapUsed = Math.max(...snapshots.map((s) => s.heapUsed));
    const peakRss = Math.max(...snapshots.map((s) => s.rss));

    // Compare against the stored baseline, if any
    const baseline = this.baselines.scenarios[name];
    const deltaPercent = baseline
      ? percentOverBaseline(afterSnap.heapUsed, baseline.heapUsedBytes)
      : 0;
    const withinTolerance = baseline ? deltaPercent <= tolerance : true;

    const result: MemoryTestResult = {
      scenarioName: name,
      snapshots,
      peakHeapUsed,
      peakRss,
      finalHeapUsed: afterSnap.heapUsed,
      finalRss: afterSnap.rss,
      baseline,
      withinTolerance,
      deltaPercent,
    };

    this.allResults.push(result);
    return result;
  }

  /**
   * Assert that a scenario result is within the baseline tolerance.
   * Throws an assertion error with details if it exceeds the threshold.
   * A result without a baseline only produces a warning.
   *
   * @param result - Result returned by `runScenario`.
   * @param tolerancePercent - Override for the default tolerance.
   * @throws Error when the final heap usage exceeds the baseline by more
   *   than the tolerance.
   */
  assertWithinBaseline(
    result: MemoryTestResult,
    tolerancePercent?: number,
  ): void {
    const tolerance = tolerancePercent ?? this.defaultTolerancePercent;

    if (!result.baseline) {
      console.warn(
        `⚠ No baseline found for "${result.scenarioName}". ` +
          `Run with UPDATE_MEMORY_BASELINES=true to create one. ` +
          `Measured: ${formatMB(result.finalHeapUsed)} heap used.`,
      );
      return; // Don't fail if no baseline exists yet
    }

    // Recomputed (rather than read from result.deltaPercent) so callers may
    // pass hand-built results; uses the same helper as runScenario so both
    // always agree.
    const deltaPercent = percentOverBaseline(
      result.finalHeapUsed,
      result.baseline.heapUsedBytes,
    );

    if (deltaPercent > tolerance) {
      throw new Error(
        `Memory regression detected for "${result.scenarioName}"!\n` +
          ` Measured: ${formatMB(result.finalHeapUsed)} heap used\n` +
          ` Baseline: ${formatMB(result.baseline.heapUsedBytes)} heap used\n` +
          ` Delta: ${deltaPercent.toFixed(1)}% (tolerance: ${tolerance}%)\n` +
          ` Peak heap: ${formatMB(result.peakHeapUsed)}\n` +
          ` Peak RSS: ${formatMB(result.peakRss)}`,
      );
    }
  }

  /**
   * Update the baseline for a scenario with the current measured values.
   * `heapTotalBytes` is taken from the last recorded snapshot (0 if the
   * result has no snapshots). The in-memory baselines are reloaded
   * afterwards.
   */
  updateScenarioBaseline(result: MemoryTestResult): void {
    const lastSnapshot = result.snapshots[result.snapshots.length - 1];
    updateBaseline(this.baselinesPath, result.scenarioName, {
      heapUsedBytes: result.finalHeapUsed,
      heapTotalBytes: lastSnapshot?.heapTotal ?? 0,
      rssBytes: result.finalRss,
    });
    // Reload baselines after update
    this.baselines = loadBaselines(this.baselinesPath);
  }

  /**
   * Analyze snapshots to detect sustained leaks across 3 snapshots.
   * Only the last three snapshots are inspected. A leak is flagged if, for
   * any heap space, used size grew by more than `thresholdBytes` in both
   * consecutive phases. A space missing from a snapshot counts as size 0.
   *
   * @param snapshots - Snapshots in chronological order.
   * @param thresholdBytes - Minimum per-phase growth to count (default 1 MB).
   * @returns Whether sustained growth was seen, plus a descriptive message.
   */
  analyzeSnapshots(
    snapshots: MemorySnapshot[],
    thresholdBytes: number = 1024 * 1024, // 1 MB
  ): { leaked: boolean; message: string } {
    if (snapshots.length < 3) {
      return { leaked: false, message: 'Not enough snapshots to analyze' };
    }

    const [first, second, third] = snapshots.slice(-3);
    if (!first || !second || !third) {
      return { leaked: false, message: 'Missing snapshots' };
    }

    // Index each snapshot once instead of scanning heapSpaces per lookup.
    // The first entry for a name wins, matching Array#find semantics.
    const usedBySpace = (snap: MemorySnapshot): Map<string, number> => {
      const used = new Map<string, number>();
      for (const space of snap.heapSpaces) {
        if (!used.has(space.space_name)) {
          used.set(space.space_name, space.space_used_size);
        }
      }
      return used;
    };
    const used1 = usedBySpace(first);
    const used2 = usedBySpace(second);
    const used3 = usedBySpace(third);

    const spaceNames = new Set<string>([
      ...used1.keys(),
      ...used2.keys(),
      ...used3.keys(),
    ]);

    const growthDetails: string[] = [];
    for (const name of spaceNames) {
      const size1 = used1.get(name) ?? 0;
      const size2 = used2.get(name) ?? 0;
      const size3 = used3.get(name) ?? 0;

      const growth1 = size2 - size1;
      const growth2 = size3 - size2;

      if (growth1 > thresholdBytes && growth2 > thresholdBytes) {
        growthDetails.push(
          `${name}: sustained growth (${formatMB(growth1)} -> ${formatMB(growth2)})`,
        );
      }
    }

    const leaked = growthDetails.length > 0;
    const message = leaked
      ? `Memory bloat detected in heap spaces:\n ` + growthDetails.join('\n ')
      : `No sustained growth detected in any heap space above threshold.`;

    return { leaked, message };
  }

  /**
   * Assert that memory returns to a baseline level after a peak.
   * Useful for verifying that large tool outputs are not retained.
   * The first snapshot is treated as the baseline and the last as the final
   * measurement.
   *
   * @param snapshots - At least three snapshots in chronological order.
   * @param tolerancePercent - Allowed growth of final over baseline `heapUsed`.
   * @throws Error if fewer than 3 snapshots are given, or if the final heap
   *   usage exceeds the baseline by more than the tolerance.
   */
  assertMemoryReturnsToBaseline(
    snapshots: MemorySnapshot[],
    tolerancePercent: number = 10,
  ): void {
    if (snapshots.length < 3) {
      throw new Error('Need at least 3 snapshots to check return to baseline');
    }

    const baseline = snapshots[0]; // Assume first is baseline
    const final = snapshots[snapshots.length - 1];
    if (!baseline || !final) {
      throw new Error('Missing snapshots for return to baseline check');
    }

    // Earliest snapshot with the highest heapUsed (reported for context only).
    let peak = baseline;
    for (const snap of snapshots) {
      if (snap.heapUsed > peak.heapUsed) {
        peak = snap;
      }
    }

    const tolerance = baseline.heapUsed * (tolerancePercent / 100);
    const delta = final.heapUsed - baseline.heapUsed;

    if (delta > tolerance) {
      throw new Error(
        `Memory did not return to baseline!\n` +
          ` Baseline: ${formatMB(baseline.heapUsed)}\n` +
          ` Peak: ${formatMB(peak.heapUsed)}\n` +
          ` Final: ${formatMB(final.heapUsed)}\n` +
          ` Delta: ${formatMB(delta)} (tolerance: ${formatMB(tolerance)})`,
      );
    }
  }

  /**
   * Generate a report with ASCII charts and summary table.
   * Uses the `asciichart` library for terminal visualization. If the library
   * cannot be loaded the trend section is replaced by a notice; if rendering
   * a single chart fails, only that chart is replaced by a notice.
   *
   * @param results - Results to report; defaults to every result recorded
   *   by `runScenario` on this harness.
   * @returns The report text (also written to `console.log`).
   */
  async generateReport(results?: MemoryTestResult[]): Promise<string> {
    const resultsToReport = results ?? this.allResults;
    const lines: string[] = [];

    lines.push('');
    lines.push('═══════════════════════════════════════════════════');
    lines.push(' MEMORY USAGE TEST REPORT');
    lines.push('═══════════════════════════════════════════════════');
    lines.push('');

    for (const result of resultsToReport) {
      const measured = formatMB(result.finalHeapUsed);
      const baseline = result.baseline
        ? formatMB(result.baseline.heapUsedBytes)
        : 'N/A';
      const delta = result.baseline
        ? `${result.deltaPercent >= 0 ? '+' : ''}${result.deltaPercent.toFixed(1)}%`
        : 'N/A';
      const status = !result.baseline
        ? 'NEW'
        : result.withinTolerance
          ? '✅'
          : '❌';

      lines.push(
        `${result.scenarioName}: ${measured} (Baseline: ${baseline}, Delta: ${delta}) ${status}`,
      );
    }
    lines.push('');

    // Load the chart library. Only a failed import is reported as "not
    // available"; rendering problems are handled per chart below.
    let plot: PlotFn | undefined;
    let chartLibraryLoaded = true;
    try {
      // @ts-expect-error - asciichart may not have types
      const asciichart = (await import('asciichart')) as {
        default?: { plot?: PlotFn };
        plot?: PlotFn;
      };
      plot = asciichart.default?.plot ?? asciichart.plot;
    } catch {
      chartLibraryLoaded = false;
      lines.push(
        '(asciichart not available — install with: npm install --save-dev asciichart)',
      );
      lines.push('');
    }

    // Generate ASCII chart for each scenario with multiple snapshots
    if (chartLibraryLoaded) {
      for (const result of resultsToReport) {
        if (result.snapshots.length <= 2) {
          continue;
        }

        lines.push(`📈 Memory trend: ${result.scenarioName}`);
        lines.push('─'.repeat(60));

        if (plot) {
          const heapDataMB = result.snapshots.map(
            (s) => s.heapUsed / (1024 * 1024),
          );
          try {
            lines.push(
              plot(heapDataMB, {
                height: 10,
                format: (x: number) => `${x.toFixed(1)} MB`.padStart(10),
              }),
            );
          } catch (error: unknown) {
            const reason =
              error instanceof Error ? error.message : String(error);
            lines.push(`(chart rendering failed: ${reason})`);
          }
        }

        // Label the x-axis with snapshot labels
        const labels = result.snapshots.map((s) => s.label);
        lines.push(' ' + labels.join(' → '));
        lines.push('');
      }
    }

    lines.push('═══════════════════════════════════════════════════');
    lines.push('');

    const report = lines.join('\n');
    console.log(report);
    return report;
  }

  /**
   * Force V8 garbage collection.
   * Runs multiple GC cycles with delays to allow weak references
   * and FinalizationRegistry callbacks to run.
   *
   * @throws Error if `globalThis.gc` is not a function (Node was started
   *   without `--expose-gc`).
   */
  private async forceGC(): Promise<void> {
    if (typeof globalThis.gc !== 'function') {
      throw new Error(
        'global.gc() not available. Run with --expose-gc for accurate measurements.',
      );
    }

    for (let i = 0; i < this.gcCycles; i++) {
      globalThis.gc();
      if (i < this.gcCycles - 1) {
        await sleep(this.gcDelayMs);
      }
    }
  }
}

/**
 * Percent by which `measuredBytes` exceeds `baselineBytes` (negative when
 * below it).
 *
 * A baseline that is not a positive number cannot be used as a divisor
 * (0/0 would give NaN, which compares false against every tolerance), so in
 * that case the result is 0 when nothing was measured either and `Infinity`
 * otherwise, which always exceeds any finite tolerance.
 */
function percentOverBaseline(
  measuredBytes: number,
  baselineBytes: number,
): number {
  if (baselineBytes > 0) {
    return ((measuredBytes - baselineBytes) / baselineBytes) * 100;
  }
  return measuredBytes > 0 ? Number.POSITIVE_INFINITY : 0;
}

/**
 * Format bytes as a human-readable MB string.
 */
function formatMB(bytes: number): string {
  return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
}