feat(test-utils): add memory usage integration test harness (#24876)

This commit is contained in:
Sri Pasumarthi
2026-04-08 10:42:18 -07:00
committed by GitHub
parent 34b4f1c6e4
commit 4ebc43bc66
18 changed files with 1021 additions and 3 deletions

View File

@@ -0,0 +1,30 @@
{
"version": 1,
"updatedAt": "2026-04-08T01:21:58.770Z",
"scenarios": {
"multi-turn-conversation": {
"heapUsedBytes": 120082704,
"heapTotalBytes": 177586176,
"rssBytes": 269172736,
"timestamp": "2026-04-08T01:21:57.127Z"
},
"multi-function-call-repo-search": {
"heapUsedBytes": 104644984,
"heapTotalBytes": 111575040,
"rssBytes": 204079104,
"timestamp": "2026-04-08T01:21:58.770Z"
},
"idle-session-startup": {
"heapUsedBytes": 119813672,
"heapTotalBytes": 177061888,
"rssBytes": 267943936,
"timestamp": "2026-04-08T01:21:53.855Z"
},
"simple-prompt-response": {
"heapUsedBytes": 119722064,
"heapTotalBytes": 177324032,
"rssBytes": 268812288,
"timestamp": "2026-04-08T01:21:55.491Z"
}
}
}

View File

@@ -0,0 +1,71 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { mkdir, readdir, rm } from 'node:fs/promises';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { canUseRipgrep } from '../packages/core/src/tools/ripGrep.js';
const __dirname = dirname(fileURLToPath(import.meta.url));
const rootDir = join(__dirname, '..');
const memoryTestsDir = join(rootDir, '.memory-tests');
let runDir = '';
export async function setup() {
runDir = join(memoryTestsDir, `${Date.now()}`);
await mkdir(runDir, { recursive: true });
// Set the home directory to the test run directory to avoid conflicts
// with the user's local config.
process.env['HOME'] = runDir;
if (process.platform === 'win32') {
process.env['USERPROFILE'] = runDir;
}
process.env['GEMINI_CONFIG_DIR'] = join(runDir, '.gemini');
// Download ripgrep to avoid race conditions
const available = await canUseRipgrep();
if (!available) {
throw new Error('Failed to download ripgrep binary');
}
// Clean up old test runs, keeping the latest few for debugging
try {
const testRuns = await readdir(memoryTestsDir);
if (testRuns.length > 3) {
const oldRuns = testRuns.sort().slice(0, testRuns.length - 3);
await Promise.all(
oldRuns.map((oldRun) =>
rm(join(memoryTestsDir, oldRun), {
recursive: true,
force: true,
}),
),
);
}
} catch (e) {
console.error('Error cleaning up old memory test runs:', e);
}
process.env['INTEGRATION_TEST_FILE_DIR'] = runDir;
process.env['GEMINI_CLI_INTEGRATION_TEST'] = 'true';
process.env['GEMINI_FORCE_FILE_STORAGE'] = 'true';
process.env['TELEMETRY_LOG_FILE'] = join(runDir, 'telemetry.log');
process.env['VERBOSE'] = process.env['VERBOSE'] ?? 'false';
console.log(`\nMemory test output directory: ${runDir}`);
}
export async function teardown() {
// Cleanup unless KEEP_OUTPUT is set
if (process.env['KEEP_OUTPUT'] !== 'true' && runDir) {
try {
await rm(runDir, { recursive: true, force: true });
} catch (e) {
console.warn('Failed to clean up memory test directory:', e);
}
}
}

View File

@@ -0,0 +1,185 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, beforeAll, afterAll, afterEach } from 'vitest';
import { TestRig, MemoryTestHarness } from '@google/gemini-cli-test-utils';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
const __dirname = dirname(fileURLToPath(import.meta.url));
const BASELINES_PATH = join(__dirname, 'baselines.json');
const UPDATE_BASELINES = process.env['UPDATE_MEMORY_BASELINES'] === 'true';
const TOLERANCE_PERCENT = 10;
// Fake API key for tests using fake responses
const TEST_ENV = { GEMINI_API_KEY: 'fake-memory-test-key' };
describe('Memory Usage Tests', () => {
let harness: MemoryTestHarness;
let rig: TestRig;
beforeAll(() => {
harness = new MemoryTestHarness({
baselinesPath: BASELINES_PATH,
defaultTolerancePercent: TOLERANCE_PERCENT,
gcCycles: 3,
gcDelayMs: 100,
sampleCount: 3,
});
});
afterEach(async () => {
await rig.cleanup();
});
afterAll(async () => {
// Generate the summary report after all tests
await harness.generateReport();
});
it('idle-session-startup: memory usage within baseline', async () => {
rig = new TestRig();
rig.setup('memory-idle-startup', {
fakeResponsesPath: join(__dirname, 'memory.idle-startup.responses'),
});
const result = await harness.runScenario(
'idle-session-startup',
async (recordSnapshot) => {
await rig.run({
args: ['hello'],
timeout: 120000,
env: TEST_ENV,
});
await recordSnapshot('after-startup');
},
);
if (UPDATE_BASELINES) {
harness.updateScenarioBaseline(result);
console.log(
`Updated baseline for idle-session-startup: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
);
} else {
harness.assertWithinBaseline(result);
}
});
it('simple-prompt-response: memory usage within baseline', async () => {
rig = new TestRig();
rig.setup('memory-simple-prompt', {
fakeResponsesPath: join(__dirname, 'memory.simple-prompt.responses'),
});
const result = await harness.runScenario(
'simple-prompt-response',
async (recordSnapshot) => {
await rig.run({
args: ['What is the capital of France?'],
timeout: 120000,
env: TEST_ENV,
});
await recordSnapshot('after-response');
},
);
if (UPDATE_BASELINES) {
harness.updateScenarioBaseline(result);
console.log(
`Updated baseline for simple-prompt-response: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
);
} else {
harness.assertWithinBaseline(result);
}
});
it('multi-turn-conversation: memory remains stable over turns', async () => {
rig = new TestRig();
rig.setup('memory-multi-turn', {
fakeResponsesPath: join(__dirname, 'memory.multi-turn.responses'),
});
const prompts = [
'Hello, what can you help me with?',
'Tell me about JavaScript',
'How is TypeScript different?',
'Can you write a simple TypeScript function?',
'What are some TypeScript best practices?',
];
const result = await harness.runScenario(
'multi-turn-conversation',
async (recordSnapshot) => {
// Run through all turns as a piped sequence
const stdinContent = prompts.join('\n');
await rig.run({
stdin: stdinContent,
timeout: 120000,
env: TEST_ENV,
});
// Take snapshots after the conversation completes
await recordSnapshot('after-all-turns');
},
);
if (UPDATE_BASELINES) {
harness.updateScenarioBaseline(result);
console.log(
`Updated baseline for multi-turn-conversation: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
);
} else {
harness.assertWithinBaseline(result);
}
});
it('multi-function-call-repo-search: memory after tool use', async () => {
rig = new TestRig();
rig.setup('memory-multi-func-call', {
fakeResponsesPath: join(
__dirname,
'memory.multi-function-call.responses',
),
});
// Create directories first, then files in the workspace so the tools have targets
rig.mkdir('packages/core/src/telemetry');
rig.createFile(
'packages/core/src/telemetry/memory-monitor.ts',
'export class MemoryMonitor { constructor() {} }',
);
rig.createFile(
'packages/core/src/telemetry/metrics.ts',
'export function recordMemoryUsage() {}',
);
const result = await harness.runScenario(
'multi-function-call-repo-search',
async (recordSnapshot) => {
await rig.run({
args: [
'Search this repository for MemoryMonitor and tell me what it does',
],
timeout: 120000,
env: TEST_ENV,
});
await recordSnapshot('after-tool-calls');
},
);
if (UPDATE_BASELINES) {
harness.updateScenarioBaseline(result);
console.log(
`Updated baseline for multi-function-call-repo-search: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
);
} else {
harness.assertWithinBaseline(result);
}
});
});

View File

@@ -0,0 +1,2 @@
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help. What would you like to work on?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":12,"totalTokenCount":17,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]}

View File

@@ -0,0 +1,4 @@
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll search for MemoryMonitor in the repository and analyze what it does."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":30,"candidatesTokenCount":15,"totalTokenCount":45,"promptTokensDetails":[{"modality":"TEXT","tokenCount":30}]}}]}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"grep_search","args":{"pattern":"MemoryMonitor","path":".","include_pattern":"*.ts"}}},{"functionCall":{"name":"list_directory","args":{"path":"packages/core/src/telemetry"}}},{"functionCall":{"name":"read_file","args":{"file_path":"packages/core/src/telemetry/memory-monitor.ts"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":30,"candidatesTokenCount":80,"totalTokenCount":110,"promptTokensDetails":[{"modality":"TEXT","tokenCount":30}]}}]}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I found the memory monitoring code. Here's a summary:\n\nThe `MemoryMonitor` class in `packages/core/src/telemetry/memory-monitor.ts` provides:\n\n1. **Continuous monitoring** via `start()`/`stop()` with configurable intervals\n2. **V8 heap snapshots** using `v8.getHeapStatistics()` and `process.memoryUsage()`\n3. **High-water mark tracking** to detect significant memory growth\n4. **Rate-limited recording** to avoid metric flood\n5. **Activity detection** — only records when user is active\n\nThe class uses a singleton pattern via `initializeMemoryMonitor()` for global access."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":500,"candidatesTokenCount":120,"totalTokenCount":620,"promptTokensDetails":[{"modality":"TEXT","tokenCount":500}]}}]}

View File

@@ -0,0 +1,10 @@
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help you with your coding tasks. What would you like to work on today?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":18,"totalTokenCount":23,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]}
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"JavaScript is a high-level, interpreted programming language. It was originally designed for adding interactivity to web pages."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":25,"candidatesTokenCount":60,"totalTokenCount":85,"promptTokensDetails":[{"modality":"TEXT","tokenCount":25}]}}]}
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"TypeScript is a typed superset of JavaScript developed by Microsoft. The main differences from JavaScript are static typing and better tooling."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":45,"candidatesTokenCount":80,"totalTokenCount":125,"promptTokensDetails":[{"modality":"TEXT","tokenCount":45}]}}]}
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Here is a simple TypeScript function:\n\nfunction greet(name: string): string { return `Hello, ${name}!`; }"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":60,"candidatesTokenCount":55,"totalTokenCount":115,"promptTokensDetails":[{"modality":"TEXT","tokenCount":60}]}}]}
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Here are 5 key TypeScript best practices: Enable strict mode, prefer interfaces, use union types, leverage type inference, and use readonly."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":75,"candidatesTokenCount":70,"totalTokenCount":145,"promptTokensDetails":[{"modality":"TEXT","tokenCount":75}]}}]}

View File

@@ -0,0 +1,2 @@
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The capital of France is Paris. It has been the capital since the 10th century and is known for iconic landmarks like the Eiffel Tower, the Louvre Museum, and Notre-Dame Cathedral. Paris is also the most populous city in France, with a metropolitan area population of over 12 million people."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":7,"candidatesTokenCount":55,"totalTokenCount":62,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7}]}}]}

View File

@@ -0,0 +1,12 @@
{
"extends": "../tsconfig.json",
"compilerOptions": {
"noEmit": true,
"allowJs": true
},
"include": ["**/*.ts"],
"references": [
{ "path": "../packages/core" },
{ "path": "../packages/test-utils" }
]
}

View File

@@ -0,0 +1,28 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { defineConfig } from 'vitest/config';
export default defineConfig({
test: {
testTimeout: 600000, // 10 minutes — memory profiling is slow
globalSetup: './globalSetup.ts',
reporters: ['default'],
include: ['**/*.test.ts'],
retry: 0, // No retries for memory tests — noise is handled by tolerance
fileParallelism: false, // Must run serially to avoid memory interference
pool: 'forks', // Use forks pool for --expose-gc support
poolOptions: {
forks: {
singleFork: true, // Single process for accurate per-test memory readings
execArgv: ['--expose-gc'], // Enable global.gc() for forced GC
},
},
env: {
GEMINI_TEST_TYPE: 'memory',
},
},
});