mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-11 22:00:41 -07:00
feat(test-utils): add memory usage integration test harness (#24876)
This commit is contained in:
30
memory-tests/baselines.json
Normal file
30
memory-tests/baselines.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"version": 1,
|
||||
"updatedAt": "2026-04-08T01:21:58.770Z",
|
||||
"scenarios": {
|
||||
"multi-turn-conversation": {
|
||||
"heapUsedBytes": 120082704,
|
||||
"heapTotalBytes": 177586176,
|
||||
"rssBytes": 269172736,
|
||||
"timestamp": "2026-04-08T01:21:57.127Z"
|
||||
},
|
||||
"multi-function-call-repo-search": {
|
||||
"heapUsedBytes": 104644984,
|
||||
"heapTotalBytes": 111575040,
|
||||
"rssBytes": 204079104,
|
||||
"timestamp": "2026-04-08T01:21:58.770Z"
|
||||
},
|
||||
"idle-session-startup": {
|
||||
"heapUsedBytes": 119813672,
|
||||
"heapTotalBytes": 177061888,
|
||||
"rssBytes": 267943936,
|
||||
"timestamp": "2026-04-08T01:21:53.855Z"
|
||||
},
|
||||
"simple-prompt-response": {
|
||||
"heapUsedBytes": 119722064,
|
||||
"heapTotalBytes": 177324032,
|
||||
"rssBytes": 268812288,
|
||||
"timestamp": "2026-04-08T01:21:55.491Z"
|
||||
}
|
||||
}
|
||||
}
|
||||
71
memory-tests/globalSetup.ts
Normal file
71
memory-tests/globalSetup.ts
Normal file
@@ -0,0 +1,71 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { mkdir, readdir, rm } from 'node:fs/promises';
|
||||
import { join, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { canUseRipgrep } from '../packages/core/src/tools/ripGrep.js';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const rootDir = join(__dirname, '..');
|
||||
const memoryTestsDir = join(rootDir, '.memory-tests');
|
||||
let runDir = '';
|
||||
|
||||
export async function setup() {
|
||||
runDir = join(memoryTestsDir, `${Date.now()}`);
|
||||
await mkdir(runDir, { recursive: true });
|
||||
|
||||
// Set the home directory to the test run directory to avoid conflicts
|
||||
// with the user's local config.
|
||||
process.env['HOME'] = runDir;
|
||||
if (process.platform === 'win32') {
|
||||
process.env['USERPROFILE'] = runDir;
|
||||
}
|
||||
process.env['GEMINI_CONFIG_DIR'] = join(runDir, '.gemini');
|
||||
|
||||
// Download ripgrep to avoid race conditions
|
||||
const available = await canUseRipgrep();
|
||||
if (!available) {
|
||||
throw new Error('Failed to download ripgrep binary');
|
||||
}
|
||||
|
||||
// Clean up old test runs, keeping the latest few for debugging
|
||||
try {
|
||||
const testRuns = await readdir(memoryTestsDir);
|
||||
if (testRuns.length > 3) {
|
||||
const oldRuns = testRuns.sort().slice(0, testRuns.length - 3);
|
||||
await Promise.all(
|
||||
oldRuns.map((oldRun) =>
|
||||
rm(join(memoryTestsDir, oldRun), {
|
||||
recursive: true,
|
||||
force: true,
|
||||
}),
|
||||
),
|
||||
);
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Error cleaning up old memory test runs:', e);
|
||||
}
|
||||
|
||||
process.env['INTEGRATION_TEST_FILE_DIR'] = runDir;
|
||||
process.env['GEMINI_CLI_INTEGRATION_TEST'] = 'true';
|
||||
process.env['GEMINI_FORCE_FILE_STORAGE'] = 'true';
|
||||
process.env['TELEMETRY_LOG_FILE'] = join(runDir, 'telemetry.log');
|
||||
process.env['VERBOSE'] = process.env['VERBOSE'] ?? 'false';
|
||||
|
||||
console.log(`\nMemory test output directory: ${runDir}`);
|
||||
}
|
||||
|
||||
export async function teardown() {
|
||||
// Cleanup unless KEEP_OUTPUT is set
|
||||
if (process.env['KEEP_OUTPUT'] !== 'true' && runDir) {
|
||||
try {
|
||||
await rm(runDir, { recursive: true, force: true });
|
||||
} catch (e) {
|
||||
console.warn('Failed to clean up memory test directory:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
185
memory-tests/memory-usage.test.ts
Normal file
185
memory-tests/memory-usage.test.ts
Normal file
@@ -0,0 +1,185 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, beforeAll, afterAll, afterEach } from 'vitest';
|
||||
import { TestRig, MemoryTestHarness } from '@google/gemini-cli-test-utils';
|
||||
import { join, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const BASELINES_PATH = join(__dirname, 'baselines.json');
|
||||
const UPDATE_BASELINES = process.env['UPDATE_MEMORY_BASELINES'] === 'true';
|
||||
const TOLERANCE_PERCENT = 10;
|
||||
|
||||
// Fake API key for tests using fake responses
|
||||
const TEST_ENV = { GEMINI_API_KEY: 'fake-memory-test-key' };
|
||||
|
||||
describe('Memory Usage Tests', () => {
|
||||
let harness: MemoryTestHarness;
|
||||
let rig: TestRig;
|
||||
|
||||
beforeAll(() => {
|
||||
harness = new MemoryTestHarness({
|
||||
baselinesPath: BASELINES_PATH,
|
||||
defaultTolerancePercent: TOLERANCE_PERCENT,
|
||||
gcCycles: 3,
|
||||
gcDelayMs: 100,
|
||||
sampleCount: 3,
|
||||
});
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rig.cleanup();
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
// Generate the summary report after all tests
|
||||
await harness.generateReport();
|
||||
});
|
||||
|
||||
it('idle-session-startup: memory usage within baseline', async () => {
|
||||
rig = new TestRig();
|
||||
rig.setup('memory-idle-startup', {
|
||||
fakeResponsesPath: join(__dirname, 'memory.idle-startup.responses'),
|
||||
});
|
||||
|
||||
const result = await harness.runScenario(
|
||||
'idle-session-startup',
|
||||
async (recordSnapshot) => {
|
||||
await rig.run({
|
||||
args: ['hello'],
|
||||
timeout: 120000,
|
||||
env: TEST_ENV,
|
||||
});
|
||||
|
||||
await recordSnapshot('after-startup');
|
||||
},
|
||||
);
|
||||
|
||||
if (UPDATE_BASELINES) {
|
||||
harness.updateScenarioBaseline(result);
|
||||
console.log(
|
||||
`Updated baseline for idle-session-startup: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
|
||||
);
|
||||
} else {
|
||||
harness.assertWithinBaseline(result);
|
||||
}
|
||||
});
|
||||
|
||||
it('simple-prompt-response: memory usage within baseline', async () => {
|
||||
rig = new TestRig();
|
||||
rig.setup('memory-simple-prompt', {
|
||||
fakeResponsesPath: join(__dirname, 'memory.simple-prompt.responses'),
|
||||
});
|
||||
|
||||
const result = await harness.runScenario(
|
||||
'simple-prompt-response',
|
||||
async (recordSnapshot) => {
|
||||
await rig.run({
|
||||
args: ['What is the capital of France?'],
|
||||
timeout: 120000,
|
||||
env: TEST_ENV,
|
||||
});
|
||||
|
||||
await recordSnapshot('after-response');
|
||||
},
|
||||
);
|
||||
|
||||
if (UPDATE_BASELINES) {
|
||||
harness.updateScenarioBaseline(result);
|
||||
console.log(
|
||||
`Updated baseline for simple-prompt-response: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
|
||||
);
|
||||
} else {
|
||||
harness.assertWithinBaseline(result);
|
||||
}
|
||||
});
|
||||
|
||||
it('multi-turn-conversation: memory remains stable over turns', async () => {
|
||||
rig = new TestRig();
|
||||
rig.setup('memory-multi-turn', {
|
||||
fakeResponsesPath: join(__dirname, 'memory.multi-turn.responses'),
|
||||
});
|
||||
|
||||
const prompts = [
|
||||
'Hello, what can you help me with?',
|
||||
'Tell me about JavaScript',
|
||||
'How is TypeScript different?',
|
||||
'Can you write a simple TypeScript function?',
|
||||
'What are some TypeScript best practices?',
|
||||
];
|
||||
|
||||
const result = await harness.runScenario(
|
||||
'multi-turn-conversation',
|
||||
async (recordSnapshot) => {
|
||||
// Run through all turns as a piped sequence
|
||||
const stdinContent = prompts.join('\n');
|
||||
await rig.run({
|
||||
stdin: stdinContent,
|
||||
timeout: 120000,
|
||||
env: TEST_ENV,
|
||||
});
|
||||
|
||||
// Take snapshots after the conversation completes
|
||||
await recordSnapshot('after-all-turns');
|
||||
},
|
||||
);
|
||||
|
||||
if (UPDATE_BASELINES) {
|
||||
harness.updateScenarioBaseline(result);
|
||||
console.log(
|
||||
`Updated baseline for multi-turn-conversation: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
|
||||
);
|
||||
} else {
|
||||
harness.assertWithinBaseline(result);
|
||||
}
|
||||
});
|
||||
|
||||
it('multi-function-call-repo-search: memory after tool use', async () => {
|
||||
rig = new TestRig();
|
||||
rig.setup('memory-multi-func-call', {
|
||||
fakeResponsesPath: join(
|
||||
__dirname,
|
||||
'memory.multi-function-call.responses',
|
||||
),
|
||||
});
|
||||
|
||||
// Create directories first, then files in the workspace so the tools have targets
|
||||
rig.mkdir('packages/core/src/telemetry');
|
||||
rig.createFile(
|
||||
'packages/core/src/telemetry/memory-monitor.ts',
|
||||
'export class MemoryMonitor { constructor() {} }',
|
||||
);
|
||||
rig.createFile(
|
||||
'packages/core/src/telemetry/metrics.ts',
|
||||
'export function recordMemoryUsage() {}',
|
||||
);
|
||||
|
||||
const result = await harness.runScenario(
|
||||
'multi-function-call-repo-search',
|
||||
async (recordSnapshot) => {
|
||||
await rig.run({
|
||||
args: [
|
||||
'Search this repository for MemoryMonitor and tell me what it does',
|
||||
],
|
||||
timeout: 120000,
|
||||
env: TEST_ENV,
|
||||
});
|
||||
|
||||
await recordSnapshot('after-tool-calls');
|
||||
},
|
||||
);
|
||||
|
||||
if (UPDATE_BASELINES) {
|
||||
harness.updateScenarioBaseline(result);
|
||||
console.log(
|
||||
`Updated baseline for multi-function-call-repo-search: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
|
||||
);
|
||||
} else {
|
||||
harness.assertWithinBaseline(result);
|
||||
}
|
||||
});
|
||||
});
|
||||
2
memory-tests/memory.idle-startup.responses
Normal file
2
memory-tests/memory.idle-startup.responses
Normal file
@@ -0,0 +1,2 @@
|
||||
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help. What would you like to work on?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":12,"totalTokenCount":17,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]}
|
||||
4
memory-tests/memory.multi-function-call.responses
Normal file
4
memory-tests/memory.multi-function-call.responses
Normal file
@@ -0,0 +1,4 @@
|
||||
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll search for MemoryMonitor in the repository and analyze what it does."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":30,"candidatesTokenCount":15,"totalTokenCount":45,"promptTokensDetails":[{"modality":"TEXT","tokenCount":30}]}}]}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"grep_search","args":{"pattern":"MemoryMonitor","path":".","include_pattern":"*.ts"}}},{"functionCall":{"name":"list_directory","args":{"path":"packages/core/src/telemetry"}}},{"functionCall":{"name":"read_file","args":{"file_path":"packages/core/src/telemetry/memory-monitor.ts"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":30,"candidatesTokenCount":80,"totalTokenCount":110,"promptTokensDetails":[{"modality":"TEXT","tokenCount":30}]}}]}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I found the memory monitoring code. Here's a summary:\n\nThe `MemoryMonitor` class in `packages/core/src/telemetry/memory-monitor.ts` provides:\n\n1. **Continuous monitoring** via `start()`/`stop()` with configurable intervals\n2. **V8 heap snapshots** using `v8.getHeapStatistics()` and `process.memoryUsage()`\n3. **High-water mark tracking** to detect significant memory growth\n4. **Rate-limited recording** to avoid metric flood\n5. **Activity detection** — only records when user is active\n\nThe class uses a singleton pattern via `initializeMemoryMonitor()` for global access."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":500,"candidatesTokenCount":120,"totalTokenCount":620,"promptTokensDetails":[{"modality":"TEXT","tokenCount":500}]}}]}
|
||||
10
memory-tests/memory.multi-turn.responses
Normal file
10
memory-tests/memory.multi-turn.responses
Normal file
@@ -0,0 +1,10 @@
|
||||
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help you with your coding tasks. What would you like to work on today?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":18,"totalTokenCount":23,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]}
|
||||
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"JavaScript is a high-level, interpreted programming language. It was originally designed for adding interactivity to web pages."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":25,"candidatesTokenCount":60,"totalTokenCount":85,"promptTokensDetails":[{"modality":"TEXT","tokenCount":25}]}}]}
|
||||
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"TypeScript is a typed superset of JavaScript developed by Microsoft. The main differences from JavaScript are static typing and better tooling."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":45,"candidatesTokenCount":80,"totalTokenCount":125,"promptTokensDetails":[{"modality":"TEXT","tokenCount":45}]}}]}
|
||||
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Here is a simple TypeScript function:\n\nfunction greet(name: string): string { return `Hello, ${name}!`; }"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":60,"candidatesTokenCount":55,"totalTokenCount":115,"promptTokensDetails":[{"modality":"TEXT","tokenCount":60}]}}]}
|
||||
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Here are 5 key TypeScript best practices: Enable strict mode, prefer interfaces, use union types, leverage type inference, and use readonly."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":75,"candidatesTokenCount":70,"totalTokenCount":145,"promptTokensDetails":[{"modality":"TEXT","tokenCount":75}]}}]}
|
||||
2
memory-tests/memory.simple-prompt.responses
Normal file
2
memory-tests/memory.simple-prompt.responses
Normal file
@@ -0,0 +1,2 @@
|
||||
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The capital of France is Paris. It has been the capital since the 10th century and is known for iconic landmarks like the Eiffel Tower, the Louvre Museum, and Notre-Dame Cathedral. Paris is also the most populous city in France, with a metropolitan area population of over 12 million people."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":7,"candidatesTokenCount":55,"totalTokenCount":62,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7}]}}]}
|
||||
12
memory-tests/tsconfig.json
Normal file
12
memory-tests/tsconfig.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"extends": "../tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"noEmit": true,
|
||||
"allowJs": true
|
||||
},
|
||||
"include": ["**/*.ts"],
|
||||
"references": [
|
||||
{ "path": "../packages/core" },
|
||||
{ "path": "../packages/test-utils" }
|
||||
]
|
||||
}
|
||||
28
memory-tests/vitest.config.ts
Normal file
28
memory-tests/vitest.config.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { defineConfig } from 'vitest/config';
|
||||
|
||||
export default defineConfig({
|
||||
test: {
|
||||
testTimeout: 600000, // 10 minutes — memory profiling is slow
|
||||
globalSetup: './globalSetup.ts',
|
||||
reporters: ['default'],
|
||||
include: ['**/*.test.ts'],
|
||||
retry: 0, // No retries for memory tests — noise is handled by tolerance
|
||||
fileParallelism: false, // Must run serially to avoid memory interference
|
||||
pool: 'forks', // Use forks pool for --expose-gc support
|
||||
poolOptions: {
|
||||
forks: {
|
||||
singleFork: true, // Single process for accurate per-test memory readings
|
||||
execArgv: ['--expose-gc'], // Enable global.gc() for forced GC
|
||||
},
|
||||
},
|
||||
env: {
|
||||
GEMINI_TEST_TYPE: 'memory',
|
||||
},
|
||||
},
|
||||
});
|
||||
Reference in New Issue
Block a user