Files
gemini-cli/memory-tests/memory-usage.test.ts
2026-04-10 18:47:23 +00:00

503 lines
14 KiB
TypeScript

/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, beforeAll, afterAll, afterEach } from 'vitest';
import { TestRig, MemoryTestHarness } from '@google/gemini-cli-test-utils';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import {
createWriteStream,
copyFileSync,
readFileSync,
existsSync,
mkdirSync,
rmSync,
} from 'node:fs';
import { randomUUID } from 'node:crypto';
// ES modules have no built-in __dirname; rebuild it from this module's URL so
// fixture files can be addressed relative to this test file.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Baselines file handed to the MemoryTestHarness; it sits next to this test.
const BASELINES_PATH = join(__dirname, 'baselines.json');
// True only when UPDATE_MEMORY_BASELINES is exactly the string 'true'. In that
// mode each test calls harness.updateScenarioBaseline() instead of
// harness.assertWithinBaseline().
const UPDATE_BASELINES = process.env['UPDATE_MEMORY_BASELINES'] === 'true';
// Passed to the harness as `defaultTolerancePercent`.
const TOLERANCE_PERCENT = 10;
// Environment passed to every rig.run() call. The key is a placeholder: the
// tests are set up with canned response files (`fakeResponsesPath`).
const TEST_ENV = { GEMINI_API_KEY: 'fake-memory-test-key' };
describe('Memory Usage Tests', () => {
let harness: MemoryTestHarness;
let rig: TestRig;
// One harness is shared by every scenario in this suite so that the summary
// report produced in afterAll covers all of them.
beforeAll(() => {
  harness = new MemoryTestHarness({
    baselinesPath: BASELINES_PATH,
    defaultTolerancePercent: TOLERANCE_PERCENT,
    gcCycles: 3,
    gcDelayMs: 100,
    sampleCount: 3,
  });
});

// Runs after each test in this suite (Vitest also applies it to tests inside
// nested describe blocks).
afterEach(async () => {
  // `rig` is only assigned inside the test bodies. If a test throws before
  // that assignment, an unconditional `rig.cleanup()` would raise a TypeError
  // here and bury the original failure, so skip cleanup when no rig exists.
  await rig?.cleanup();
});

afterAll(async () => {
  // Generate the summary report after all tests
  await harness.generateReport();
});
// Scenario: a single run with the prompt 'hello', replayed against canned
// responses, with one snapshot taken after the run returns.
it('idle-session-startup: memory usage within baseline', async () => {
  const fakeResponsesPath = join(__dirname, 'memory.idle-startup.responses');
  rig = new TestRig();
  rig.setup('memory-idle-startup', { fakeResponsesPath });

  const measurement = await harness.runScenario(
    'idle-session-startup',
    async (takeSnapshot) => {
      await rig.run({ args: ['hello'], timeout: 120000, env: TEST_ENV });
      await takeSnapshot('after-startup');
    },
  );

  // Normal mode: compare against the stored baseline and stop.
  if (!UPDATE_BASELINES) {
    harness.assertWithinBaseline(measurement);
    return;
  }

  // Update mode: store the new baseline and log the final heap size in MB.
  harness.updateScenarioBaseline(measurement);
  const heapMb = (measurement.finalHeapUsed / (1024 * 1024)).toFixed(1);
  console.log(`Updated baseline for idle-session-startup: ${heapMb} MB`);
});
// Scenario: one question passed as a command-line argument, replayed against
// canned responses, with one snapshot taken after the run returns.
it('simple-prompt-response: memory usage within baseline', async () => {
  const fakeResponsesPath = join(__dirname, 'memory.simple-prompt.responses');
  rig = new TestRig();
  rig.setup('memory-simple-prompt', { fakeResponsesPath });

  const measurement = await harness.runScenario(
    'simple-prompt-response',
    async (takeSnapshot) => {
      await rig.run({
        args: ['What is the capital of France?'],
        timeout: 120000,
        env: TEST_ENV,
      });
      await takeSnapshot('after-response');
    },
  );

  // Normal mode: compare against the stored baseline and stop.
  if (!UPDATE_BASELINES) {
    harness.assertWithinBaseline(measurement);
    return;
  }

  // Update mode: store the new baseline and log the final heap size in MB.
  harness.updateScenarioBaseline(measurement);
  const heapMb = (measurement.finalHeapUsed / (1024 * 1024)).toFixed(1);
  console.log(`Updated baseline for simple-prompt-response: ${heapMb} MB`);
});
// Scenario: five prompts piped through stdin as newline-separated lines in a
// single run, with one snapshot taken once that run returns.
it('multi-turn-conversation: memory remains stable over turns', async () => {
  rig = new TestRig();
  rig.setup('memory-multi-turn', {
    fakeResponsesPath: join(__dirname, 'memory.multi-turn.responses'),
  });

  // One prompt per line; the whole conversation is fed to a single run.
  const pipedConversation = [
    'Hello, what can you help me with?',
    'Tell me about JavaScript',
    'How is TypeScript different?',
    'Can you write a simple TypeScript function?',
    'What are some TypeScript best practices?',
  ].join('\n');

  const measurement = await harness.runScenario(
    'multi-turn-conversation',
    async (takeSnapshot) => {
      await rig.run({
        stdin: pipedConversation,
        timeout: 120000,
        env: TEST_ENV,
      });
      // Snapshot only after the whole conversation has completed.
      await takeSnapshot('after-all-turns');
    },
  );

  // Normal mode: compare against the stored baseline and stop.
  if (!UPDATE_BASELINES) {
    harness.assertWithinBaseline(measurement);
    return;
  }

  // Update mode: store the new baseline and log the final heap size in MB.
  harness.updateScenarioBaseline(measurement);
  const heapMb = (measurement.finalHeapUsed / (1024 * 1024)).toFixed(1);
  console.log(`Updated baseline for multi-turn-conversation: ${heapMb} MB`);
});
// Scenario: a repository-search prompt run in a workspace seeded with two
// small source files, with one snapshot taken after the run returns.
it('multi-function-call-repo-search: memory after tool use', async () => {
  const fakeResponsesPath = join(
    __dirname,
    'memory.multi-function-call.responses',
  );
  rig = new TestRig();
  rig.setup('memory-multi-func-call', { fakeResponsesPath });

  // Seed the workspace (directory first, then files) so the tools have targets.
  const seedFiles: Array<[string, string]> = [
    [
      'packages/core/src/telemetry/memory-monitor.ts',
      'export class MemoryMonitor { constructor() {} }',
    ],
    [
      'packages/core/src/telemetry/metrics.ts',
      'export function recordMemoryUsage() {}',
    ],
  ];
  rig.mkdir('packages/core/src/telemetry');
  for (const [relativePath, contents] of seedFiles) {
    rig.createFile(relativePath, contents);
  }

  const measurement = await harness.runScenario(
    'multi-function-call-repo-search',
    async (takeSnapshot) => {
      await rig.run({
        args: [
          'Search this repository for MemoryMonitor and tell me what it does',
        ],
        timeout: 120000,
        env: TEST_ENV,
      });
      await takeSnapshot('after-tool-calls');
    },
  );

  // Normal mode: compare against the stored baseline and stop.
  if (!UPDATE_BASELINES) {
    harness.assertWithinBaseline(measurement);
    return;
  }

  // Update mode: store the new baseline and log the final heap size in MB.
  harness.updateScenarioBaseline(measurement);
  const heapMb = (measurement.finalHeapUsed / (1024 * 1024)).toFixed(1);
  console.log(
    `Updated baseline for multi-function-call-repo-search: ${heapMb} MB`,
  );
});
describe('Large Chat Scenarios', () => {
let sharedResumeResponsesPath: string;
let sharedActiveResponsesPath: string;
let sharedHistoryPath: string;
let sharedPrompts: string;
let tempDir: string;
// Generate the large-chat fixtures once for the whole nested suite, inside a
// uniquely named scratch directory next to this file. The hook is given
// 60 seconds to finish.
beforeAll(async () => {
  const scratchName = `large-chat-tmp-${randomUUID()}`;
  tempDir = join(__dirname, scratchName);
  mkdirSync(tempDir, { recursive: true });

  const generated = await generateSharedLargeChatData(tempDir);
  sharedActiveResponsesPath = generated.activeResponsesPath;
  sharedResumeResponsesPath = generated.resumeResponsesPath;
  sharedHistoryPath = generated.historyPath;
  sharedPrompts = generated.prompts;
}, 60000);
// Remove the scratch directory created in beforeAll.
afterAll(() => {
  // `tempDir` is still unassigned if beforeAll failed before creating it, so
  // guard on the variable itself. No existsSync() pre-check is needed:
  // `force: true` already makes rmSync ignore a path that does not exist.
  if (tempDir) {
    rmSync(tempDir, { recursive: true, force: true });
  }
});
// No afterEach here on purpose: the afterEach registered on the enclosing
// 'Memory Usage Tests' suite also runs for the tests in this nested suite and
// already calls rig.cleanup(). A second hook at this level made cleanup run
// twice per test.
// Scenario: replay every prompt of the generated large chat through stdin in
// a single run, with one snapshot taken after the run returns.
it('large-chat: memory usage within baseline', async () => {
  rig = new TestRig();
  rig.setup('memory-large-chat', {
    fakeResponsesPath: sharedActiveResponsesPath,
  });

  const measurement = await harness.runScenario(
    'large-chat',
    async (takeSnapshot) => {
      await rig.run({ stdin: sharedPrompts, timeout: 600000, env: TEST_ENV });
      await takeSnapshot('after-large-chat');
    },
  );

  // Normal mode: compare against the stored baseline and stop.
  if (!UPDATE_BASELINES) {
    harness.assertWithinBaseline(measurement);
    return;
  }

  // Update mode: store the new baseline and log the final heap size in MB.
  harness.updateScenarioBaseline(measurement);
  const heapMb = (measurement.finalHeapUsed / (1024 * 1024)).toFixed(1);
  console.log(`Updated baseline for large-chat: ${heapMb} MB`);
});
// Scenario: resume the recorded large chat with `--resume latest` plus a
// one-off prompt, with one snapshot taken after the run returns.
it('resume-large-chat: memory usage within baseline', async () => {
  rig = new TestRig();
  rig.setup('memory-resume-large-chat', {
    fakeResponsesPath: sharedResumeResponsesPath,
  });

  const measurement = await harness.runScenario(
    'resume-large-chat',
    async (takeSnapshot) => {
      // Copy the recorded session into the rig's chats directory, replacing
      // any file already there (presumably this is where `--resume latest`
      // looks for saved sessions -- confirm against the CLI).
      const chatsDir = join(
        rig.testDir!,
        'tmp',
        'test-project-hash',
        'chats',
      );
      const sessionCopyPath = join(chatsDir, 'large-chat-session.json');
      mkdirSync(chatsDir, { recursive: true });
      if (existsSync(sessionCopyPath)) {
        rmSync(sessionCopyPath);
      }
      copyFileSync(sharedHistoryPath, sessionCopyPath);

      await rig.run({
        // The extra prompt makes the run exit immediately instead of hanging.
        args: ['--resume', 'latest', '--prompt', 'hello'],
        timeout: 600000,
        env: TEST_ENV,
      });
      await takeSnapshot('after-resume-large-chat');
    },
  );

  // Normal mode: compare against the stored baseline and stop.
  if (!UPDATE_BASELINES) {
    harness.assertWithinBaseline(measurement);
    return;
  }

  // Update mode: store the new baseline and log the final heap size in MB.
  harness.updateScenarioBaseline(measurement);
  const heapMb = (measurement.finalHeapUsed / (1024 * 1024)).toFixed(1);
  console.log(`Updated baseline for resume-large-chat: ${heapMb} MB`);
});
// Scenario: resume the recorded large chat with `--resume latest` and then
// feed two further prompts through stdin, with one snapshot taken after the
// run returns.
it('resume-large-chat-with-messages: memory usage within baseline', async () => {
  rig = new TestRig();
  rig.setup('memory-resume-large-chat-msgs', {
    fakeResponsesPath: sharedResumeResponsesPath,
  });

  const measurement = await harness.runScenario(
    'resume-large-chat-with-messages',
    async (takeSnapshot) => {
      // Copy the recorded session into the rig's chats directory, replacing
      // any file already there (presumably this is where `--resume latest`
      // looks for saved sessions -- confirm against the CLI).
      const chatsDir = join(
        rig.testDir!,
        'tmp',
        'test-project-hash',
        'chats',
      );
      const sessionCopyPath = join(chatsDir, 'large-chat-session.json');
      mkdirSync(chatsDir, { recursive: true });
      if (existsSync(sessionCopyPath)) {
        rmSync(sessionCopyPath);
      }
      copyFileSync(sharedHistoryPath, sessionCopyPath);

      // Two extra prompts, one per line, appended to the resumed chat.
      const followUpPrompts = 'new prompt 1\nnew prompt 2\n';
      await rig.run({
        args: ['--resume', 'latest'],
        stdin: followUpPrompts,
        timeout: 600000,
        env: TEST_ENV,
      });
      await takeSnapshot('after-resume-and-append');
    },
  );

  // Normal mode: compare against the stored baseline and stop.
  if (!UPDATE_BASELINES) {
    harness.assertWithinBaseline(measurement);
    return;
  }

  // Update mode: store the new baseline and log the final heap size in MB.
  harness.updateScenarioBaseline(measurement);
  const heapMb = (measurement.finalHeapUsed / (1024 * 1024)).toFixed(1);
  console.log(
    `Updated baseline for resume-large-chat-with-messages: ${heapMb} MB`,
  );
});
});
});
/**
 * Serialises a fake non-streaming `generateContent` reply as one
 * newline-terminated JSON line. The reply holds a single `model` candidate
 * whose only part is `text`.
 */
function fakeGenerateContentLine(text: string): string {
  return (
    JSON.stringify({
      method: 'generateContent',
      response: {
        candidates: [
          {
            content: { parts: [{ text }], role: 'model' },
            finishReason: 'STOP',
            index: 0,
          },
        ],
      },
    }) + '\n'
  );
}

/**
 * Serialises a fake `generateContentStream` reply (a single chunk) as one
 * newline-terminated JSON line.
 *
 * @param parts - Parts placed in the chunk's only candidate.
 * @param tokenCount - Reported as both the prompt and the candidates token
 *   count; the total is reported as twice this value.
 */
function fakeGenerateContentStreamLine(
  parts: unknown[],
  tokenCount: number,
): string {
  return (
    JSON.stringify({
      method: 'generateContentStream',
      response: [
        {
          candidates: [
            {
              content: { parts, role: 'model' },
              finishReason: 'STOP',
              index: 0,
            },
          ],
          usageMetadata: {
            promptTokenCount: tokenCount,
            candidatesTokenCount: tokenCount,
            totalTokenCount: tokenCount * 2,
            promptTokensDetails: [{ modality: 'TEXT', tokenCount }],
          },
        },
      ],
    }) + '\n'
  );
}

/**
 * Resolves once `stream` emits 'finish' and rejects if it emits 'error'
 * first, so a failed write surfaces as a rejected promise instead of an
 * unhandled 'error' event and a promise that never settles.
 */
function waitForStreamFinish(
  stream: ReturnType<typeof createWriteStream>,
): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    stream.once('finish', () => resolve());
    stream.once('error', reject);
  });
}

/**
 * Generates the fixture files shared by the large-chat scenarios.
 *
 * Reads `large-chat-session.json` from the directory of this test file and
 * writes three files into `tempDir`:
 * - `large-chat-history.json`: a verbatim copy of the source session;
 * - `large-chat-active-chat.responses`: for every `user` message, one
 *   complexity reply, one streamed reply per directly following `gemini`
 *   message, and one summary reply;
 * - `large-chat-resume-chat.responses`: five canned turns (complexity reply,
 *   streamed "Resume response N" reply, summary reply).
 *
 * Messages that are neither `user` nor part of the `gemini` run directly
 * following a `user` message are skipped.
 *
 * @param tempDir - Existing directory that receives the generated files.
 * @returns The paths of the three generated files, plus `prompts`: the text
 *   of every user message joined with newlines.
 * @throws Error if the source session has no `messages` array, or if reading
 *   or writing any of the files fails.
 */
async function generateSharedLargeChatData(tempDir: string): Promise<{
  resumeResponsesPath: string;
  activeResponsesPath: string;
  historyPath: string;
  prompts: string;
}> {
  const resumeResponsesPath = join(tempDir, 'large-chat-resume-chat.responses');
  const activeResponsesPath = join(tempDir, 'large-chat-active-chat.responses');
  const historyPath = join(tempDir, 'large-chat-history.json');
  const sourceSessionPath = join(__dirname, 'large-chat-session.json');

  const session = JSON.parse(readFileSync(sourceSessionPath, 'utf8'));
  const messages = session?.messages;
  if (!Array.isArray(messages)) {
    // Fail with a clear message rather than a TypeError inside the loop below.
    throw new Error(`Expected a "messages" array in ${sourceSessionPath}`);
  }
  copyFileSync(sourceSessionPath, historyPath);

  // These two lines are identical for every turn, so serialise them once.
  const complexityLine = fakeGenerateContentLine(
    '{"complexity_reasoning":"simple","complexity_score":1}',
  );
  const summaryLine = fakeGenerateContentLine(
    '{"originalSummary":"large chat summary","events":[]}',
  );

  // Generate fake responses for active chat
  const promptsList: string[] = [];
  const activeResponsesStream = createWriteStream(activeResponsesPath);
  let index = 0;
  while (index < messages.length) {
    const msg = messages[index];
    index++;
    if (msg.type !== 'user') {
      continue;
    }

    // Start of a new turn. The prompt text is read from the first element of
    // the user message's `content` array.
    promptsList.push(msg.content[0].text);
    activeResponsesStream.write(complexityLine);

    // Emit one streamed reply for each gemini message that directly follows.
    while (index < messages.length && messages[index].type === 'gemini') {
      const geminiMsg = messages[index];
      const parts: unknown[] = [];
      if (geminiMsg.content) {
        parts.push({ text: geminiMsg.content });
      }
      if (geminiMsg.toolCalls) {
        for (const tc of geminiMsg.toolCalls) {
          parts.push({ functionCall: { name: tc.name, args: tc.args } });
        }
      }
      activeResponsesStream.write(fakeGenerateContentStreamLine(parts, 100));
      index++;
    }

    // End of turn
    activeResponsesStream.write(summaryLine);
  }
  activeResponsesStream.end();

  // Generate responses for resumed chat
  const resumeResponsesStream = createWriteStream(resumeResponsesPath);
  for (let turn = 0; turn < 5; turn++) {
    resumeResponsesStream.write(complexityLine);
    resumeResponsesStream.write(
      fakeGenerateContentStreamLine([{ text: `Resume response ${turn}` }], 10),
    );
    resumeResponsesStream.write(summaryLine);
  }
  resumeResponsesStream.end();

  // Everything above ran synchronously, so the listeners registered here are
  // attached in the same tick in which the streams were created and ended.
  await Promise.all([
    waitForStreamFinish(activeResponsesStream),
    waitForStreamFinish(resumeResponsesStream),
  ]);

  return {
    resumeResponsesPath,
    activeResponsesPath,
    historyPath,
    prompts: promptsList.join('\n'),
  };
}