mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-15 07:41:03 -07:00
503 lines
14 KiB
TypeScript
/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
|
|
|
|
import { describe, it, beforeAll, afterAll, afterEach } from 'vitest';
|
|
import { TestRig, MemoryTestHarness } from '@google/gemini-cli-test-utils';
|
|
import { join, dirname } from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
import {
|
|
createWriteStream,
|
|
copyFileSync,
|
|
readFileSync,
|
|
existsSync,
|
|
mkdirSync,
|
|
rmSync,
|
|
} from 'node:fs';
|
|
import { randomUUID } from 'node:crypto';
|
|
|
|
// Resolve this test file's directory (ESM modules have no built-in __dirname).
const __dirname = dirname(fileURLToPath(import.meta.url));
// Per-scenario memory baselines stored next to this file.
const BASELINES_PATH = join(__dirname, 'baselines.json');
// When true, scenarios overwrite their stored baselines instead of asserting against them.
const UPDATE_BASELINES = process.env['UPDATE_MEMORY_BASELINES'] === 'true';
// Allowed deviation (percent) from a stored baseline before a scenario fails.
const TOLERANCE_PERCENT = 10;

// Fake API key for tests using fake responses
const TEST_ENV = { GEMINI_API_KEY: 'fake-memory-test-key' };
|
|
|
|
describe('Memory Usage Tests', () => {
|
|
let harness: MemoryTestHarness;
|
|
let rig: TestRig;
|
|
|
|
beforeAll(() => {
|
|
harness = new MemoryTestHarness({
|
|
baselinesPath: BASELINES_PATH,
|
|
defaultTolerancePercent: TOLERANCE_PERCENT,
|
|
gcCycles: 3,
|
|
gcDelayMs: 100,
|
|
sampleCount: 3,
|
|
});
|
|
});
|
|
|
|
afterEach(async () => {
|
|
await rig.cleanup();
|
|
});
|
|
|
|
afterAll(async () => {
|
|
// Generate the summary report after all tests
|
|
await harness.generateReport();
|
|
});
|
|
|
|
it('idle-session-startup: memory usage within baseline', async () => {
|
|
rig = new TestRig();
|
|
rig.setup('memory-idle-startup', {
|
|
fakeResponsesPath: join(__dirname, 'memory.idle-startup.responses'),
|
|
});
|
|
|
|
const result = await harness.runScenario(
|
|
'idle-session-startup',
|
|
async (recordSnapshot) => {
|
|
await rig.run({
|
|
args: ['hello'],
|
|
timeout: 120000,
|
|
env: TEST_ENV,
|
|
});
|
|
|
|
await recordSnapshot('after-startup');
|
|
},
|
|
);
|
|
|
|
if (UPDATE_BASELINES) {
|
|
harness.updateScenarioBaseline(result);
|
|
console.log(
|
|
`Updated baseline for idle-session-startup: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
|
|
);
|
|
} else {
|
|
harness.assertWithinBaseline(result);
|
|
}
|
|
});
|
|
|
|
it('simple-prompt-response: memory usage within baseline', async () => {
|
|
rig = new TestRig();
|
|
rig.setup('memory-simple-prompt', {
|
|
fakeResponsesPath: join(__dirname, 'memory.simple-prompt.responses'),
|
|
});
|
|
|
|
const result = await harness.runScenario(
|
|
'simple-prompt-response',
|
|
async (recordSnapshot) => {
|
|
await rig.run({
|
|
args: ['What is the capital of France?'],
|
|
timeout: 120000,
|
|
env: TEST_ENV,
|
|
});
|
|
|
|
await recordSnapshot('after-response');
|
|
},
|
|
);
|
|
|
|
if (UPDATE_BASELINES) {
|
|
harness.updateScenarioBaseline(result);
|
|
console.log(
|
|
`Updated baseline for simple-prompt-response: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
|
|
);
|
|
} else {
|
|
harness.assertWithinBaseline(result);
|
|
}
|
|
});
|
|
|
|
it('multi-turn-conversation: memory remains stable over turns', async () => {
|
|
rig = new TestRig();
|
|
rig.setup('memory-multi-turn', {
|
|
fakeResponsesPath: join(__dirname, 'memory.multi-turn.responses'),
|
|
});
|
|
|
|
const prompts = [
|
|
'Hello, what can you help me with?',
|
|
'Tell me about JavaScript',
|
|
'How is TypeScript different?',
|
|
'Can you write a simple TypeScript function?',
|
|
'What are some TypeScript best practices?',
|
|
];
|
|
|
|
const result = await harness.runScenario(
|
|
'multi-turn-conversation',
|
|
async (recordSnapshot) => {
|
|
// Run through all turns as a piped sequence
|
|
const stdinContent = prompts.join('\n');
|
|
await rig.run({
|
|
stdin: stdinContent,
|
|
timeout: 120000,
|
|
env: TEST_ENV,
|
|
});
|
|
|
|
// Take snapshots after the conversation completes
|
|
await recordSnapshot('after-all-turns');
|
|
},
|
|
);
|
|
|
|
if (UPDATE_BASELINES) {
|
|
harness.updateScenarioBaseline(result);
|
|
console.log(
|
|
`Updated baseline for multi-turn-conversation: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
|
|
);
|
|
} else {
|
|
harness.assertWithinBaseline(result);
|
|
}
|
|
});
|
|
|
|
it('multi-function-call-repo-search: memory after tool use', async () => {
|
|
rig = new TestRig();
|
|
rig.setup('memory-multi-func-call', {
|
|
fakeResponsesPath: join(
|
|
__dirname,
|
|
'memory.multi-function-call.responses',
|
|
),
|
|
});
|
|
|
|
// Create directories first, then files in the workspace so the tools have targets
|
|
rig.mkdir('packages/core/src/telemetry');
|
|
rig.createFile(
|
|
'packages/core/src/telemetry/memory-monitor.ts',
|
|
'export class MemoryMonitor { constructor() {} }',
|
|
);
|
|
rig.createFile(
|
|
'packages/core/src/telemetry/metrics.ts',
|
|
'export function recordMemoryUsage() {}',
|
|
);
|
|
|
|
const result = await harness.runScenario(
|
|
'multi-function-call-repo-search',
|
|
async (recordSnapshot) => {
|
|
await rig.run({
|
|
args: [
|
|
'Search this repository for MemoryMonitor and tell me what it does',
|
|
],
|
|
timeout: 120000,
|
|
env: TEST_ENV,
|
|
});
|
|
|
|
await recordSnapshot('after-tool-calls');
|
|
},
|
|
);
|
|
|
|
if (UPDATE_BASELINES) {
|
|
harness.updateScenarioBaseline(result);
|
|
console.log(
|
|
`Updated baseline for multi-function-call-repo-search: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
|
|
);
|
|
} else {
|
|
harness.assertWithinBaseline(result);
|
|
}
|
|
});
|
|
|
|
describe('Large Chat Scenarios', () => {
|
|
let sharedResumeResponsesPath: string;
|
|
let sharedActiveResponsesPath: string;
|
|
let sharedHistoryPath: string;
|
|
let sharedPrompts: string;
|
|
let tempDir: string;
|
|
|
|
beforeAll(async () => {
|
|
tempDir = join(__dirname, `large-chat-tmp-${randomUUID()}`);
|
|
mkdirSync(tempDir, { recursive: true });
|
|
|
|
const { resumeResponsesPath, activeResponsesPath, historyPath, prompts } =
|
|
await generateSharedLargeChatData(tempDir);
|
|
sharedActiveResponsesPath = activeResponsesPath;
|
|
sharedResumeResponsesPath = resumeResponsesPath;
|
|
sharedHistoryPath = historyPath;
|
|
sharedPrompts = prompts;
|
|
}, 60000);
|
|
|
|
afterAll(() => {
|
|
if (existsSync(tempDir)) {
|
|
rmSync(tempDir, { recursive: true, force: true });
|
|
}
|
|
});
|
|
|
|
afterEach(async () => {
|
|
await rig.cleanup();
|
|
});
|
|
|
|
it('large-chat: memory usage within baseline', async () => {
|
|
rig = new TestRig();
|
|
rig.setup('memory-large-chat', {
|
|
fakeResponsesPath: sharedActiveResponsesPath,
|
|
});
|
|
|
|
const result = await harness.runScenario(
|
|
'large-chat',
|
|
async (recordSnapshot) => {
|
|
await rig.run({
|
|
stdin: sharedPrompts,
|
|
timeout: 600000,
|
|
env: TEST_ENV,
|
|
});
|
|
|
|
await recordSnapshot('after-large-chat');
|
|
},
|
|
);
|
|
|
|
if (UPDATE_BASELINES) {
|
|
harness.updateScenarioBaseline(result);
|
|
console.log(
|
|
`Updated baseline for large-chat: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
|
|
);
|
|
} else {
|
|
harness.assertWithinBaseline(result);
|
|
}
|
|
});
|
|
|
|
it('resume-large-chat: memory usage within baseline', async () => {
|
|
rig = new TestRig();
|
|
rig.setup('memory-resume-large-chat', {
|
|
fakeResponsesPath: sharedResumeResponsesPath,
|
|
});
|
|
|
|
const result = await harness.runScenario(
|
|
'resume-large-chat',
|
|
async (recordSnapshot) => {
|
|
// Ensure the history file is linked
|
|
const targetChatsDir = join(
|
|
rig.testDir!,
|
|
'tmp',
|
|
'test-project-hash',
|
|
'chats',
|
|
);
|
|
mkdirSync(targetChatsDir, { recursive: true });
|
|
const targetHistoryPath = join(
|
|
targetChatsDir,
|
|
'large-chat-session.json',
|
|
);
|
|
if (existsSync(targetHistoryPath)) rmSync(targetHistoryPath);
|
|
copyFileSync(sharedHistoryPath, targetHistoryPath);
|
|
|
|
await rig.run({
|
|
// add a prompt to make sure it does not hang there and exits immediately
|
|
args: ['--resume', 'latest', '--prompt', 'hello'],
|
|
timeout: 600000,
|
|
env: TEST_ENV,
|
|
});
|
|
|
|
await recordSnapshot('after-resume-large-chat');
|
|
},
|
|
);
|
|
|
|
if (UPDATE_BASELINES) {
|
|
harness.updateScenarioBaseline(result);
|
|
console.log(
|
|
`Updated baseline for resume-large-chat: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
|
|
);
|
|
} else {
|
|
harness.assertWithinBaseline(result);
|
|
}
|
|
});
|
|
|
|
it('resume-large-chat-with-messages: memory usage within baseline', async () => {
|
|
rig = new TestRig();
|
|
rig.setup('memory-resume-large-chat-msgs', {
|
|
fakeResponsesPath: sharedResumeResponsesPath,
|
|
});
|
|
|
|
const result = await harness.runScenario(
|
|
'resume-large-chat-with-messages',
|
|
async (recordSnapshot) => {
|
|
// Ensure the history file is linked
|
|
const targetChatsDir = join(
|
|
rig.testDir!,
|
|
'tmp',
|
|
'test-project-hash',
|
|
'chats',
|
|
);
|
|
mkdirSync(targetChatsDir, { recursive: true });
|
|
const targetHistoryPath = join(
|
|
targetChatsDir,
|
|
'large-chat-session.json',
|
|
);
|
|
if (existsSync(targetHistoryPath)) rmSync(targetHistoryPath);
|
|
copyFileSync(sharedHistoryPath, targetHistoryPath);
|
|
|
|
const stdinContent = 'new prompt 1\nnew prompt 2\n';
|
|
|
|
await rig.run({
|
|
args: ['--resume', 'latest'],
|
|
stdin: stdinContent,
|
|
timeout: 600000,
|
|
env: TEST_ENV,
|
|
});
|
|
|
|
await recordSnapshot('after-resume-and-append');
|
|
},
|
|
);
|
|
|
|
if (UPDATE_BASELINES) {
|
|
harness.updateScenarioBaseline(result);
|
|
console.log(
|
|
`Updated baseline for resume-large-chat-with-messages: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
|
|
);
|
|
} else {
|
|
harness.assertWithinBaseline(result);
|
|
}
|
|
});
|
|
});
|
|
});
|
|
|
|
async function generateSharedLargeChatData(tempDir: string) {
|
|
const resumeResponsesPath = join(tempDir, 'large-chat-resume-chat.responses');
|
|
const activeResponsesPath = join(tempDir, 'large-chat-active-chat.responses');
|
|
const historyPath = join(tempDir, 'large-chat-history.json');
|
|
const sourceSessionPath = join(__dirname, 'large-chat-session.json');
|
|
|
|
const session = JSON.parse(readFileSync(sourceSessionPath, 'utf8'));
|
|
const messages = session.messages;
|
|
|
|
copyFileSync(sourceSessionPath, historyPath);
|
|
|
|
// Generate fake responses for active chat
|
|
const promptsList: string[] = [];
|
|
const activeResponsesStream = createWriteStream(activeResponsesPath);
|
|
const complexityResponse = {
|
|
method: 'generateContent',
|
|
response: {
|
|
candidates: [
|
|
{
|
|
content: {
|
|
parts: [
|
|
{
|
|
text: '{"complexity_reasoning":"simple","complexity_score":1}',
|
|
},
|
|
],
|
|
role: 'model',
|
|
},
|
|
finishReason: 'STOP',
|
|
index: 0,
|
|
},
|
|
],
|
|
},
|
|
};
|
|
const summaryResponse = {
|
|
method: 'generateContent',
|
|
response: {
|
|
candidates: [
|
|
{
|
|
content: {
|
|
parts: [
|
|
{ text: '{"originalSummary":"large chat summary","events":[]}' },
|
|
],
|
|
role: 'model',
|
|
},
|
|
finishReason: 'STOP',
|
|
index: 0,
|
|
},
|
|
],
|
|
},
|
|
};
|
|
|
|
for (let i = 0; i < messages.length; i++) {
|
|
const msg = messages[i];
|
|
if (msg.type === 'user') {
|
|
promptsList.push(msg.content[0].text);
|
|
|
|
// Start of a new turn
|
|
activeResponsesStream.write(JSON.stringify(complexityResponse) + '\n');
|
|
|
|
// Find all subsequent gemini messages until the next user message
|
|
let j = i + 1;
|
|
while (j < messages.length && messages[j].type === 'gemini') {
|
|
const geminiMsg = messages[j];
|
|
const parts = [];
|
|
if (geminiMsg.content) {
|
|
parts.push({ text: geminiMsg.content });
|
|
}
|
|
if (geminiMsg.toolCalls) {
|
|
for (const tc of geminiMsg.toolCalls) {
|
|
parts.push({
|
|
functionCall: {
|
|
name: tc.name,
|
|
args: tc.args,
|
|
},
|
|
});
|
|
}
|
|
}
|
|
|
|
activeResponsesStream.write(
|
|
JSON.stringify({
|
|
method: 'generateContentStream',
|
|
response: [
|
|
{
|
|
candidates: [
|
|
{
|
|
content: { parts, role: 'model' },
|
|
finishReason: 'STOP',
|
|
index: 0,
|
|
},
|
|
],
|
|
usageMetadata: {
|
|
promptTokenCount: 100,
|
|
candidatesTokenCount: 100,
|
|
totalTokenCount: 200,
|
|
promptTokensDetails: [{ modality: 'TEXT', tokenCount: 100 }],
|
|
},
|
|
},
|
|
],
|
|
}) + '\n',
|
|
);
|
|
j++;
|
|
}
|
|
// End of turn
|
|
activeResponsesStream.write(JSON.stringify(summaryResponse) + '\n');
|
|
// Skip the gemini messages we just processed
|
|
i = j - 1;
|
|
}
|
|
}
|
|
activeResponsesStream.end();
|
|
|
|
// Generate responses for resumed chat
|
|
const resumeResponsesStream = createWriteStream(resumeResponsesPath);
|
|
for (let i = 0; i < 5; i++) {
|
|
resumeResponsesStream.write(JSON.stringify(complexityResponse) + '\n');
|
|
resumeResponsesStream.write(
|
|
JSON.stringify({
|
|
method: 'generateContentStream',
|
|
response: [
|
|
{
|
|
candidates: [
|
|
{
|
|
content: {
|
|
parts: [{ text: `Resume response ${i}` }],
|
|
role: 'model',
|
|
},
|
|
finishReason: 'STOP',
|
|
index: 0,
|
|
},
|
|
],
|
|
usageMetadata: {
|
|
promptTokenCount: 10,
|
|
candidatesTokenCount: 10,
|
|
totalTokenCount: 20,
|
|
promptTokensDetails: [{ modality: 'TEXT', tokenCount: 10 }],
|
|
},
|
|
},
|
|
],
|
|
}) + '\n',
|
|
);
|
|
resumeResponsesStream.write(JSON.stringify(summaryResponse) + '\n');
|
|
}
|
|
resumeResponsesStream.end();
|
|
|
|
// Wait for streams to finish
|
|
await Promise.all([
|
|
new Promise((res) => activeResponsesStream.on('finish', res)),
|
|
new Promise((res) => resumeResponsesStream.on('finish', res)),
|
|
]);
|
|
|
|
return {
|
|
resumeResponsesPath,
|
|
activeResponsesPath,
|
|
historyPath,
|
|
prompts: promptsList.join('\n'),
|
|
};
|
|
}
|