feat(test): refactor the memory usage test to use metrics from the CLI process instead of the test runner (#25708)

This commit is contained in:
cynthialong0-0
2026-04-21 10:06:22 -07:00
committed by GitHub
parent 2c14954010
commit aee2cde1a3
5 changed files with 284 additions and 223 deletions
+36 -36
View File
@@ -1,55 +1,55 @@
{
"version": 1,
"updatedAt": "2026-04-10T15:36:04.547Z",
"updatedAt": "2026-04-20T18:04:59.671Z",
"scenarios": {
"multi-turn-conversation": {
"heapUsedBytes": 120082704,
"heapTotalBytes": 177586176,
"rssBytes": 269172736,
"externalBytes": 4304053,
"timestamp": "2026-04-10T15:35:17.603Z"
"heapUsedMB": 68.8,
"heapTotalMB": 91.2,
"rssMB": 215.4,
"externalMB": 93.8,
"timestamp": "2026-04-20T18:02:40.101Z"
},
"multi-function-call-repo-search": {
"heapUsedBytes": 104644984,
"heapTotalBytes": 111575040,
"rssBytes": 204079104,
"externalBytes": 4304053,
"timestamp": "2026-04-10T15:35:22.480Z"
"heapUsedMB": 73.5,
"heapTotalMB": 93.1,
"rssMB": 223.6,
"externalMB": 97.7,
"timestamp": "2026-04-20T18:02:42.032Z"
},
"idle-session-startup": {
"heapUsedBytes": 119813672,
"heapTotalBytes": 177061888,
"rssBytes": 267943936,
"externalBytes": 4304053,
"timestamp": "2026-04-10T15:35:08.035Z"
"heapUsedMB": 69.8,
"heapTotalMB": 92.4,
"rssMB": 217.4,
"externalMB": 93.8,
"timestamp": "2026-04-20T18:02:36.294Z"
},
"simple-prompt-response": {
"heapUsedBytes": 119722064,
"heapTotalBytes": 177324032,
"rssBytes": 268812288,
"externalBytes": 4304053,
"timestamp": "2026-04-10T15:35:12.770Z"
"heapUsedMB": 69.5,
"heapTotalMB": 92.4,
"rssMB": 216.1,
"externalMB": 93.8,
"timestamp": "2026-04-20T18:02:38.198Z"
},
"resume-large-chat-with-messages": {
"heapUsedBytes": 106545568,
"heapTotalBytes": 111509504,
"rssBytes": 202596352,
"externalBytes": 4306101,
"timestamp": "2026-04-10T15:36:04.547Z"
"heapUsedMB": 887.1,
"heapTotalMB": 954.3,
"rssMB": 1109.6,
"externalMB": 103.2,
"timestamp": "2026-04-20T18:04:59.671Z"
},
"resume-large-chat": {
"heapUsedBytes": 106513760,
"heapTotalBytes": 111509504,
"rssBytes": 202596352,
"externalBytes": 4306101,
"timestamp": "2026-04-10T15:35:59.528Z"
"heapUsedMB": 885.6,
"heapTotalMB": 955.6,
"rssMB": 1107.8,
"externalMB": 110.5,
"timestamp": "2026-04-20T18:04:06.526Z"
},
"large-chat": {
"heapUsedBytes": 106471568,
"heapTotalBytes": 111509504,
"rssBytes": 202596352,
"externalBytes": 4306101,
"timestamp": "2026-04-10T15:35:53.180Z"
"heapUsedMB": 158.5,
"heapTotalMB": 193,
"rssMB": 787.9,
"externalMB": 104,
"timestamp": "2026-04-20T18:03:12.486Z"
}
}
}
+30 -8
View File
@@ -16,15 +16,21 @@ import {
mkdirSync,
rmSync,
} from 'node:fs';
import { randomUUID } from 'node:crypto';
import { randomUUID, createHash } from 'node:crypto';
const __dirname = dirname(fileURLToPath(import.meta.url));
const BASELINES_PATH = join(__dirname, 'baselines.json');
const UPDATE_BASELINES = process.env['UPDATE_MEMORY_BASELINES'] === 'true';
function getProjectHash(projectRoot: string): string {
return createHash('sha256').update(projectRoot).digest('hex');
}
const TOLERANCE_PERCENT = 10;
// Fake API key for tests using fake responses
const TEST_ENV = { GEMINI_API_KEY: 'fake-memory-test-key' };
const TEST_ENV = {
GEMINI_API_KEY: 'fake-memory-test-key',
GEMINI_MEMORY_MONITOR_INTERVAL: '100',
};
describe('Memory Usage Tests', () => {
let harness: MemoryTestHarness;
@@ -56,6 +62,7 @@ describe('Memory Usage Tests', () => {
});
const result = await harness.runScenario(
rig,
'idle-session-startup',
async (recordSnapshot) => {
await rig.run({
@@ -85,6 +92,7 @@ describe('Memory Usage Tests', () => {
});
const result = await harness.runScenario(
rig,
'simple-prompt-response',
async (recordSnapshot) => {
await rig.run({
@@ -122,6 +130,7 @@ describe('Memory Usage Tests', () => {
];
const result = await harness.runScenario(
rig,
'multi-turn-conversation',
async (recordSnapshot) => {
// Run through all turns as a piped sequence
@@ -144,6 +153,9 @@ describe('Memory Usage Tests', () => {
);
} else {
harness.assertWithinBaseline(result);
harness.assertMemoryReturnsToBaseline(result.snapshots, 20);
const { leaked, message } = harness.analyzeSnapshots(result.snapshots);
if (leaked) console.warn(`${message}`);
}
});
@@ -168,6 +180,7 @@ describe('Memory Usage Tests', () => {
);
const result = await harness.runScenario(
rig,
'multi-function-call-repo-search',
async (recordSnapshot) => {
await rig.run({
@@ -189,6 +202,7 @@ describe('Memory Usage Tests', () => {
);
} else {
harness.assertWithinBaseline(result);
harness.assertMemoryReturnsToBaseline(result.snapshots, 20);
}
});
@@ -228,6 +242,7 @@ describe('Memory Usage Tests', () => {
});
const result = await harness.runScenario(
rig,
'large-chat',
async (recordSnapshot) => {
await rig.run({
@@ -257,19 +272,21 @@ describe('Memory Usage Tests', () => {
});
const result = await harness.runScenario(
rig,
'resume-large-chat',
async (recordSnapshot) => {
// Ensure the history file is linked
const targetChatsDir = join(
rig.testDir!,
rig.homeDir!,
'.gemini',
'tmp',
'test-project-hash',
getProjectHash(rig.testDir!),
'chats',
);
mkdirSync(targetChatsDir, { recursive: true });
const targetHistoryPath = join(
targetChatsDir,
'large-chat-session.json',
'session-large-chat.json',
);
if (existsSync(targetHistoryPath)) rmSync(targetHistoryPath);
copyFileSync(sharedHistoryPath, targetHistoryPath);
@@ -302,19 +319,21 @@ describe('Memory Usage Tests', () => {
});
const result = await harness.runScenario(
rig,
'resume-large-chat-with-messages',
async (recordSnapshot) => {
// Ensure the history file is linked
const targetChatsDir = join(
rig.testDir!,
rig.homeDir!,
'.gemini',
'tmp',
'test-project-hash',
getProjectHash(rig.testDir!),
'chats',
);
mkdirSync(targetChatsDir, { recursive: true });
const targetHistoryPath = join(
targetChatsDir,
'large-chat-session.json',
'session-large-chat.json',
);
if (existsSync(targetHistoryPath)) rmSync(targetHistoryPath);
copyFileSync(sharedHistoryPath, targetHistoryPath);
@@ -457,6 +476,9 @@ async function generateSharedLargeChatData(tempDir: string) {
// Generate responses for resumed chat
const resumeResponsesStream = createWriteStream(resumeResponsesPath);
for (let i = 0; i < 5; i++) {
// Doubling up on non-streaming responses to satisfy classifier and complexity checks
resumeResponsesStream.write(JSON.stringify(complexityResponse) + '\n');
resumeResponsesStream.write(JSON.stringify(summaryResponse) + '\n');
resumeResponsesStream.write(JSON.stringify(complexityResponse) + '\n');
resumeResponsesStream.write(
JSON.stringify({