feat(core): add large memory regression test (#25059)

This commit is contained in:
cynthialong0-0
2026-04-10 11:47:23 -07:00
committed by GitHub
parent 565eafc1ec
commit a74bb603c0
5 changed files with 833368 additions and 6 deletions

View File

@@ -1,30 +1,55 @@
{
"version": 1,
"updatedAt": "2026-04-08T01:21:58.770Z",
"updatedAt": "2026-04-10T15:36:04.547Z",
"scenarios": {
"multi-turn-conversation": {
"heapUsedBytes": 120082704,
"heapTotalBytes": 177586176,
"rssBytes": 269172736,
"timestamp": "2026-04-08T01:21:57.127Z"
"externalBytes": 4304053,
"timestamp": "2026-04-10T15:35:17.603Z"
},
"multi-function-call-repo-search": {
"heapUsedBytes": 104644984,
"heapTotalBytes": 111575040,
"rssBytes": 204079104,
"timestamp": "2026-04-08T01:21:58.770Z"
"externalBytes": 4304053,
"timestamp": "2026-04-10T15:35:22.480Z"
},
"idle-session-startup": {
"heapUsedBytes": 119813672,
"heapTotalBytes": 177061888,
"rssBytes": 267943936,
"timestamp": "2026-04-08T01:21:53.855Z"
"externalBytes": 4304053,
"timestamp": "2026-04-10T15:35:08.035Z"
},
"simple-prompt-response": {
"heapUsedBytes": 119722064,
"heapTotalBytes": 177324032,
"rssBytes": 268812288,
"timestamp": "2026-04-08T01:21:55.491Z"
"externalBytes": 4304053,
"timestamp": "2026-04-10T15:35:12.770Z"
},
"resume-large-chat-with-messages": {
"heapUsedBytes": 106545568,
"heapTotalBytes": 111509504,
"rssBytes": 202596352,
"externalBytes": 4306101,
"timestamp": "2026-04-10T15:36:04.547Z"
},
"resume-large-chat": {
"heapUsedBytes": 106513760,
"heapTotalBytes": 111509504,
"rssBytes": 202596352,
"externalBytes": 4306101,
"timestamp": "2026-04-10T15:35:59.528Z"
},
"large-chat": {
"heapUsedBytes": 106471568,
"heapTotalBytes": 111509504,
"rssBytes": 202596352,
"externalBytes": 4306101,
"timestamp": "2026-04-10T15:35:53.180Z"
}
}
}

File diff suppressed because one or more lines are too long

View File

@@ -8,6 +8,15 @@ import { describe, it, beforeAll, afterAll, afterEach } from 'vitest';
import { TestRig, MemoryTestHarness } from '@google/gemini-cli-test-utils';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import {
createWriteStream,
copyFileSync,
readFileSync,
existsSync,
mkdirSync,
rmSync,
} from 'node:fs';
import { randomUUID } from 'node:crypto';
// ESM modules have no built-in __dirname; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Stored memory baselines live alongside this test file.
const BASELINES_PATH = join(__dirname, 'baselines.json');
@@ -182,4 +191,312 @@ describe('Memory Usage Tests', () => {
harness.assertWithinBaseline(result);
}
});
describe('Large Chat Scenarios', () => {
  let sharedResumeResponsesPath: string;
  let sharedActiveResponsesPath: string;
  let sharedHistoryPath: string;
  let sharedPrompts: string;
  let tempDir: string;

  // Generate the (large) shared fixtures once for all three scenarios.
  beforeAll(async () => {
    tempDir = join(__dirname, `large-chat-tmp-${randomUUID()}`);
    mkdirSync(tempDir, { recursive: true });
    const { resumeResponsesPath, activeResponsesPath, historyPath, prompts } =
      await generateSharedLargeChatData(tempDir);
    sharedActiveResponsesPath = activeResponsesPath;
    sharedResumeResponsesPath = resumeResponsesPath;
    sharedHistoryPath = historyPath;
    sharedPrompts = prompts;
  }, 60000);

  afterAll(() => {
    if (existsSync(tempDir)) {
      rmSync(tempDir, { recursive: true, force: true });
    }
  });

  afterEach(async () => {
    await rig.cleanup();
  });

  // Shared epilogue for every scenario: either record a new baseline
  // (UPDATE_BASELINES mode) or assert the run stayed within tolerance.
  const finishScenario = (
    result: Awaited<ReturnType<typeof harness.runScenario>>,
    scenarioName: string,
  ) => {
    if (UPDATE_BASELINES) {
      harness.updateScenarioBaseline(result);
      console.log(
        `Updated baseline for ${scenarioName}: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
      );
    } else {
      harness.assertWithinBaseline(result);
    }
  };

  // Copies the shared session history into the rig's chats directory so
  // that `--resume latest` can find it.
  const installHistoryFile = () => {
    const targetChatsDir = join(
      rig.testDir!,
      'tmp',
      'test-project-hash',
      'chats',
    );
    mkdirSync(targetChatsDir, { recursive: true });
    const targetHistoryPath = join(targetChatsDir, 'large-chat-session.json');
    if (existsSync(targetHistoryPath)) rmSync(targetHistoryPath);
    copyFileSync(sharedHistoryPath, targetHistoryPath);
  };

  it('large-chat: memory usage within baseline', async () => {
    rig = new TestRig();
    rig.setup('memory-large-chat', {
      fakeResponsesPath: sharedActiveResponsesPath,
    });
    const result = await harness.runScenario(
      'large-chat',
      async (recordSnapshot) => {
        await rig.run({
          stdin: sharedPrompts,
          timeout: 600000,
          env: TEST_ENV,
        });
        await recordSnapshot('after-large-chat');
      },
    );
    finishScenario(result, 'large-chat');
  });

  it('resume-large-chat: memory usage within baseline', async () => {
    rig = new TestRig();
    rig.setup('memory-resume-large-chat', {
      fakeResponsesPath: sharedResumeResponsesPath,
    });
    const result = await harness.runScenario(
      'resume-large-chat',
      async (recordSnapshot) => {
        installHistoryFile();
        await rig.run({
          // add a prompt to make sure it does not hang there and exits immediately
          args: ['--resume', 'latest', '--prompt', 'hello'],
          timeout: 600000,
          env: TEST_ENV,
        });
        await recordSnapshot('after-resume-large-chat');
      },
    );
    finishScenario(result, 'resume-large-chat');
  });

  it('resume-large-chat-with-messages: memory usage within baseline', async () => {
    rig = new TestRig();
    rig.setup('memory-resume-large-chat-msgs', {
      fakeResponsesPath: sharedResumeResponsesPath,
    });
    const result = await harness.runScenario(
      'resume-large-chat-with-messages',
      async (recordSnapshot) => {
        installHistoryFile();
        await rig.run({
          args: ['--resume', 'latest'],
          stdin: 'new prompt 1\nnew prompt 2\n',
          timeout: 600000,
          env: TEST_ENV,
        });
        await recordSnapshot('after-resume-and-append');
      },
    );
    finishScenario(result, 'resume-large-chat-with-messages');
  });
});
});
/**
 * Builds the shared fixtures used by the large-chat memory scenarios:
 * fake model responses for replaying the recorded chat ("active"), fake
 * responses for the resume scenarios, a copy of the recorded session
 * history, and the newline-joined user prompts extracted from it.
 *
 * Reads `large-chat-session.json` next to this test file; assumes each
 * user message's first content part carries `.text` — TODO confirm against
 * the recorded session schema.
 *
 * @param tempDir Directory the generated fixture files are written into.
 * @returns Paths to the generated fixture files plus the prompt string.
 */
async function generateSharedLargeChatData(tempDir: string) {
  const resumeResponsesPath = join(tempDir, 'large-chat-resume-chat.responses');
  const activeResponsesPath = join(tempDir, 'large-chat-active-chat.responses');
  const historyPath = join(tempDir, 'large-chat-history.json');
  const sourceSessionPath = join(__dirname, 'large-chat-session.json');
  const session = JSON.parse(readFileSync(sourceSessionPath, 'utf8'));
  const messages = session.messages;
  copyFileSync(sourceSessionPath, historyPath);

  // Canned single-shot responses emitted around every turn.
  const complexityResponse = {
    method: 'generateContent',
    response: {
      candidates: [
        {
          content: {
            parts: [
              {
                text: '{"complexity_reasoning":"simple","complexity_score":1}',
              },
            ],
            role: 'model',
          },
          finishReason: 'STOP',
          index: 0,
        },
      ],
    },
  };
  const summaryResponse = {
    method: 'generateContent',
    response: {
      candidates: [
        {
          content: {
            parts: [
              { text: '{"originalSummary":"large chat summary","events":[]}' },
            ],
            role: 'model',
          },
          finishReason: 'STOP',
          index: 0,
        },
      ],
    },
  };

  // Builds one streaming-response envelope; token counts are symmetric
  // (prompt === candidates === tokens, total === 2 * tokens).
  const streamEnvelope = (
    parts: Array<Record<string, unknown>>,
    tokens: number,
  ) => ({
    method: 'generateContentStream',
    response: [
      {
        candidates: [
          {
            content: { parts, role: 'model' },
            finishReason: 'STOP',
            index: 0,
          },
        ],
        usageMetadata: {
          promptTokenCount: tokens,
          candidatesTokenCount: tokens,
          totalTokenCount: tokens * 2,
          promptTokensDetails: [{ modality: 'TEXT', tokenCount: tokens }],
        },
      },
    ],
  });

  // Resolves once a stream has flushed everything written to it. Rejects on
  // 'error' so a write failure fails the suite fast instead of hanging until
  // the beforeAll timeout; listeners are attached before end() is called.
  const streamFinished = (stream: ReturnType<typeof createWriteStream>) =>
    new Promise<void>((resolve, reject) => {
      stream.on('error', reject);
      stream.on('finish', () => resolve());
    });

  // Generate fake responses for the active chat by replaying the session.
  const promptsList: string[] = [];
  const activeResponsesStream = createWriteStream(activeResponsesPath);
  const activeDone = streamFinished(activeResponsesStream);
  for (let i = 0; i < messages.length; i++) {
    const msg = messages[i];
    if (msg.type !== 'user') continue;
    promptsList.push(msg.content[0].text);
    // Start of a new turn.
    activeResponsesStream.write(JSON.stringify(complexityResponse) + '\n');
    // Emit one streaming envelope per gemini message until the next user turn.
    let j = i + 1;
    while (j < messages.length && messages[j].type === 'gemini') {
      const geminiMsg = messages[j];
      const parts: Array<Record<string, unknown>> = [];
      if (geminiMsg.content) {
        parts.push({ text: geminiMsg.content });
      }
      if (geminiMsg.toolCalls) {
        for (const tc of geminiMsg.toolCalls) {
          parts.push({
            functionCall: {
              name: tc.name,
              args: tc.args,
            },
          });
        }
      }
      activeResponsesStream.write(
        JSON.stringify(streamEnvelope(parts, 100)) + '\n',
      );
      j++;
    }
    // End of turn.
    activeResponsesStream.write(JSON.stringify(summaryResponse) + '\n');
    // Skip the gemini messages we just consumed.
    i = j - 1;
  }
  activeResponsesStream.end();

  // Generate responses for the resumed chat: five simple turns.
  const resumeResponsesStream = createWriteStream(resumeResponsesPath);
  const resumeDone = streamFinished(resumeResponsesStream);
  for (let i = 0; i < 5; i++) {
    resumeResponsesStream.write(JSON.stringify(complexityResponse) + '\n');
    resumeResponsesStream.write(
      JSON.stringify(streamEnvelope([{ text: `Resume response ${i}` }], 10)) +
        '\n',
    );
    resumeResponsesStream.write(JSON.stringify(summaryResponse) + '\n');
  }
  resumeResponsesStream.end();

  // Wait for both streams to flush to disk before handing paths to tests.
  await Promise.all([activeDone, resumeDone]);

  return {
    resumeResponsesPath,
    activeResponsesPath,
    historyPath,
    prompts: promptsList.join('\n'),
  };
}

View File

@@ -13,6 +13,7 @@ export interface MemoryBaseline {
heapUsedBytes: number;
heapTotalBytes: number;
rssBytes: number;
externalBytes: number;
timestamp: string;
}
@@ -63,6 +64,7 @@ export function updateBaseline(
heapUsedBytes: number;
heapTotalBytes: number;
rssBytes: number;
externalBytes: number;
},
): void {
const baselines = loadBaselines(path);
@@ -70,6 +72,7 @@ export function updateBaseline(
heapUsedBytes: measured.heapUsedBytes,
heapTotalBytes: measured.heapTotalBytes,
rssBytes: measured.rssBytes,
externalBytes: measured.externalBytes,
timestamp: new Date().toISOString(),
};
saveBaselines(path, baselines);

View File

@@ -41,8 +41,10 @@ export interface MemoryTestResult {
snapshots: MemorySnapshot[];
peakHeapUsed: number;
peakRss: number;
peakExternal: number;
finalHeapUsed: number;
finalRss: number;
finalExternal: number;
baseline: MemoryBaseline | undefined;
withinTolerance: boolean;
deltaPercent: number;
@@ -207,13 +209,17 @@ export class MemoryTestHarness {
withinTolerance = deltaPercent <= tolerance;
}
const peakExternal = Math.max(...snapshots.map((s) => s.external));
const result: MemoryTestResult = {
scenarioName: name,
snapshots,
peakHeapUsed,
peakRss,
peakExternal,
finalHeapUsed: afterSnap.heapUsed,
finalRss: afterSnap.rss,
finalExternal: afterSnap.external,
baseline,
withinTolerance,
deltaPercent,
@@ -254,7 +260,8 @@ export class MemoryTestHarness {
` Baseline: ${formatMB(result.baseline.heapUsedBytes)} heap used\n` +
` Delta: ${deltaPercent.toFixed(1)}% (tolerance: ${tolerance}%)\n` +
` Peak heap: ${formatMB(result.peakHeapUsed)}\n` +
` Peak RSS: ${formatMB(result.peakRss)}`,
` Peak RSS: ${formatMB(result.peakRss)}\n` +
` Peak External: ${formatMB(result.peakExternal)}`,
);
}
}
@@ -268,6 +275,7 @@ export class MemoryTestHarness {
heapTotalBytes:
result.snapshots[result.snapshots.length - 1]?.heapTotal ?? 0,
rssBytes: result.finalRss,
externalBytes: result.finalExternal,
});
// Reload baselines after update
this.baselines = loadBaselines(this.baselinesPath);