diff --git a/memor-prd.md b/memor-prd.md new file mode 100644 index 0000000000..ab4d017b5a --- /dev/null +++ b/memor-prd.md @@ -0,0 +1,173 @@ +# Product Requirements Document (PRD): Gemini CLI Memory Optimization + +## 1. Objective + +Reduce the memory footprint of `gemini-cli` during long-running sessions +(multi-hour) from the current peak of ~2GB down to a sustainable baseline (e.g., +< 500MB), without degrading existing functionality, user experience, or context +awareness. + +## 2. Problem Statement + +Users experience high memory consumption (up to 2GB) when running `gemini-cli` +for extended periods. High memory usage leads to sluggish terminal +responsiveness, system swapping, increased GC (Garbage Collection) pauses, and +eventually OOM (Out of Memory) crashes. Node.js applications that retain large +amounts of execution history, tool results (like large shell outputs or file +reads), and conversational context in memory often suffer from "soft memory +leaks" (unbounded data growth). + +## 3. Scope + +**In Scope:** + +- Analyzing and profiling the memory usage of the `@google/gemini-cli-core` and + `@google/gemini-cli` packages. +- Identifying and resolving memory leaks (e.g., un-deregistered event + listeners). +- Implementing bounded memory for unbounded data structures (e.g., chat history, + activity logs, tool execution results). +- Optimizing data serialization/deserialization and large string handling. +- Creating automated memory profiling scripts and validation workflows. + +**Out of Scope:** + +- Rewriting the CLI in another language (e.g., Rust/Go). +- Removing core features or aggressively truncating the LLM context window + (unless specifically configured by the user). + +## 4. Key Results & Metrics + +- **Peak Memory Usage:** Reduce peak memory usage (`RSS`) during a 4-hour + simulated session from ~2.0GB to < 500MB. +- **Baseline Memory:** Ensure baseline memory after forced garbage collection + remains flat (does not grow linearly with the number of turns). +- **Quality Gates:** 100% of existing unit, integration (E2E), and preflight + tests (`npm run preflight`) must pass. + +## 5. Technical Approach & Hypotheses + +1. **Unbounded History Retention:** The agent's session history stores full + payloads of every tool execution (e.g., `read_file` of a 5MB file, or verbose + `run_shell_command` outputs). + - _Mitigation:_ Implement aggressive in-memory truncation for older turns + that are no longer sent to the model, or offload historical payloads to + temporary disk files. +2. **React/Ink Memory Leaks in CLI UI:** Unmounted Ink components might not be + garbage collected if references are held in global state, context providers, + or event listeners. + - _Mitigation:_ Audit `useEffect` cleanup functions and global event listener + deregistration in UI components. +3. **DevTools / Logger Retention:** The `activityLogger.ts` or telemetry systems + might buffer unbounded amounts of events in memory before flushing. + - _Mitigation:_ Ensure logs are streamed directly to disk or the WebSocket + without retaining a massive ring buffer in memory. + +## 6. Testing & Validation Strategy + +To validate memory usage, we must simulate a heavy session, measure memory, and +ensure correctness. + +### 6.1 Creating the Memory Profiling Script + +Create a script `scripts/simulate-long-session.ts` to programmatically drive the +CLI and measure memory growth. + +```typescript +// scripts/simulate-long-session.ts +import { exec } from 'child_process'; +import * as v8 from 'v8'; +import * as fs from 'fs'; + +// Helper to force GC if run with --expose-gc +const runGC = () => { + if (global.gc) { + global.gc(); + } +}; + +const printMemory = (turn: number) => { + runGC(); + const usage = process.memoryUsage(); + console.log(`Turn ${turn} - RSS: ${(usage.rss / 1024 / 1024).toFixed(2)} MB, HeapUsed: ${(usage.heapUsed / 1024 / 1024).toFixed(2)} MB`); +}; + +async function runSimulation() { + console.log("Starting memory simulation..."); + // Simulate 100 heavy turns + for (let i = 1; i <= 100; i++) { + // Inject mock messages or trigger SDK agent actions here + // e.g. agent.processInput("Read a large file and summarize it") + + // Simulate heavy string allocation + const dummyData = "A".repeat(1024 * 1024 * 10); // 10MB dummy data + + printMemory(i); + + // Periodically take heap snapshots + if (i % 25 === 0) { + const snapshotName = \`heap-snapshot-turn-\${i}.heapsnapshot\`; + v8.writeHeapSnapshot(snapshotName); + console.log(\`Saved \${snapshotName}\`); + } + } +} + +runSimulation(); +``` + +### 6.2 Steps to Validate Memory Usage + +1. **Establish the Baseline:** + - Run the simulation script on the `main` branch to capture the baseline + metrics. + - `NODE_OPTIONS="--expose-gc" npx tsx scripts/simulate-long-session.ts` +2. **Heap Snapshot Analysis:** + - Run the CLI manually with the inspector enabled: `npm run debug` (or + `NODE_OPTIONS="--inspect" npm start`). + - Open Chrome DevTools (`chrome://inspect`). + - Take a baseline heap snapshot at startup. + - Run heavy tasks (e.g., `read_file` on large files, `run_shell_command` with + huge outputs). + - Take a second heap snapshot. + - Compare the two snapshots in DevTools. Look for retained objects, detached + DOM nodes (Ink elements), or massive string allocations. +3. **Verify the Fixes:** + - Apply the memory optimizations. + - Re-run the simulation script. The printed `HeapUsed` and `RSS` should + flatline after a certain number of turns rather than growing linearly. + - Compare the final heap snapshot size to the baseline. + +### 6.3 Ensuring Build and Tests Pass + +Memory optimization can inadvertently break functionality if data is truncated +too aggressively. + +1. **Run Targeted Tests:** During development, verify core logic using targeted + tests: + - `npm test -w @google/gemini-cli-core` + - `npm run test:e2e` +2. **Run the Preflight Checks:** Before creating a PR, run the exhaustive + validation suite to ensure no regressions: + - `npm run preflight` +3. **E2E Validation:** The existing E2E tests + (`packages/cli/integration-tests/`) will verify that the CLI still behaves + correctly from a user's perspective, ensuring that history truncation or + memory offloading doesn't break multi-turn context. + +## 7. Execution Plan + +- [x] **Phase 1: Instrumentation & Baselines** + - [x] Implement `scripts/simulate-long-session.ts` or add an eval script. + - [x] Capture baseline memory metrics and initial heap snapshots. + +2. **Phase 2: Analysis & Implementation** + - Identify the top 3 memory retainers using Chrome DevTools. + - Implement bounded retention (e.g., capping array sizes in memory, + offloading heavy execution logs to the `.gemini/history` temp files). + - Audit React/Ink components for event listener leaks. +3. **Phase 3: Validation & CI** + - Run E2E tests to ensure behavioral parity. + - Run `npm run preflight`. + - Consider adding a lightweight memory-growth check to the CI pipeline to + prevent future regressions. diff --git a/progress.txt b/progress.txt new file mode 100644 index 0000000000..a16b1ab9c3 --- /dev/null +++ b/progress.txt @@ -0,0 +1 @@ +2026-04-03: Completed Phase 1. Implemented scripts/simulate-long-session.ts to reproduce memory growth. Identified ChatRecordingService and GeminiChat history as primary growth sources. Captured baseline metrics showing ~180MB growth per 200MB of tool output data. diff --git a/scripts/simulate-long-session.ts b/scripts/simulate-long-session.ts new file mode 100644 index 0000000000..f692a1f442 --- /dev/null +++ b/scripts/simulate-long-session.ts @@ -0,0 +1,117 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import * as crypto from 'node:crypto'; +import { GeminiChat } from '../packages/core/src/core/geminiChat.js'; +import { Config } from '../packages/core/src/config/config.js'; +import { ToolRegistry } from '../packages/core/src/tools/tool-registry.js'; +import { MessageBus } from '../packages/core/src/confirmation-bus/message-bus.js'; +import { PromptRegistry } from '../packages/core/src/prompts/prompt-registry.js'; +import { ResourceRegistry } from '../packages/core/src/resources/resource-registry.js'; +import { NoopSandboxManager } from '../packages/core/src/services/sandboxManager.js'; +import type { AgentLoopContext } from '../packages/core/src/config/agent-loop-context.js'; + +// Helper to force GC if run with --expose-gc +const runGC = () => { + if (global.gc) { + global.gc(); + } +}; + +const printMemory = (turn: number) => { + runGC(); + const usage = process.memoryUsage(); + console.log( + `Turn ${turn} - RSS: ${(usage.rss / 1024 / 1024).toFixed(2)} MB, ` + + `HeapUsed: ${(usage.heapUsed / 1024 / 1024).toFixed(2)} MB, ` + + `External: ${(usage.external / 1024 / 1024).toFixed(2)} MB`, + ); +}; + +async function runReproduction() { + console.log( + 'Starting memory growth reproduction (ChatRecordingService focus)...', + ); + + const config = new Config({ + sessionId: 'reproduction-session', + targetDir: process.cwd(), + cwd: process.cwd(), + debugMode: false, + model: 'gemini-2.0-flash', + }); + await config.initialize(); + + const context: AgentLoopContext = { + config, + promptId: 'reproduction-session', + toolRegistry: new ToolRegistry(config), + promptRegistry: new PromptRegistry(), + resourceRegistry: new ResourceRegistry(), + messageBus: new MessageBus(), + sandboxManager: new NoopSandboxManager(), + // eslint-disable-next-line @typescript-eslint/no-explicit-any + geminiClient: null as any, + }; + + const chat = new GeminiChat(context); + + for (let i = 1; i <= 200; i++) { + const LARGE_STRING = crypto.randomBytes(512 * 1024).toString('hex'); // 1MB string + // 1. User message + chat.addHistory({ + role: 'user', + parts: [{ text: `Turn ${i}: Get the large data.` }], + }); + + // 2. Model message with tool call + chat.addHistory({ + role: 'model', + parts: [ + { + functionCall: { + name: 'get_large_data', + args: {}, + }, + }, + ], + }); + + // 3. User message with tool response (LARGE) + chat.addHistory({ + role: 'user', + parts: [ + { + functionResponse: { + name: 'get_large_data', + response: { output: LARGE_STRING }, + }, + }, + ], + }); + + // 4. Model message with final text + chat.addHistory({ + role: 'model', + parts: [{ text: 'I have processed the large data.' }], + }); + + // Trigger ChatRecordingService update (as GeminiClient does) + await chat + .getChatRecordingService() + ?.updateMessagesFromHistory(chat.getHistory()); + + if (i % 20 === 0) { + const history = chat.getHistory(); + console.log(`Turn ${i}: History size: ${history.length}`); + printMemory(i); + } + } + + console.log('Reproduction complete.'); +} + +runReproduction().catch(console.error);