perf(core): fix OOM crash in long-running sessions (#19608)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
2026-05-12 12:54:07 -07:00 · 2026-03-05 13:23:07 +09:00
parent 9dc6898d28
commit 3d4956aa57
4 changed files with 35 additions and 13 deletions
@@ -691,9 +691,13 @@ export class GeminiChat {
    const history = curated
      ? extractCuratedHistory(this.history)
      : this.history;
-    // Deep copy the history to avoid mutating the history outside of the
+    // Return a shallow copy of the array to prevent callers from mutating
-    // chat session.
+    // the internal history array (push/pop/splice). Content objects are
-    return structuredClone(history);
+    // shared references — callers MUST NOT mutate them in place.
+    // This replaces a prior structuredClone() which deep-copied the entire
+    // conversation on every call, causing O(n) memory pressure per turn
+    // that compounded into OOM crashes in long-running sessions.
+    return [...history];
  }
  /**
@@ -241,6 +241,7 @@ export class Turn {
  readonly pendingToolCalls: ToolCallRequestInfo[] = [];
  private debugResponses: GenerateContentResponse[] = [];
  private pendingCitations = new Set<string>();
+  private cachedResponseText: string | undefined = undefined;
  finishReason: FinishReason | undefined = undefined;
  constructor(
@@ -432,11 +433,15 @@ export class Turn {
  /**
   * Get the concatenated response text from all responses in this turn.
   * This extracts and joins all text content from the model's responses.
+   * The result is cached since this is called multiple times per turn.
   */
  getResponseText(): string {
-    return this.debugResponses
+    if (this.cachedResponseText === undefined) {
-      .map((response) => getResponseText(response))
+      this.cachedResponseText = this.debugResponses
-      .filter((text): text is string => text !== null)
+        .map((response) => getResponseText(response))
-      .join(' ');
+        .filter((text): text is string => text !== null)
+        .join(' ');
+    }
+    return this.cachedResponseText;
  }
 }
@@ -232,6 +232,7 @@ type EventBacklogItem = {
 export class CoreEventEmitter extends EventEmitter<CoreEvents> {
  private _eventBacklog: EventBacklogItem[] = [];
+  private _backlogHead = 0;
  private static readonly MAX_BACKLOG_SIZE = 10000;
  constructor() {
@@ -243,8 +244,17 @@ export class CoreEventEmitter extends EventEmitter<CoreEvents> {
    ...args: CoreEvents[K]
  ): void {
    if (this.listenerCount(event) === 0) {
-      if (this._eventBacklog.length >= CoreEventEmitter.MAX_BACKLOG_SIZE) {
+      const backlogSize = this._eventBacklog.length - this._backlogHead;
-        this._eventBacklog.shift();
+      if (backlogSize >= CoreEventEmitter.MAX_BACKLOG_SIZE) {
+        // Evict oldest entry. Use a head pointer instead of shift() to avoid
+        // O(n) array reindexing on every eviction at capacity.
+        (this._eventBacklog as unknown[])[this._backlogHead] = undefined;
+        this._backlogHead++;
+        // Compact once dead entries exceed half capacity to bound memory
+        if (this._backlogHead >= CoreEventEmitter.MAX_BACKLOG_SIZE / 2) {
+          this._eventBacklog = this._eventBacklog.slice(this._backlogHead);
+          this._backlogHead = 0;
+        }
      }
      // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
      this._eventBacklog.push({ event, args } as EventBacklogItem);
@@ -391,9 +401,13 @@ export class CoreEventEmitter extends EventEmitter<CoreEvents> {
   * subscribes.
   */
  drainBacklogs(): void {
-    const backlog = [...this._eventBacklog];
+    const backlog = this._eventBacklog;
-    this._eventBacklog.length = 0; // Clear in-place
+    const head = this._backlogHead;
-    for (const item of backlog) {
+    this._eventBacklog = [];
+    this._backlogHead = 0;
+    for (let i = head; i < backlog.length; i++) {
+      const item = backlog[i];
+      if (item === undefined) continue;
      // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
      (this.emit as (event: keyof CoreEvents, ...args: unknown[]) => boolean)(
        item.event,
@@ -87,7 +87,6 @@ export async function checkNextSpeaker(
    lastComprehensiveMessage.parts &&
    lastComprehensiveMessage.parts.length === 0
  ) {
-    lastComprehensiveMessage.parts.push({ text: '' });
    return {
      reasoning:
        'The last message was a filler model message with no content (nothing for user to act on), model should speak next.',