perf(core): fix OOM crash in long-running sessions (#19608)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
This commit is contained in:
Darren Sander
2026-03-05 13:23:07 +09:00
committed by GitHub
parent 9dc6898d28
commit 3d4956aa57
4 changed files with 35 additions and 13 deletions

View File

@@ -691,9 +691,13 @@ export class GeminiChat {
const history = curated
? extractCuratedHistory(this.history)
: this.history;
// Deep copy the history to avoid mutating the history outside of the
// chat session.
return structuredClone(history);
// Return a shallow copy of the array to prevent callers from mutating
// the internal history array (push/pop/splice). Content objects are
// shared references — callers MUST NOT mutate them in place.
// This replaces a prior structuredClone() which deep-copied the entire
// conversation on every call, causing O(n) memory pressure per turn
// that compounded into OOM crashes in long-running sessions.
return [...history];
}
/**

View File

@@ -241,6 +241,7 @@ export class Turn {
readonly pendingToolCalls: ToolCallRequestInfo[] = [];
private debugResponses: GenerateContentResponse[] = [];
private pendingCitations = new Set<string>();
private cachedResponseText: string | undefined = undefined;
finishReason: FinishReason | undefined = undefined;
constructor(
@@ -432,11 +433,15 @@ export class Turn {
/**
* Get the concatenated response text from all responses in this turn.
* This extracts and joins all text content from the model's responses.
* The result is cached since this is called multiple times per turn.
*/
getResponseText(): string {
return this.debugResponses
.map((response) => getResponseText(response))
.filter((text): text is string => text !== null)
.join(' ');
if (this.cachedResponseText === undefined) {
this.cachedResponseText = this.debugResponses
.map((response) => getResponseText(response))
.filter((text): text is string => text !== null)
.join(' ');
}
return this.cachedResponseText;
}
}

View File

@@ -232,6 +232,7 @@ type EventBacklogItem = {
export class CoreEventEmitter extends EventEmitter<CoreEvents> {
private _eventBacklog: EventBacklogItem[] = [];
private _backlogHead = 0;
private static readonly MAX_BACKLOG_SIZE = 10000;
constructor() {
@@ -243,8 +244,17 @@ export class CoreEventEmitter extends EventEmitter<CoreEvents> {
...args: CoreEvents[K]
): void {
if (this.listenerCount(event) === 0) {
if (this._eventBacklog.length >= CoreEventEmitter.MAX_BACKLOG_SIZE) {
this._eventBacklog.shift();
const backlogSize = this._eventBacklog.length - this._backlogHead;
if (backlogSize >= CoreEventEmitter.MAX_BACKLOG_SIZE) {
// Evict oldest entry. Use a head pointer instead of shift() to avoid
// O(n) array reindexing on every eviction at capacity.
(this._eventBacklog as unknown[])[this._backlogHead] = undefined;
this._backlogHead++;
// Compact once the number of dead entries reaches half capacity, to bound memory.
if (this._backlogHead >= CoreEventEmitter.MAX_BACKLOG_SIZE / 2) {
this._eventBacklog = this._eventBacklog.slice(this._backlogHead);
this._backlogHead = 0;
}
}
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
this._eventBacklog.push({ event, args } as EventBacklogItem);
@@ -391,9 +401,13 @@ export class CoreEventEmitter extends EventEmitter<CoreEvents> {
* subscribes.
*/
drainBacklogs(): void {
const backlog = [...this._eventBacklog];
this._eventBacklog.length = 0; // Clear in-place
for (const item of backlog) {
const backlog = this._eventBacklog;
const head = this._backlogHead;
this._eventBacklog = [];
this._backlogHead = 0;
for (let i = head; i < backlog.length; i++) {
const item = backlog[i];
if (item === undefined) continue;
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
(this.emit as (event: keyof CoreEvents, ...args: unknown[]) => boolean)(
item.event,

View File

@@ -87,7 +87,6 @@ export async function checkNextSpeaker(
lastComprehensiveMessage.parts &&
lastComprehensiveMessage.parts.length === 0
) {
lastComprehensiveMessage.parts.push({ text: '' });
return {
reasoning:
'The last message was a filler model message with no content (nothing for user to act on), model should speak next.',