mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-12 12:54:07 -07:00
perf(core): fix OOM crash in long-running sessions (#19608)
Co-authored-by: Jacob Richman <jacob314@gmail.com>
This commit is contained in:
@@ -691,9 +691,13 @@ export class GeminiChat {
|
|||||||
const history = curated
|
const history = curated
|
||||||
? extractCuratedHistory(this.history)
|
? extractCuratedHistory(this.history)
|
||||||
: this.history;
|
: this.history;
|
||||||
// Deep copy the history to avoid mutating the history outside of the
|
// Return a shallow copy of the array to prevent callers from mutating
|
||||||
// chat session.
|
// the internal history array (push/pop/splice). Content objects are
|
||||||
return structuredClone(history);
|
// shared references — callers MUST NOT mutate them in place.
|
||||||
|
// This replaces a prior structuredClone() which deep-copied the entire
|
||||||
|
// conversation on every call, causing O(n) memory pressure per turn
|
||||||
|
// that compounded into OOM crashes in long-running sessions.
|
||||||
|
return [...history];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -241,6 +241,7 @@ export class Turn {
|
|||||||
readonly pendingToolCalls: ToolCallRequestInfo[] = [];
|
readonly pendingToolCalls: ToolCallRequestInfo[] = [];
|
||||||
private debugResponses: GenerateContentResponse[] = [];
|
private debugResponses: GenerateContentResponse[] = [];
|
||||||
private pendingCitations = new Set<string>();
|
private pendingCitations = new Set<string>();
|
||||||
|
private cachedResponseText: string | undefined = undefined;
|
||||||
finishReason: FinishReason | undefined = undefined;
|
finishReason: FinishReason | undefined = undefined;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
@@ -432,11 +433,15 @@ export class Turn {
|
|||||||
/**
|
/**
|
||||||
* Get the concatenated response text from all responses in this turn.
|
* Get the concatenated response text from all responses in this turn.
|
||||||
* This extracts and joins all text content from the model's responses.
|
* This extracts and joins all text content from the model's responses.
|
||||||
|
* The result is cached since this is called multiple times per turn.
|
||||||
*/
|
*/
|
||||||
getResponseText(): string {
|
getResponseText(): string {
|
||||||
return this.debugResponses
|
if (this.cachedResponseText === undefined) {
|
||||||
.map((response) => getResponseText(response))
|
this.cachedResponseText = this.debugResponses
|
||||||
.filter((text): text is string => text !== null)
|
.map((response) => getResponseText(response))
|
||||||
.join(' ');
|
.filter((text): text is string => text !== null)
|
||||||
|
.join(' ');
|
||||||
|
}
|
||||||
|
return this.cachedResponseText;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -232,6 +232,7 @@ type EventBacklogItem = {
|
|||||||
|
|
||||||
export class CoreEventEmitter extends EventEmitter<CoreEvents> {
|
export class CoreEventEmitter extends EventEmitter<CoreEvents> {
|
||||||
private _eventBacklog: EventBacklogItem[] = [];
|
private _eventBacklog: EventBacklogItem[] = [];
|
||||||
|
private _backlogHead = 0;
|
||||||
private static readonly MAX_BACKLOG_SIZE = 10000;
|
private static readonly MAX_BACKLOG_SIZE = 10000;
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
@@ -243,8 +244,17 @@ export class CoreEventEmitter extends EventEmitter<CoreEvents> {
|
|||||||
...args: CoreEvents[K]
|
...args: CoreEvents[K]
|
||||||
): void {
|
): void {
|
||||||
if (this.listenerCount(event) === 0) {
|
if (this.listenerCount(event) === 0) {
|
||||||
if (this._eventBacklog.length >= CoreEventEmitter.MAX_BACKLOG_SIZE) {
|
const backlogSize = this._eventBacklog.length - this._backlogHead;
|
||||||
this._eventBacklog.shift();
|
if (backlogSize >= CoreEventEmitter.MAX_BACKLOG_SIZE) {
|
||||||
|
// Evict oldest entry. Use a head pointer instead of shift() to avoid
|
||||||
|
// O(n) array reindexing on every eviction at capacity.
|
||||||
|
(this._eventBacklog as unknown[])[this._backlogHead] = undefined;
|
||||||
|
this._backlogHead++;
|
||||||
|
// Compact once dead entries reach half of capacity, to bound memory use
|
||||||
|
if (this._backlogHead >= CoreEventEmitter.MAX_BACKLOG_SIZE / 2) {
|
||||||
|
this._eventBacklog = this._eventBacklog.slice(this._backlogHead);
|
||||||
|
this._backlogHead = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||||
this._eventBacklog.push({ event, args } as EventBacklogItem);
|
this._eventBacklog.push({ event, args } as EventBacklogItem);
|
||||||
@@ -391,9 +401,13 @@ export class CoreEventEmitter extends EventEmitter<CoreEvents> {
|
|||||||
* subscribes.
|
* subscribes.
|
||||||
*/
|
*/
|
||||||
drainBacklogs(): void {
|
drainBacklogs(): void {
|
||||||
const backlog = [...this._eventBacklog];
|
const backlog = this._eventBacklog;
|
||||||
this._eventBacklog.length = 0; // Clear in-place
|
const head = this._backlogHead;
|
||||||
for (const item of backlog) {
|
this._eventBacklog = [];
|
||||||
|
this._backlogHead = 0;
|
||||||
|
for (let i = head; i < backlog.length; i++) {
|
||||||
|
const item = backlog[i];
|
||||||
|
if (item === undefined) continue;
|
||||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||||
(this.emit as (event: keyof CoreEvents, ...args: unknown[]) => boolean)(
|
(this.emit as (event: keyof CoreEvents, ...args: unknown[]) => boolean)(
|
||||||
item.event,
|
item.event,
|
||||||
|
|||||||
@@ -87,7 +87,6 @@ export async function checkNextSpeaker(
|
|||||||
lastComprehensiveMessage.parts &&
|
lastComprehensiveMessage.parts &&
|
||||||
lastComprehensiveMessage.parts.length === 0
|
lastComprehensiveMessage.parts.length === 0
|
||||||
) {
|
) {
|
||||||
lastComprehensiveMessage.parts.push({ text: '' });
|
|
||||||
return {
|
return {
|
||||||
reasoning:
|
reasoning:
|
||||||
'The last message was a filler model message with no content (nothing for user to act on), model should speak next.',
|
'The last message was a filler model message with no content (nothing for user to act on), model should speak next.',
|
||||||
|
|||||||
Reference in New Issue
Block a user