mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-10 22:21:22 -07:00
Fix message too large issue. (#19499)
This commit is contained in:
committed by
GitHub
parent
a00eb3b8e6
commit
c276d0c7b6
@@ -689,6 +689,13 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
|
||||
chat.setHistory(newHistory);
|
||||
this.hasFailedCompressionAttempt = false;
|
||||
}
|
||||
} else if (info.compressionStatus === CompressionStatus.CONTENT_TRUNCATED) {
|
||||
if (newHistory) {
|
||||
chat.setHistory(newHistory);
|
||||
// Do NOT reset hasFailedCompressionAttempt.
|
||||
// We only truncated content because summarization previously failed.
|
||||
// We want to keep avoiding expensive summarization calls.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1082,6 +1082,15 @@ export class GeminiClient {
|
||||
this.updateTelemetryTokenCount();
|
||||
this.forceFullIdeContext = true;
|
||||
}
|
||||
} else if (info.compressionStatus === CompressionStatus.CONTENT_TRUNCATED) {
|
||||
if (newHistory) {
|
||||
// We truncated content to save space, but summarization is still "failed".
|
||||
// We update the chat context directly without resetting the failure flag.
|
||||
this.getChat().setHistory(newHistory);
|
||||
this.updateTelemetryTokenCount();
|
||||
// We don't reset the chat session fully like in COMPRESSED because
|
||||
// this is a lighter-weight intervention.
|
||||
}
|
||||
}
|
||||
|
||||
return info;
|
||||
|
||||
@@ -180,6 +180,9 @@ export enum CompressionStatus {
|
||||
|
||||
/** The compression was not necessary and no action was taken */
|
||||
NOOP,
|
||||
|
||||
/** Summarization was skipped because a previous attempt failed, but the content was truncated to fit the budget */
|
||||
CONTENT_TRUNCATED,
|
||||
}
|
||||
|
||||
export interface ChatCompressionInfo {
|
||||
|
||||
@@ -226,8 +226,10 @@ describe('ChatCompressionService', () => {
|
||||
false,
|
||||
mockModel,
|
||||
mockConfig,
|
||||
true,
|
||||
false,
|
||||
);
|
||||
// A previous failed attempt no longer causes an early return (that check was removed),
|
||||
// But since history is small, it will be NOOP due to threshold
|
||||
expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP);
|
||||
expect(result.newHistory).toBeNull();
|
||||
});
|
||||
|
||||
@@ -240,10 +240,7 @@ export class ChatCompressionService {
|
||||
const curatedHistory = chat.getHistory(true);
|
||||
|
||||
// Regardless of `force`, don't do anything if the history is empty.
|
||||
if (
|
||||
curatedHistory.length === 0 ||
|
||||
(hasFailedCompressionAttempt && !force)
|
||||
) {
|
||||
if (curatedHistory.length === 0) {
|
||||
return {
|
||||
newHistory: null,
|
||||
info: {
|
||||
@@ -285,6 +282,35 @@ export class ChatCompressionService {
|
||||
config,
|
||||
);
|
||||
|
||||
// If summarization previously failed (and compression is not being forced), we rely on truncation only.
|
||||
// We do NOT attempt to invoke the LLM for summarization again to avoid repeated failures/costs.
|
||||
if (hasFailedCompressionAttempt && !force) {
|
||||
const truncatedTokenCount = estimateTokenCountSync(
|
||||
truncatedHistory.flatMap((c) => c.parts || []),
|
||||
);
|
||||
|
||||
// If truncation reduced the size, we consider it a successful "compression" (truncation only).
|
||||
if (truncatedTokenCount < originalTokenCount) {
|
||||
return {
|
||||
newHistory: truncatedHistory,
|
||||
info: {
|
||||
originalTokenCount,
|
||||
newTokenCount: truncatedTokenCount,
|
||||
compressionStatus: CompressionStatus.CONTENT_TRUNCATED,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
newHistory: null,
|
||||
info: {
|
||||
originalTokenCount,
|
||||
newTokenCount: originalTokenCount,
|
||||
compressionStatus: CompressionStatus.NOOP,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
const splitPoint = findCompressSplitPoint(
|
||||
truncatedHistory,
|
||||
1 - COMPRESSION_PRESERVE_THRESHOLD,
|
||||
|
||||
Reference in New Issue
Block a user