From 0559040c0f08a1aec336128d68d032022001314c Mon Sep 17 00:00:00 2001 From: Tommaso Sciortino Date: Wed, 17 Sep 2025 13:12:06 -0700 Subject: [PATCH] Fix automatic compression bug (#8599) --- packages/cli/src/ui/hooks/useGeminiStream.ts | 18 +++++-- packages/core/src/core/client.test.ts | 57 +++++--------------- packages/core/src/core/client.ts | 8 +-- 3 files changed, 28 insertions(+), 55 deletions(-) diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 4ce58c6731..3566695419 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -591,8 +591,15 @@ export const useGeminiStream = ( ); const handleChatCompressionEvent = useCallback( - (eventValue: ServerGeminiChatCompressedEvent['value']) => - addItem( + ( + eventValue: ServerGeminiChatCompressedEvent['value'], + userMessageTimestamp: number, + ) => { + if (pendingHistoryItemRef.current) { + addItem(pendingHistoryItemRef.current, userMessageTimestamp); + setPendingHistoryItem(null); + } + return addItem( { type: 'info', text: @@ -602,8 +609,9 @@ export const useGeminiStream = ( `${eventValue?.newTokenCount ?? 'unknown'} tokens).`, }, Date.now(), - ), - [addItem, config], + ); + }, + [addItem, config, pendingHistoryItemRef, setPendingHistoryItem], ); const handleMaxSessionTurnsEvent = useCallback( @@ -683,7 +691,7 @@ export const useGeminiStream = ( handleErrorEvent(event.value, userMessageTimestamp); break; case ServerGeminiEventType.ChatCompressed: - handleChatCompressionEvent(event.value); + handleChatCompressionEvent(event.value, userMessageTimestamp); break; case ServerGeminiEventType.ToolCallConfirmation: case ServerGeminiEventType.ToolCallResponse: diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index 3a5cced429..19aa737053 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -15,7 +15,6 @@ import { } from 'vitest'; import type { Content, GenerateContentResponse, Part } from '@google/genai'; -import { createUserContent } from '@google/genai'; import { findIndexAfterFraction, isThinkingDefault, @@ -399,12 +398,8 @@ describe('Gemini Client (client.ts)', () => { vi.mocked(mockContentGenerator.countTokens).mockResolvedValue({ totalTokens: 1000, }); - await client.tryCompressChat('prompt-id-4', false, [ - { text: 'request' }, - ]); // Fails - const result = await client.tryCompressChat('prompt-id-4', true, [ - { text: 'request' }, - ]); + await client.tryCompressChat('prompt-id-4', false); // Fails + const result = await client.tryCompressChat('prompt-id-4', true); expect(result).toEqual({ compressionStatus: CompressionStatus.COMPRESSED, @@ -418,9 +413,7 @@ describe('Gemini Client (client.ts)', () => { vi.mocked(mockContentGenerator.countTokens).mockResolvedValue({ totalTokens: 1000, }); - const result = await client.tryCompressChat('prompt-id-4', false, [ - { text: 'request' }, - ]); + const result = await client.tryCompressChat('prompt-id-4', false); expect(result).toEqual({ compressionStatus: @@ -432,9 +425,7 @@ describe('Gemini Client (client.ts)', () => { it('does not manipulate the source chat', async () => { const { client, mockChat } = setup(); - await client.tryCompressChat('prompt-id-4', false, [ - { text: 'request' }, - ]); + await client.tryCompressChat('prompt-id-4', false); expect(client['chat']).toBe(mockChat); // a new chat session was not created }); @@ -457,7 +448,6 @@ describe('Gemini Client (client.ts)', () => { const { compressionStatus } = await client.tryCompressChat( 'prompt-id-4', false, - [{ text: 'what is your wisdom?' }], ); expect(compressionStatus).toBe( @@ -470,13 +460,9 @@ describe('Gemini Client (client.ts)', () => { it('will not attempt to compress context after a failure', async () => { const { client } = setup(); - await client.tryCompressChat('prompt-id-4', false, [ - { text: 'request' }, - ]); + await client.tryCompressChat('prompt-id-4', false); - const result = await client.tryCompressChat('prompt-id-5', false, [ - { text: 'request' }, - ]); + const result = await client.tryCompressChat('prompt-id-5', false); // it counts tokens for {original, compressed} and then never again expect(mockContentGenerator.countTokens).toHaveBeenCalledTimes(2); @@ -499,9 +485,7 @@ describe('Gemini Client (client.ts)', () => { }); const initialChat = client.getChat(); - const result = await client.tryCompressChat('prompt-id-2', false, [ - { text: '...history...' }, - ]); + const result = await client.tryCompressChat('prompt-id-2', false); const newChat = client.getChat(); expect(tokenLimit).toHaveBeenCalled(); @@ -546,9 +530,7 @@ describe('Gemini Client (client.ts)', () => { ], } as unknown as GenerateContentResponse); - await client.tryCompressChat('prompt-id-3', false, [ - { text: '...history...' }, - ]); + await client.tryCompressChat('prompt-id-3', false); expect( ClearcutLogger.prototype.logChatCompressionEvent, @@ -592,9 +574,7 @@ describe('Gemini Client (client.ts)', () => { } as unknown as GenerateContentResponse); const initialChat = client.getChat(); - const result = await client.tryCompressChat('prompt-id-3', false, [ - { text: '...history...' }, - ]); + const result = await client.tryCompressChat('prompt-id-3', false); const newChat = client.getChat(); expect(tokenLimit).toHaveBeenCalled(); @@ -653,9 +633,7 @@ describe('Gemini Client (client.ts)', () => { } as unknown as GenerateContentResponse); const initialChat = client.getChat(); - const result = await client.tryCompressChat('prompt-id-3', false, [ - { text: '...history...' }, - ]); + const result = await client.tryCompressChat('prompt-id-3', false); const newChat = client.getChat(); expect(tokenLimit).toHaveBeenCalled(); @@ -675,7 +653,7 @@ describe('Gemini Client (client.ts)', () => { // 3. compressed summary message // 4. standard canned user summary message // 5. The last user message (not the last 3 because that would start with a function response) - expect(newChat.getHistory().length).toEqual(6); + expect(newChat.getHistory().length).toEqual(5); }); it('should always trigger summarization when force is true, regardless of token count', async () => { @@ -703,9 +681,7 @@ describe('Gemini Client (client.ts)', () => { } as unknown as GenerateContentResponse); const initialChat = client.getChat(); - const result = await client.tryCompressChat('prompt-id-1', false, [ - { text: '...history...' }, - ]); // force = true + const result = await client.tryCompressChat('prompt-id-1', false); // force = true const newChat = client.getChat(); expect(mockGenerateContentFn).toHaveBeenCalled(); @@ -750,17 +726,12 @@ describe('Gemini Client (client.ts)', () => { client['chat'] = mockChat; client['startChat'] = vi.fn().mockResolvedValue(mockChat); - const request = [{ text: 'Long conversation' }]; - const result = await client.tryCompressChat( - 'prompt-id-4', - false, - request, - ); + const result = await client.tryCompressChat('prompt-id-4', false); expect(mockContentGenerator.countTokens).toHaveBeenCalledTimes(2); expect(mockContentGenerator.countTokens).toHaveBeenNthCalledWith(1, { model: firstCurrentModel, - contents: [...mockChatHistory, createUserContent(request)], + contents: [...mockChatHistory], }); expect(mockContentGenerator.countTokens).toHaveBeenNthCalledWith(2, { model: secondCurrentModel, diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index cc20c55f88..1d98bc2628 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -11,7 +11,6 @@ import type { Tool, GenerateContentResponse, } from '@google/genai'; -import { createUserContent } from '@google/genai'; import { getDirectoryContextString, getEnvironmentContext, @@ -451,7 +450,7 @@ export class GeminiClient { return new Turn(this.getChat(), prompt_id); } - const compressed = await this.tryCompressChat(prompt_id, false, request); + const compressed = await this.tryCompressChat(prompt_id, false); if (compressed.compressionStatus === CompressionStatus.COMPRESSED) { yield { type: GeminiEventType.ChatCompressed, value: compressed }; @@ -638,7 +637,6 @@ export class GeminiClient { async tryCompressChat( prompt_id: string, force: boolean = false, - request?: PartListUnion, ): Promise { // If the model is 'auto', we will use a placeholder model to check. // Compression occurs before we choose a model, so calling `count_tokens` @@ -654,10 +652,6 @@ export class GeminiClient { const curatedHistory = this.getChat().getHistory(true); - if (request) { - curatedHistory.push(createUserContent(request)); - } - // Regardless of `force`, don't do anything if the history is empty. if ( curatedHistory.length === 0 ||