From 0559040c0f08a1aec336128d68d032022001314c Mon Sep 17 00:00:00 2001
From: Tommaso Sciortino <sciortino@gmail.com>
Date: Wed, 17 Sep 2025 13:12:06 -0700
Subject: [PATCH] Fix automatic compression bug (#8599)

---
 packages/cli/src/ui/hooks/useGeminiStream.ts | 18 +++++--
 packages/core/src/core/client.test.ts        | 57 +++++---------------
 packages/core/src/core/client.ts             |  8 +--
 3 files changed, 28 insertions(+), 55 deletions(-)

diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts
index 4ce58c6731..3566695419 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.ts
+++ b/packages/cli/src/ui/hooks/useGeminiStream.ts
@@ -591,8 +591,15 @@ export const useGeminiStream = (
   );
 
   const handleChatCompressionEvent = useCallback(
-    (eventValue: ServerGeminiChatCompressedEvent['value']) =>
-      addItem(
+    (
+      eventValue: ServerGeminiChatCompressedEvent['value'],
+      userMessageTimestamp: number,
+    ) => {
+      if (pendingHistoryItemRef.current) {
+        addItem(pendingHistoryItemRef.current, userMessageTimestamp);
+        setPendingHistoryItem(null);
+      }
+      return addItem(
         {
           type: 'info',
           text:
@@ -602,8 +609,9 @@ export const useGeminiStream = (
             `${eventValue?.newTokenCount ?? 'unknown'} tokens).`,
         },
         Date.now(),
-      ),
-    [addItem, config],
+      );
+    },
+    [addItem, config, pendingHistoryItemRef, setPendingHistoryItem],
   );
 
   const handleMaxSessionTurnsEvent = useCallback(
@@ -683,7 +691,7 @@ export const useGeminiStream = (
             handleErrorEvent(event.value, userMessageTimestamp);
             break;
           case ServerGeminiEventType.ChatCompressed:
-            handleChatCompressionEvent(event.value);
+            handleChatCompressionEvent(event.value, userMessageTimestamp);
             break;
           case ServerGeminiEventType.ToolCallConfirmation:
           case ServerGeminiEventType.ToolCallResponse:
diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts
index 3a5cced429..19aa737053 100644
--- a/packages/core/src/core/client.test.ts
+++ b/packages/core/src/core/client.test.ts
@@ -15,7 +15,6 @@ import {
 } from 'vitest';
 
 import type { Content, GenerateContentResponse, Part } from '@google/genai';
-import { createUserContent } from '@google/genai';
 import {
   findIndexAfterFraction,
   isThinkingDefault,
@@ -399,12 +398,8 @@ describe('Gemini Client (client.ts)', () => {
         vi.mocked(mockContentGenerator.countTokens).mockResolvedValue({
           totalTokens: 1000,
         });
-        await client.tryCompressChat('prompt-id-4', false, [
-          { text: 'request' },
-        ]); // Fails
-        const result = await client.tryCompressChat('prompt-id-4', true, [
-          { text: 'request' },
-        ]);
+        await client.tryCompressChat('prompt-id-4', false); // Fails
+        const result = await client.tryCompressChat('prompt-id-4', true);
 
         expect(result).toEqual({
           compressionStatus: CompressionStatus.COMPRESSED,
@@ -418,9 +413,7 @@ describe('Gemini Client (client.ts)', () => {
         vi.mocked(mockContentGenerator.countTokens).mockResolvedValue({
           totalTokens: 1000,
         });
-        const result = await client.tryCompressChat('prompt-id-4', false, [
-          { text: 'request' },
-        ]);
+        const result = await client.tryCompressChat('prompt-id-4', false);
 
         expect(result).toEqual({
           compressionStatus:
@@ -432,9 +425,7 @@ describe('Gemini Client (client.ts)', () => {
 
       it('does not manipulate the source chat', async () => {
         const { client, mockChat } = setup();
-        await client.tryCompressChat('prompt-id-4', false, [
-          { text: 'request' },
-        ]);
+        await client.tryCompressChat('prompt-id-4', false);
 
         expect(client['chat']).toBe(mockChat); // a new chat session was not created
       });
@@ -457,7 +448,6 @@ describe('Gemini Client (client.ts)', () => {
         const { compressionStatus } = await client.tryCompressChat(
           'prompt-id-4',
           false,
-          [{ text: 'what is your wisdom?' }],
         );
 
         expect(compressionStatus).toBe(
@@ -470,13 +460,9 @@ describe('Gemini Client (client.ts)', () => {
 
       it('will not attempt to compress context after a failure', async () => {
         const { client } = setup();
-        await client.tryCompressChat('prompt-id-4', false, [
-          { text: 'request' },
-        ]);
+        await client.tryCompressChat('prompt-id-4', false);
 
-        const result = await client.tryCompressChat('prompt-id-5', false, [
-          { text: 'request' },
-        ]);
+        const result = await client.tryCompressChat('prompt-id-5', false);
 
         // it counts tokens for {original, compressed} and then never again
         expect(mockContentGenerator.countTokens).toHaveBeenCalledTimes(2);
@@ -499,9 +485,7 @@ describe('Gemini Client (client.ts)', () => {
       });
 
       const initialChat = client.getChat();
-      const result = await client.tryCompressChat('prompt-id-2', false, [
-        { text: '...history...' },
-      ]);
+      const result = await client.tryCompressChat('prompt-id-2', false);
       const newChat = client.getChat();
 
       expect(tokenLimit).toHaveBeenCalled();
@@ -546,9 +530,7 @@ describe('Gemini Client (client.ts)', () => {
         ],
       } as unknown as GenerateContentResponse);
 
-      await client.tryCompressChat('prompt-id-3', false, [
-        { text: '...history...' },
-      ]);
+      await client.tryCompressChat('prompt-id-3', false);
 
       expect(
         ClearcutLogger.prototype.logChatCompressionEvent,
@@ -592,9 +574,7 @@ describe('Gemini Client (client.ts)', () => {
       } as unknown as GenerateContentResponse);
 
       const initialChat = client.getChat();
-      const result = await client.tryCompressChat('prompt-id-3', false, [
-        { text: '...history...' },
-      ]);
+      const result = await client.tryCompressChat('prompt-id-3', false);
       const newChat = client.getChat();
 
       expect(tokenLimit).toHaveBeenCalled();
@@ -653,9 +633,7 @@ describe('Gemini Client (client.ts)', () => {
       } as unknown as GenerateContentResponse);
 
       const initialChat = client.getChat();
-      const result = await client.tryCompressChat('prompt-id-3', false, [
-        { text: '...history...' },
-      ]);
+      const result = await client.tryCompressChat('prompt-id-3', false);
       const newChat = client.getChat();
 
       expect(tokenLimit).toHaveBeenCalled();
@@ -675,7 +653,7 @@ describe('Gemini Client (client.ts)', () => {
       // 3. compressed summary message
       // 4. standard canned user summary message
       // 5. The last user message (not the last 3 because that would start with a function response)
-      expect(newChat.getHistory().length).toEqual(6);
+      expect(newChat.getHistory().length).toEqual(5);
     });
 
     it('should always trigger summarization when force is true, regardless of token count', async () => {
@@ -703,9 +681,7 @@ describe('Gemini Client (client.ts)', () => {
       } as unknown as GenerateContentResponse);
 
       const initialChat = client.getChat();
-      const result = await client.tryCompressChat('prompt-id-1', false, [
-        { text: '...history...' },
-      ]); // force = true
+      const result = await client.tryCompressChat('prompt-id-1', false); // force = true
       const newChat = client.getChat();
 
       expect(mockGenerateContentFn).toHaveBeenCalled();
@@ -750,17 +726,12 @@ describe('Gemini Client (client.ts)', () => {
       client['chat'] = mockChat;
       client['startChat'] = vi.fn().mockResolvedValue(mockChat);
 
-      const request = [{ text: 'Long conversation' }];
-      const result = await client.tryCompressChat(
-        'prompt-id-4',
-        false,
-        request,
-      );
+      const result = await client.tryCompressChat('prompt-id-4', false);
 
       expect(mockContentGenerator.countTokens).toHaveBeenCalledTimes(2);
       expect(mockContentGenerator.countTokens).toHaveBeenNthCalledWith(1, {
         model: firstCurrentModel,
-        contents: [...mockChatHistory, createUserContent(request)],
+        contents: [...mockChatHistory],
       });
       expect(mockContentGenerator.countTokens).toHaveBeenNthCalledWith(2, {
         model: secondCurrentModel,
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index cc20c55f88..1d98bc2628 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -11,7 +11,6 @@ import type {
   Tool,
   GenerateContentResponse,
 } from '@google/genai';
-import { createUserContent } from '@google/genai';
 import {
   getDirectoryContextString,
   getEnvironmentContext,
@@ -451,7 +450,7 @@ export class GeminiClient {
       return new Turn(this.getChat(), prompt_id);
     }
 
-    const compressed = await this.tryCompressChat(prompt_id, false, request);
+    const compressed = await this.tryCompressChat(prompt_id, false);
 
     if (compressed.compressionStatus === CompressionStatus.COMPRESSED) {
       yield { type: GeminiEventType.ChatCompressed, value: compressed };
@@ -638,7 +637,6 @@ export class GeminiClient {
   async tryCompressChat(
     prompt_id: string,
     force: boolean = false,
-    request?: PartListUnion,
   ): Promise<ChatCompressionInfo> {
     // If the model is 'auto', we will use a placeholder model to check.
     // Compression occurs before we choose a model, so calling `count_tokens`
@@ -654,10 +652,6 @@ export class GeminiClient {
 
     const curatedHistory = this.getChat().getHistory(true);
 
-    if (request) {
-      curatedHistory.push(createUserContent(request));
-    }
-
     // Regardless of `force`, don't do anything if the history is empty.
     if (
       curatedHistory.length === 0 ||