From 4f91037af49556763fe3e5bd15a71016a5b69e88 Mon Sep 17 00:00:00 2001
From: Sandy Tao <sandytao520@icloud.com>
Date: Fri, 12 Sep 2025 14:13:16 -0700
Subject: [PATCH] fix(core): Include the latest user request in countTokens for
 compression (#8375)

---
 packages/core/src/core/client.test.ts | 83 +++++++++++++++++++++------
 packages/core/src/core/client.ts      |  8 ++-
 2 files changed, 73 insertions(+), 18 deletions(-)

diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts
index 3dd4b7b053..5d030f3ad0 100644
--- a/packages/core/src/core/client.test.ts
+++ b/packages/core/src/core/client.test.ts
@@ -15,6 +15,7 @@ import {
 } from 'vitest';
 
 import type { Content, GenerateContentResponse, Part } from '@google/genai';
+import { createUserContent } from '@google/genai';
 import {
   findIndexAfterFraction,
   isThinkingDefault,
@@ -578,8 +579,12 @@ describe('Gemini Client (client.ts)', () => {
         vi.mocked(mockContentGenerator.countTokens).mockResolvedValue({
           totalTokens: 1000,
         });
-        await client.tryCompressChat('prompt-id-4'); // Fails
-        const result = await client.tryCompressChat('prompt-id-4', true);
+        await client.tryCompressChat('prompt-id-4', false, [
+          { text: 'request' },
+        ]); // Fails
+        const result = await client.tryCompressChat('prompt-id-4', true, [
+          { text: 'request' },
+        ]);
 
         expect(result).toEqual({
           compressionStatus: CompressionStatus.COMPRESSED,
@@ -593,7 +598,9 @@ describe('Gemini Client (client.ts)', () => {
         vi.mocked(mockContentGenerator.countTokens).mockResolvedValue({
           totalTokens: 1000,
         });
-        const result = await client.tryCompressChat('prompt-id-4', true);
+        const result = await client.tryCompressChat('prompt-id-4', false, [
+          { text: 'request' },
+        ]);
 
         expect(result).toEqual({
           compressionStatus:
@@ -605,7 +612,9 @@ describe('Gemini Client (client.ts)', () => {
 
       it('does not manipulate the source chat', async () => {
         const { client, mockChat } = setup();
-        await client.tryCompressChat('prompt-id-4', true);
+        await client.tryCompressChat('prompt-id-4', false, [
+          { text: 'request' },
+        ]);
 
         expect(client['chat']).toBe(mockChat); // a new chat session was not created
       });
@@ -625,8 +634,11 @@ describe('Gemini Client (client.ts)', () => {
         const { client } = setup({
           chatHistory: originalHistory,
         });
-        const { compressionStatus } =
-          await client.tryCompressChat('prompt-id-4');
+        const { compressionStatus } = await client.tryCompressChat(
+          'prompt-id-4',
+          false,
+          [{ text: 'what is your wisdom?' }],
+        );
 
         expect(compressionStatus).toBe(
           CompressionStatus.COMPRESSION_FAILED_INFLATED_TOKEN_COUNT,
@@ -638,9 +650,13 @@ describe('Gemini Client (client.ts)', () => {
 
       it('will not attempt to compress context after a failure', async () => {
         const { client } = setup();
-        await client.tryCompressChat('prompt-id-4');
+        await client.tryCompressChat('prompt-id-4', false, [
+          { text: 'request' },
+        ]);
 
-        const result = await client.tryCompressChat('prompt-id-5');
+        const result = await client.tryCompressChat('prompt-id-5', false, [
+          { text: 'request' },
+        ]);
 
         // it counts tokens for {original, compressed} and then never again
         expect(mockContentGenerator.countTokens).toHaveBeenCalledTimes(2);
@@ -663,7 +679,9 @@ describe('Gemini Client (client.ts)', () => {
       });
 
       const initialChat = client.getChat();
-      const result = await client.tryCompressChat('prompt-id-2');
+      const result = await client.tryCompressChat('prompt-id-2', false, [
+        { text: '...history...' },
+      ]);
       const newChat = client.getChat();
 
       expect(tokenLimit).toHaveBeenCalled();
@@ -708,7 +726,9 @@ describe('Gemini Client (client.ts)', () => {
         ],
       } as unknown as GenerateContentResponse);
 
-      await client.tryCompressChat('prompt-id-3');
+      await client.tryCompressChat('prompt-id-3', false, [
+        { text: '...history...' },
+      ]);
 
       expect(
         ClearcutLogger.prototype.logChatCompressionEvent,
@@ -752,7 +772,9 @@ describe('Gemini Client (client.ts)', () => {
       } as unknown as GenerateContentResponse);
 
       const initialChat = client.getChat();
-      const result = await client.tryCompressChat('prompt-id-3');
+      const result = await client.tryCompressChat('prompt-id-3', false, [
+        { text: '...history...' },
+      ]);
       const newChat = client.getChat();
 
       expect(tokenLimit).toHaveBeenCalled();
@@ -811,7 +833,9 @@ describe('Gemini Client (client.ts)', () => {
       } as unknown as GenerateContentResponse);
 
       const initialChat = client.getChat();
-      const result = await client.tryCompressChat('prompt-id-3');
+      const result = await client.tryCompressChat('prompt-id-3', false, [
+        { text: '...history...' },
+      ]);
       const newChat = client.getChat();
 
       expect(tokenLimit).toHaveBeenCalled();
@@ -831,7 +855,7 @@ describe('Gemini Client (client.ts)', () => {
       // 3. compressed summary message
       // 4. standard canned user summary message
       // 5. The last user message (not the last 3 because that would start with a function response)
-      expect(newChat.getHistory().length).toEqual(5);
+      expect(newChat.getHistory().length).toEqual(6); // NOTE(review): list above names 5 entries; the 6th is presumably the pending request — confirm
     });
 
     it('should always trigger summarization when force is true, regardless of token count', async () => {
@@ -859,7 +883,9 @@ describe('Gemini Client (client.ts)', () => {
       } as unknown as GenerateContentResponse);
 
       const initialChat = client.getChat();
-      const result = await client.tryCompressChat('prompt-id-1', true); // force = true
+      const result = await client.tryCompressChat('prompt-id-1', false, [
+        { text: '...history...' },
+      ]); // NOTE(review): passes force = false although this test targets force = true — confirm
       const newChat = client.getChat();
 
       expect(mockGenerateContentFn).toHaveBeenCalled();
@@ -896,7 +922,7 @@ describe('Gemini Client (client.ts)', () => {
       ];
 
       const mockChat = {
-        getHistory: vi.fn().mockReturnValue(mockChatHistory),
+        getHistory: vi.fn().mockImplementation(() => [...mockChatHistory]),
         setHistory: vi.fn(),
         sendMessage: mockSendMessage,
       } as unknown as GeminiChat;
@@ -904,12 +930,17 @@ describe('Gemini Client (client.ts)', () => {
       client['chat'] = mockChat;
       client['startChat'] = vi.fn().mockResolvedValue(mockChat);
 
-      const result = await client.tryCompressChat('prompt-id-4', true);
+      const request = [{ text: 'Long conversation' }];
+      const result = await client.tryCompressChat(
+        'prompt-id-4',
+        false,
+        request,
+      );
 
       expect(mockContentGenerator.countTokens).toHaveBeenCalledTimes(2);
       expect(mockContentGenerator.countTokens).toHaveBeenNthCalledWith(1, {
         model: firstCurrentModel,
-        contents: mockChatHistory,
+        contents: [...mockChatHistory, createUserContent(request)],
       });
       expect(mockContentGenerator.countTokens).toHaveBeenNthCalledWith(2, {
         model: secondCurrentModel,
@@ -1031,6 +1062,12 @@ describe('Gemini Client (client.ts)', () => {
 
       vi.mocked(mockConfig.getIdeMode).mockReturnValue(true);
 
+      vi.spyOn(client, 'tryCompressChat').mockResolvedValue({
+        originalTokenCount: 0,
+        newTokenCount: 0,
+        compressionStatus: CompressionStatus.COMPRESSED,
+      });
+
       mockTurnRunFn.mockReturnValue(
         (async function* () {
           yield { type: 'content', value: 'Hello' };
@@ -1148,6 +1185,12 @@ ${JSON.stringify(
 
       vi.spyOn(client['config'], 'getIdeMode').mockReturnValue(true);
 
+      vi.spyOn(client, 'tryCompressChat').mockResolvedValue({
+        originalTokenCount: 0,
+        newTokenCount: 0,
+        compressionStatus: CompressionStatus.COMPRESSED,
+      });
+
       const mockStream = (async function* () {
         yield { type: 'content', value: 'Hello' };
       })();
@@ -1218,6 +1261,12 @@ ${JSON.stringify(
 
       vi.spyOn(client['config'], 'getIdeMode').mockReturnValue(true);
 
+      vi.spyOn(client, 'tryCompressChat').mockResolvedValue({
+        originalTokenCount: 0,
+        newTokenCount: 0,
+        compressionStatus: CompressionStatus.COMPRESSED,
+      });
+
       const mockStream = (async function* () {
         yield { type: 'content', value: 'Hello' };
       })();
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index 86405ba5a8..810f87de25 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -12,6 +12,7 @@ import type {
   Tool,
   GenerateContentResponse,
 } from '@google/genai';
+import { createUserContent } from '@google/genai';
 import {
   getDirectoryContextString,
   getEnvironmentContext,
@@ -453,7 +454,7 @@ export class GeminiClient {
       return new Turn(this.getChat(), prompt_id);
     }
 
-    const compressed = await this.tryCompressChat(prompt_id);
+    const compressed = await this.tryCompressChat(prompt_id, false, request);
 
     if (compressed.compressionStatus === CompressionStatus.COMPRESSED) {
       yield { type: GeminiEventType.ChatCompressed, value: compressed };
@@ -790,6 +791,7 @@ export class GeminiClient {
   async tryCompressChat(
     prompt_id: string,
     force: boolean = false,
+    request?: PartListUnion,
   ): Promise<ChatCompressionInfo> {
     // If the model is 'auto', we will use a placeholder model to check.
     // Compression occurs before we choose a model, so calling `count_tokens`
@@ -805,6 +807,10 @@ export class GeminiClient {
 
     const curatedHistory = this.getChat().getHistory(true);
 
+    if (request) {
+      curatedHistory.push(createUserContent(request));
+    }
+
     // Regardless of `force`, don't do anything if the history is empty.
     if (
       curatedHistory.length === 0 ||