fix(compression): prevent unnecessary summarization when history is too short (#11082)

This commit is contained in:
Abhi
2025-10-14 00:02:03 -04:00
committed by GitHub
parent 0f8199dde3
commit a3fe9279d8
3 changed files with 49 additions and 6 deletions

View File

@@ -47,7 +47,7 @@ export function CompressionMessage({
case CompressionStatus.COMPRESSION_FAILED_TOKEN_COUNT_ERROR:
return 'Could not compress chat history due to a token counting error.';
case CompressionStatus.NOOP:
return 'Chat history is already compressed.';
return 'Nothing to compress.';
default:
return '';
}

View File

@@ -590,16 +590,37 @@ describe('Gemini Client (client.ts)', () => {
expect(newChat).toBe(initialChat);
});
// Regression test for the fix in this commit (#11082): when the chat
// history is too short to split, tryCompressChat must report NOOP
// rather than issuing a summarization request to the model.
it('should return NOOP if history is too short to compress', async () => {
// A single user turn — not enough history for compression to apply.
const { client } = setup({
chatHistory: [{ role: 'user', parts: [{ text: 'hi' }] }],
originalTokenCount: 50,
});
// force=false: compression is only attempted when thresholds warrant it.
const result = await client.tryCompressChat('prompt-id-noop', false);
// On a NOOP the token counts pass through unchanged (new === original).
expect(result).toEqual({
compressionStatus: CompressionStatus.NOOP,
originalTokenCount: 50,
newTokenCount: 50,
});
// Crucially, no content-generation (summarization) call was made.
expect(mockGenerateContentFn).not.toHaveBeenCalled();
});
it('logs a telemetry event when compressing', async () => {
vi.spyOn(ClearcutLogger.prototype, 'logChatCompressionEvent');
const MOCKED_TOKEN_LIMIT = 1000;
const MOCKED_CONTEXT_PERCENTAGE_THRESHOLD = 0.5;
vi.mocked(tokenLimit).mockReturnValue(MOCKED_TOKEN_LIMIT);
vi.spyOn(client['config'], 'getChatCompression').mockReturnValue({
contextPercentageThreshold: MOCKED_CONTEXT_PERCENTAGE_THRESHOLD,
});
const history = [{ role: 'user', parts: [{ text: '...history...' }] }];
const history = [
{ role: 'user', parts: [{ text: '...history...' }] },
{ role: 'model', parts: [{ text: '...history...' }] },
{ role: 'user', parts: [{ text: '...history...' }] },
{ role: 'model', parts: [{ text: '...history...' }] },
{ role: 'user', parts: [{ text: '...history...' }] },
{ role: 'model', parts: [{ text: '...history...' }] },
];
mockGetHistory.mockReturnValue(history);
const originalTokenCount =
@@ -674,7 +695,14 @@ describe('Gemini Client (client.ts)', () => {
vi.spyOn(client['config'], 'getChatCompression').mockReturnValue({
contextPercentageThreshold: MOCKED_CONTEXT_PERCENTAGE_THRESHOLD,
});
const history = [{ role: 'user', parts: [{ text: '...history...' }] }];
const history = [
{ role: 'user', parts: [{ text: '...history...' }] },
{ role: 'model', parts: [{ text: '...history...' }] },
{ role: 'user', parts: [{ text: '...history...' }] },
{ role: 'model', parts: [{ text: '...history...' }] },
{ role: 'user', parts: [{ text: '...history...' }] },
{ role: 'model', parts: [{ text: '...history...' }] },
];
mockGetHistory.mockReturnValue(history);
const originalTokenCount =
@@ -838,7 +866,14 @@ describe('Gemini Client (client.ts)', () => {
});
it('should always trigger summarization when force is true, regardless of token count', async () => {
const history = [{ role: 'user', parts: [{ text: '...history...' }] }];
const history = [
{ role: 'user', parts: [{ text: '...history...' }] },
{ role: 'model', parts: [{ text: '...history...' }] },
{ role: 'user', parts: [{ text: '...history...' }] },
{ role: 'model', parts: [{ text: '...history...' }] },
{ role: 'user', parts: [{ text: '...history...' }] },
{ role: 'model', parts: [{ text: '...history...' }] },
];
mockGetHistory.mockReturnValue(history);
const originalTokenCount = 100; // Well below threshold, but > estimated new count

View File

@@ -777,6 +777,14 @@ My setup is complete. I will provide my first command in the next turn.
const historyToCompress = curatedHistory.slice(0, splitPoint);
const historyToKeep = curatedHistory.slice(splitPoint);
if (historyToCompress.length === 0) {
return {
originalTokenCount,
newTokenCount: originalTokenCount,
compressionStatus: CompressionStatus.NOOP,
};
}
const summaryResponse = await this.config
.getContentGenerator()
.generateContent(