feat(core): enhanced anchored iterative context compression with self-verification (#15710)

Ramón Medrano Llamas
2026-01-20 09:43:15 +01:00
committed by GitHub
parent e34f0b4a98
commit 1182168bd9
9 changed files with 283 additions and 40 deletions
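
In outline, the flow these tests exercise is: generate an initial summary of the chat history, run a second self-verification pass over it, fall back to the initial summary when the verification response is empty, and fail with COMPRESSION_FAILED_EMPTY_SUMMARY when the initial summary itself is empty. When the history already contains a <state_snapshot>, the compression prompt switches to an anchored instruction that updates the old snapshot rather than starting over. A minimal TypeScript sketch of that flow follows; runCompression, extractText, the LlmClient shape, and the prompt strings are illustrative stand-ins inferred from the tests, not the service's actual API.

    import type { Content, GenerateContentResponse } from '@google/genai';

    interface LlmClient {
      generateContent(req: {
        contents: Content[];
      }): Promise<GenerateContentResponse>;
    }

    // Hypothetical helper: pull the first text part out of a response.
    function extractText(res: GenerateContentResponse): string {
      return res.candidates?.[0]?.content?.parts?.[0]?.text ?? '';
    }

    async function runCompression(
      history: Content[],
      llm: LlmClient,
    ): Promise<{ status: string; summary: string | null }> {
      // Anchor to a previous snapshot when one exists in the history.
      const hasSnapshot = history.some((c) =>
        c.parts?.some((p) => p.text?.includes('<state_snapshot>')),
      );
      const instruction = hasSnapshot
        ? 'A previous <state_snapshot> exists. Update it with the turns since.'
        : 'Summarize the conversation into a <state_snapshot>.';

      // Pass 1: produce the initial summary.
      const first = await llm.generateContent({
        contents: [...history, { role: 'user', parts: [{ text: instruction }] }],
      });
      const initial = extractText(first).trim();
      if (!initial) {
        // A whitespace-only summary counts as empty: abort, keep the history.
        return { status: 'COMPRESSION_FAILED_EMPTY_SUMMARY', summary: null };
      }

      // Pass 2: self-verification. An empty verification response falls back
      // to the initial summary instead of failing the whole compression.
      const second = await llm.generateContent({
        contents: [
          { role: 'user', parts: [{ text: `Verify and correct:\n${initial}` }] },
        ],
      });
      const verified = extractText(second).trim();
      return { status: 'COMPRESSED', summary: verified || initial };
    }

The fallback keeps compression best-effort: a flaky verification pass degrades to the unverified summary rather than discarding the compression work entirely.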
@@ -12,17 +12,20 @@ import {
} from './chatCompressionService.js';
import type { Content, GenerateContentResponse } from '@google/genai';
import { CompressionStatus } from '../core/turn.js';
import type { BaseLlmClient } from '../core/baseLlmClient.js';
import type { GeminiChat } from '../core/geminiChat.js';
import type { Config } from '../config/config.js';
import * as fileUtils from '../utils/fileUtils.js';
import { getInitialChatHistory } from '../utils/environmentContext.js';
import * as tokenCalculation from '../utils/tokenCalculation.js';
import { tokenLimit } from '../core/tokenLimits.js';
import os from 'node:os';
import path from 'node:path';
import fs from 'node:fs';
vi.mock('../telemetry/loggers.js');
vi.mock('../utils/environmentContext.js');
vi.mock('../core/tokenLimits.js');
describe('findCompressSplitPoint', () => {
it('should throw an error for non-positive numbers', () => {
@@ -145,15 +148,26 @@ describe('ChatCompressionService', () => {
getLastPromptTokenCount: vi.fn().mockReturnValue(500),
} as unknown as GeminiChat;
const mockGenerateContent = vi.fn().mockResolvedValue({
  candidates: [
    {
      content: {
        parts: [{ text: 'Summary' }],
      },
    },
  ],
} as unknown as GenerateContentResponse);
const mockGenerateContent = vi
  .fn()
  .mockResolvedValueOnce({
    candidates: [
      {
        content: {
          parts: [{ text: 'Initial Summary' }],
        },
      },
    ],
  } as unknown as GenerateContentResponse)
  .mockResolvedValueOnce({
    candidates: [
      {
        content: {
          parts: [{ text: 'Verified Summary' }],
        },
      },
    ],
  } as unknown as GenerateContentResponse);
mockConfig = {
getCompressionThreshold: vi.fn(),
@@ -219,8 +233,13 @@ describe('ChatCompressionService', () => {
vi.mocked(mockChat.getHistory).mockReturnValue([
{ role: 'user', parts: [{ text: 'hi' }] },
]);
vi.mocked(mockChat.getLastPromptTokenCount).mockReturnValue(1000);
// Real token limit is ~1M, threshold 0.5. 1000 < 500k, so NOOP.
vi.mocked(mockChat.getLastPromptTokenCount).mockReturnValue(600);
vi.mocked(tokenLimit).mockReturnValue(1000);
// Set the threshold explicitly: 0.7 * 1000 = 700.
vi.mocked(mockConfig.getCompressionThreshold).mockResolvedValue(0.7);
// 600 < 700, so NOOP.
const result = await service.compress(
mockChat,
@@ -234,7 +253,7 @@ describe('ChatCompressionService', () => {
expect(result.newHistory).toBeNull();
});
it('should compress if over token threshold', async () => {
it('should compress if over token threshold with verification turn', async () => {
const history: Content[] = [
{ role: 'user', parts: [{ text: 'msg1' }] },
{ role: 'model', parts: [{ text: 'msg2' }] },
@@ -256,8 +275,78 @@ describe('ChatCompressionService', () => {
expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED);
expect(result.newHistory).not.toBeNull();
expect(result.newHistory![0].parts![0].text).toBe('Summary');
expect(mockConfig.getBaseLlmClient().generateContent).toHaveBeenCalled();
// It should contain the final verified summary
expect(result.newHistory![0].parts![0].text).toBe('Verified Summary');
expect(mockConfig.getBaseLlmClient().generateContent).toHaveBeenCalledTimes(
2,
);
});
it('should fall back to initial summary if verification response is empty', async () => {
const history: Content[] = [
{ role: 'user', parts: [{ text: 'msg1' }] },
{ role: 'model', parts: [{ text: 'msg2' }] },
];
vi.mocked(mockChat.getHistory).mockReturnValue(history);
vi.mocked(mockChat.getLastPromptTokenCount).mockReturnValue(600000);
// Completely override the LLM client for this test to avoid conflicting with beforeEach mocks
const mockLlmClient = {
generateContent: vi
.fn()
.mockResolvedValueOnce({
candidates: [{ content: { parts: [{ text: 'Initial Summary' }] } }],
} as unknown as GenerateContentResponse)
.mockResolvedValueOnce({
candidates: [{ content: { parts: [{ text: ' ' }] } }],
} as unknown as GenerateContentResponse),
};
vi.mocked(mockConfig.getBaseLlmClient).mockReturnValue(
mockLlmClient as unknown as BaseLlmClient,
);
const result = await service.compress(
mockChat,
mockPromptId,
false,
mockModel,
mockConfig,
false,
);
expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED);
expect(result.newHistory![0].parts![0].text).toBe('Initial Summary');
});
it('should use anchored instruction when a previous snapshot is present', async () => {
const history: Content[] = [
{
role: 'user',
parts: [{ text: '<state_snapshot>old</state_snapshot>' }],
},
{ role: 'model', parts: [{ text: 'msg2' }] },
{ role: 'user', parts: [{ text: 'msg3' }] },
{ role: 'model', parts: [{ text: 'msg4' }] },
];
vi.mocked(mockChat.getHistory).mockReturnValue(history);
vi.mocked(mockChat.getLastPromptTokenCount).mockReturnValue(800);
vi.mocked(tokenLimit).mockReturnValue(1000);
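// History starts with an old <state_snapshot>, so the anchored instruction should be used.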
await service.compress(
mockChat,
mockPromptId,
false,
mockModel,
mockConfig,
false,
);
const firstCall = vi.mocked(mockConfig.getBaseLlmClient().generateContent)
.mock.calls[0][0];
const lastContent = firstCall.contents?.[firstCall.contents.length - 1];
expect(lastContent?.parts?.[0].text).toContain(
'A previous <state_snapshot> exists',
);
});
it('should force compress even if under threshold', async () => {
@@ -322,6 +411,46 @@ describe('ChatCompressionService', () => {
expect(result.newHistory).toBeNull();
});
it('should return COMPRESSION_FAILED_EMPTY_SUMMARY if summary is empty', async () => {
const history: Content[] = [
{ role: 'user', parts: [{ text: 'msg1' }] },
{ role: 'model', parts: [{ text: 'msg2' }] },
];
vi.mocked(mockChat.getHistory).mockReturnValue(history);
vi.mocked(mockChat.getLastPromptTokenCount).mockReturnValue(800);
vi.mocked(tokenLimit).mockReturnValue(1000);
// Completely override the LLM client for this test
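// The verification pass returns only whitespace, which must be treated as empty.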
const mockLlmClient = {
generateContent: vi.fn().mockResolvedValue({
candidates: [
{
content: {
parts: [{ text: ' ' }],
},
},
],
} as unknown as GenerateContentResponse),
};
vi.mocked(mockConfig.getBaseLlmClient).mockReturnValue(
mockLlmClient as unknown as BaseLlmClient,
);
const result = await service.compress(
mockChat,
mockPromptId,
false,
mockModel,
mockConfig,
false,
);
expect(result.info.compressionStatus).toBe(
CompressionStatus.COMPRESSION_FAILED_EMPTY_SUMMARY,
);
expect(result.newHistory).toBeNull();
});
describe('Reverse Token Budget Truncation', () => {
it('should truncate older function responses when budget is exceeded', async () => {
vi.mocked(mockConfig.getCompressionThreshold).mockResolvedValue(0.5);
@@ -615,6 +744,7 @@ describe('ChatCompressionService', () => {
vi.mocked(mockChat.getHistory).mockReturnValue(history);
vi.mocked(mockChat.getLastPromptTokenCount).mockReturnValue(600000);
vi.mocked(tokenLimit).mockReturnValue(1_000_000);
const result = await service.compress(
mockChat,
@@ -671,6 +801,7 @@ describe('ChatCompressionService', () => {
];
vi.mocked(mockChat.getHistory).mockReturnValue(history);
vi.mocked(tokenLimit).mockReturnValue(1_000_000);
const result = await service.compress(
mockChat,