feat(core): enhanced anchored iterative context compression with self-verification (#15710)

Ramón Medrano Llamas
2026-01-20 09:43:15 +01:00
committed by GitHub
parent e34f0b4a98
commit 1182168bd9
9 changed files with 283 additions and 40 deletions
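Condensed from the diff below: compression now builds its summarization request with an anchor instruction — when the history already contains a <state_snapshot>, the model is told to fold the still-relevant parts of that snapshot into the new one — and then runs a second "probe" turn asking the model to self-check the snapshot and reissue a final version. The verified snapshot is preferred, the initial summary is kept if the probe comes back empty, and COMPRESSION_FAILED_EMPTY_SUMMARY is reported only when both are empty. An optional abortSignal now flows into both calls. The sketch below illustrates that pattern with a generic generate() callback; names and signatures here are illustrative, not the actual service code, which goes through BaseLlmClient.generateContent with the compression system prompt.

// Minimal sketch of the summarize-then-verify pattern (illustrative only).
type Generate = (prompt: string) => Promise<string>;

async function compressWithVerification(
  history: string,
  hasPreviousSnapshot: boolean,
  generate: Generate,
): Promise<string | null> {
  const anchorInstruction = hasPreviousSnapshot
    ? 'A previous <state_snapshot> exists; integrate all still-relevant information into the new one.'
    : 'Generate a new <state_snapshot> based on the provided history.';

  // Phase 1: initial summary, anchored to any prior snapshot.
  const summary = await generate(`${history}\n\n${anchorInstruction}`);

  // Phase 2: "probe" turn — ask the model to self-check and reissue the snapshot.
  const verified = await generate(
    `${history}\n\n${summary}\n\nCritically evaluate the snapshot and emit a FINAL <state_snapshot>.`,
  );

  // Prefer the verified snapshot, fall back to the initial summary, and signal
  // failure (COMPRESSION_FAILED_EMPTY_SUMMARY upstream) only when both are empty.
  const finalSummary = (verified.trim() || summary).trim();
  return finalSummary || null;
}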
@@ -12,17 +12,20 @@ import {
} from './chatCompressionService.js';
import type { Content, GenerateContentResponse } from '@google/genai';
import { CompressionStatus } from '../core/turn.js';
import type { BaseLlmClient } from '../core/baseLlmClient.js';
import type { GeminiChat } from '../core/geminiChat.js';
import type { Config } from '../config/config.js';
import * as fileUtils from '../utils/fileUtils.js';
import { getInitialChatHistory } from '../utils/environmentContext.js';
import * as tokenCalculation from '../utils/tokenCalculation.js';
import { tokenLimit } from '../core/tokenLimits.js';
import os from 'node:os';
import path from 'node:path';
import fs from 'node:fs';
vi.mock('../telemetry/loggers.js');
vi.mock('../utils/environmentContext.js');
vi.mock('../core/tokenLimits.js');
describe('findCompressSplitPoint', () => {
it('should throw an error for non-positive numbers', () => {
@@ -145,15 +148,26 @@ describe('ChatCompressionService', () => {
getLastPromptTokenCount: vi.fn().mockReturnValue(500),
} as unknown as GeminiChat;
const mockGenerateContent = vi.fn().mockResolvedValue({
candidates: [
{
content: {
parts: [{ text: 'Summary' }],
const mockGenerateContent = vi
.fn()
.mockResolvedValueOnce({
candidates: [
{
content: {
parts: [{ text: 'Initial Summary' }],
},
},
},
],
} as unknown as GenerateContentResponse);
],
} as unknown as GenerateContentResponse)
.mockResolvedValueOnce({
candidates: [
{
content: {
parts: [{ text: 'Verified Summary' }],
},
},
],
} as unknown as GenerateContentResponse);
mockConfig = {
getCompressionThreshold: vi.fn(),
@@ -219,8 +233,13 @@ describe('ChatCompressionService', () => {
vi.mocked(mockChat.getHistory).mockReturnValue([
{ role: 'user', parts: [{ text: 'hi' }] },
]);
vi.mocked(mockChat.getLastPromptTokenCount).mockReturnValue(1000);
// Real token limit is ~1M, threshold 0.5. 1000 < 500k, so NOOP.
vi.mocked(mockChat.getLastPromptTokenCount).mockReturnValue(600);
vi.mocked(tokenLimit).mockReturnValue(1000);
// Set the threshold explicitly to 0.7: the limit is 0.7 * 1000 = 700,
// and 600 < 700, so this should be a NOOP.
vi.mocked(mockConfig.getCompressionThreshold).mockResolvedValue(0.7);
const result = await service.compress(
mockChat,
@@ -234,7 +253,7 @@ describe('ChatCompressionService', () => {
expect(result.newHistory).toBeNull();
});
it('should compress if over token threshold', async () => {
it('should compress if over token threshold with verification turn', async () => {
const history: Content[] = [
{ role: 'user', parts: [{ text: 'msg1' }] },
{ role: 'model', parts: [{ text: 'msg2' }] },
@@ -256,8 +275,78 @@ describe('ChatCompressionService', () => {
expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED);
expect(result.newHistory).not.toBeNull();
expect(result.newHistory![0].parts![0].text).toBe('Summary');
expect(mockConfig.getBaseLlmClient().generateContent).toHaveBeenCalled();
// It should contain the final verified summary
expect(result.newHistory![0].parts![0].text).toBe('Verified Summary');
expect(mockConfig.getBaseLlmClient().generateContent).toHaveBeenCalledTimes(
2,
);
});
it('should fall back to initial summary if verification response is empty', async () => {
const history: Content[] = [
{ role: 'user', parts: [{ text: 'msg1' }] },
{ role: 'model', parts: [{ text: 'msg2' }] },
];
vi.mocked(mockChat.getHistory).mockReturnValue(history);
vi.mocked(mockChat.getLastPromptTokenCount).mockReturnValue(600000);
// Completely override the LLM client for this test to avoid conflicting with beforeEach mocks
const mockLlmClient = {
generateContent: vi
.fn()
.mockResolvedValueOnce({
candidates: [{ content: { parts: [{ text: 'Initial Summary' }] } }],
} as unknown as GenerateContentResponse)
.mockResolvedValueOnce({
candidates: [{ content: { parts: [{ text: ' ' }] } }],
} as unknown as GenerateContentResponse),
};
vi.mocked(mockConfig.getBaseLlmClient).mockReturnValue(
mockLlmClient as unknown as BaseLlmClient,
);
const result = await service.compress(
mockChat,
mockPromptId,
false,
mockModel,
mockConfig,
false,
);
expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED);
expect(result.newHistory![0].parts![0].text).toBe('Initial Summary');
});
it('should use anchored instruction when a previous snapshot is present', async () => {
const history: Content[] = [
{
role: 'user',
parts: [{ text: '<state_snapshot>old</state_snapshot>' }],
},
{ role: 'model', parts: [{ text: 'msg2' }] },
{ role: 'user', parts: [{ text: 'msg3' }] },
{ role: 'model', parts: [{ text: 'msg4' }] },
];
vi.mocked(mockChat.getHistory).mockReturnValue(history);
vi.mocked(mockChat.getLastPromptTokenCount).mockReturnValue(800);
vi.mocked(tokenLimit).mockReturnValue(1000);
await service.compress(
mockChat,
mockPromptId,
false,
mockModel,
mockConfig,
false,
);
const firstCall = vi.mocked(mockConfig.getBaseLlmClient().generateContent)
.mock.calls[0][0];
const lastContent = firstCall.contents?.[firstCall.contents.length - 1];
expect(lastContent?.parts?.[0].text).toContain(
'A previous <state_snapshot> exists',
);
});
it('should force compress even if under threshold', async () => {
@@ -322,6 +411,46 @@ describe('ChatCompressionService', () => {
expect(result.newHistory).toBeNull();
});
it('should return COMPRESSION_FAILED_EMPTY_SUMMARY if summary is empty', async () => {
const history: Content[] = [
{ role: 'user', parts: [{ text: 'msg1' }] },
{ role: 'model', parts: [{ text: 'msg2' }] },
];
vi.mocked(mockChat.getHistory).mockReturnValue(history);
vi.mocked(mockChat.getLastPromptTokenCount).mockReturnValue(800);
vi.mocked(tokenLimit).mockReturnValue(1000);
// Completely override the LLM client for this test
const mockLlmClient = {
generateContent: vi.fn().mockResolvedValue({
candidates: [
{
content: {
parts: [{ text: ' ' }],
},
},
],
} as unknown as GenerateContentResponse),
};
vi.mocked(mockConfig.getBaseLlmClient).mockReturnValue(
mockLlmClient as unknown as BaseLlmClient,
);
const result = await service.compress(
mockChat,
mockPromptId,
false,
mockModel,
mockConfig,
false,
);
expect(result.info.compressionStatus).toBe(
CompressionStatus.COMPRESSION_FAILED_EMPTY_SUMMARY,
);
expect(result.newHistory).toBeNull();
});
describe('Reverse Token Budget Truncation', () => {
it('should truncate older function responses when budget is exceeded', async () => {
vi.mocked(mockConfig.getCompressionThreshold).mockResolvedValue(0.5);
@@ -615,6 +744,7 @@ describe('ChatCompressionService', () => {
vi.mocked(mockChat.getHistory).mockReturnValue(history);
vi.mocked(mockChat.getLastPromptTokenCount).mockReturnValue(600000);
vi.mocked(tokenLimit).mockReturnValue(1_000_000);
const result = await service.compress(
mockChat,
@@ -671,6 +801,7 @@ describe('ChatCompressionService', () => {
];
vi.mocked(mockChat.getHistory).mockReturnValue(history);
vi.mocked(tokenLimit).mockReturnValue(1_000_000);
const result = await service.compress(
mockChat,
@@ -240,6 +240,7 @@ export class ChatCompressionService {
model: string,
config: Config,
hasFailedCompressionAttempt: boolean,
abortSignal?: AbortSignal,
): Promise<{ newHistory: Content[] | null; info: ChatCompressionInfo }> {
const curatedHistory = chat.getHistory(true);
@@ -319,6 +320,14 @@ export class ChatCompressionService {
? originalHistoryToCompress
: historyToCompressTruncated;
const hasPreviousSnapshot = historyForSummarizer.some((c) =>
c.parts?.some((p) => p.text?.includes('<state_snapshot>')),
);
const anchorInstruction = hasPreviousSnapshot
? 'A previous <state_snapshot> exists in the history. You MUST integrate all still-relevant information from that snapshot into the new one, updating it with the more recent events. Do not lose established constraints or critical knowledge.'
: 'Generate a new <state_snapshot> based on the provided history.';
const summaryResponse = await config.getBaseLlmClient().generateContent({
modelConfigKey: { model: modelStringToModelConfigAlias(model) },
contents: [
@@ -327,7 +336,7 @@ export class ChatCompressionService {
role: 'user',
parts: [
{
text: 'First, reason in your scratchpad. Then, generate the <state_snapshot>.',
text: `${anchorInstruction}\n\nFirst, reason in your scratchpad. Then, generate the updated <state_snapshot>.`,
},
],
},
@@ -335,14 +344,62 @@ export class ChatCompressionService {
systemInstruction: { text: getCompressionPrompt() },
promptId,
// TODO(joshualitt): wire up a sensible abort signal,
abortSignal: new AbortController().signal,
abortSignal: abortSignal ?? new AbortController().signal,
});
const summary = getResponseText(summaryResponse) ?? '';
// Phase 3: The "Probe" Verification (Self-Correction)
// We perform a second lightweight turn to ensure no critical information was lost.
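// The probe re-sends the history used for summarization plus the candidate summary
// as a model turn, then asks the model to self-check and reissue the snapshot.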
const verificationResponse = await config
.getBaseLlmClient()
.generateContent({
modelConfigKey: { model: modelStringToModelConfigAlias(model) },
contents: [
...historyForSummarizer,
{
role: 'model',
parts: [{ text: summary }],
},
{
role: 'user',
parts: [
{
text: 'Critically evaluate the <state_snapshot> you just generated. Did you omit any specific technical details, file paths, tool results, or user constraints mentioned in the history? If anything is missing or could be more precise, generate a FINAL, improved <state_snapshot>. Otherwise, repeat the exact same <state_snapshot> again.',
},
],
},
],
systemInstruction: { text: getCompressionPrompt() },
promptId: `${promptId}-verify`,
abortSignal: abortSignal ?? new AbortController().signal,
});
const finalSummary = (
getResponseText(verificationResponse)?.trim() || summary
).trim();
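// An empty verification response falls back to the initial summary; only when
// both are empty is the compression reported as failed.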
if (!finalSummary) {
logChatCompression(
config,
makeChatCompressionEvent({
tokens_before: originalTokenCount,
tokens_after: originalTokenCount, // No change since it failed
}),
);
return {
newHistory: null,
info: {
originalTokenCount,
newTokenCount: originalTokenCount,
compressionStatus: CompressionStatus.COMPRESSION_FAILED_EMPTY_SUMMARY,
},
};
}
const extraHistory: Content[] = [
{
role: 'user',
parts: [{ text: summary }],
parts: [{ text: finalSummary }],
},
{
role: 'model',