feat(cli): lower compression threshold to 0.2 and update UX

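- Change the default compression threshold from 0.5 to 0.2 of the model's
  token limit (e.g. 200k tokens for a 1M-token context window)
- Introduce a 'ChatCompressing' event so the UI can show a loading state
- Subtly display context optimization in the message footer
- Fix the associated snapshot tests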
2026-03-19 02:20:42 -07:00 · 2026-02-17 23:12:07 -08:00
parent 884acda2dc
commit 3031aef1a4
10 changed files with 79 additions and 53 deletions
--- a/packages/core/src/core/client.test.ts
+++ b/packages/core/src/core/client.test.ts
@@ -1077,6 +1077,7 @@ ${JSON.stringify(
        new AbortController().signal,
        'test-prompt-id',
      );
+      await stream.next(); // Trigger ChatCompressing
      await stream.next(); // Trigger the generator

      expect(countTokensSpy).toHaveBeenCalledWith(
@@ -1924,8 +1925,10 @@ ${JSON.stringify(

      // Assert
      expect(events).toEqual([
+        { type: GeminiEventType.ChatCompressing },
        { type: GeminiEventType.ModelInfo, value: 'default-routed-model' },
        { type: GeminiEventType.InvalidStream },
+        { type: GeminiEventType.ChatCompressing },
        { type: GeminiEventType.Content, value: 'Continued content' },
      ]);

@@ -1980,6 +1983,7 @@ ${JSON.stringify(

      // Assert
      expect(events).toEqual([
+        { type: GeminiEventType.ChatCompressing },
        { type: GeminiEventType.ModelInfo, value: 'default-routed-model' },
        { type: GeminiEventType.InvalidStream },
      ]);
@@ -2017,8 +2021,8 @@ ${JSON.stringify(
      const events = await fromAsync(stream);

      // Assert
-      // We expect 3 events (model_info + original + 1 retry)
-      expect(events.length).toBe(3);
+      // We expect 5 events (chat_compressing + model_info + original + 1 retry chat_compressing + 1 retry model_info)
+      expect(events.length).toBe(5);
      expect(
        events
          .filter((e) => e.type === GeminiEventType.ModelInfo)
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -571,6 +571,7 @@ export class GeminiClient {
    // Check for context window overflow
    const modelForLimitCheck = this._getActiveModelForCurrentTurn();

+    yield { type: GeminiEventType.ChatCompressing };
    const compressed = await this.tryCompressChat(prompt_id, false);

    if (compressed.compressionStatus === CompressionStatus.COMPRESSED) {
--- a/packages/core/src/core/turn.ts
+++ b/packages/core/src/core/turn.ts
@@ -68,6 +68,7 @@ export enum GeminiEventType {
  ModelInfo = 'model_info',
  AgentExecutionStopped = 'agent_execution_stopped',
  AgentExecutionBlocked = 'agent_execution_blocked',
+  ChatCompressing = 'chat_compressing',
 }

 export type ServerGeminiRetryEvent = {
@@ -192,6 +193,10 @@ export type ServerGeminiChatCompressedEvent = {
  value: ChatCompressionInfo | null;
 };

+export type ServerGeminiChatCompressingEvent = {
+  type: GeminiEventType.ChatCompressing;
+};
+
 export type ServerGeminiMaxSessionTurnsEvent = {
  type: GeminiEventType.MaxSessionTurns;
 };
@@ -213,6 +218,7 @@ export type ServerGeminiCitationEvent = {
 // The original union type, now composed of the individual types
 export type ServerGeminiStreamEvent =
  | ServerGeminiChatCompressedEvent
+  | ServerGeminiChatCompressingEvent
  | ServerGeminiCitationEvent
  | ServerGeminiContentEvent
  | ServerGeminiErrorEvent
--- a/packages/core/src/services/chatCompressionService.ts
+++ b/packages/core/src/services/chatCompressionService.ts
@@ -36,7 +36,7 @@ import { PreCompressTrigger } from '../hooks/types.js';
 * Default threshold for compression token count as a fraction of the model's
 * token limit. If the chat history exceeds this threshold, it will be compressed.
 */
-export const DEFAULT_COMPRESSION_TOKEN_THRESHOLD = 0.5;
+export const DEFAULT_COMPRESSION_TOKEN_THRESHOLD = 0.2;

 /**
 * The fraction of the latest chat history to keep. A value of 0.3