feat(cli): lower compression threshold to 0.2 and update UX

- Change default compression threshold from 0.5 to 0.2 of the model's token limit (e.g. 200k tokens for a 1M-token limit)

- Introduce 'ChatCompressing' event to handle UI loading

- Subtly display context optimization in message footer

- Fix associated snapshot tests
This commit is contained in:
Taylor Mullen
2026-02-17 23:12:07 -08:00
parent 884acda2dc
commit 3031aef1a4
10 changed files with 79 additions and 53 deletions

View File

@@ -1077,6 +1077,7 @@ ${JSON.stringify(
new AbortController().signal,
'test-prompt-id',
);
await stream.next(); // Trigger ChatCompressing
await stream.next(); // Trigger the generator
expect(countTokensSpy).toHaveBeenCalledWith(
@@ -1924,8 +1925,10 @@ ${JSON.stringify(
// Assert
expect(events).toEqual([
{ type: GeminiEventType.ChatCompressing },
{ type: GeminiEventType.ModelInfo, value: 'default-routed-model' },
{ type: GeminiEventType.InvalidStream },
{ type: GeminiEventType.ChatCompressing },
{ type: GeminiEventType.Content, value: 'Continued content' },
]);
@@ -1980,6 +1983,7 @@ ${JSON.stringify(
// Assert
expect(events).toEqual([
{ type: GeminiEventType.ChatCompressing },
{ type: GeminiEventType.ModelInfo, value: 'default-routed-model' },
{ type: GeminiEventType.InvalidStream },
]);
@@ -2017,8 +2021,8 @@ ${JSON.stringify(
const events = await fromAsync(stream);
// Assert
// We expect 3 events (model_info + original + 1 retry)
expect(events.length).toBe(3);
// We expect 5 events (chat_compressing + model_info + original + 1 retry chat_compressing + 1 retry model_info)
expect(events.length).toBe(5);
expect(
events
.filter((e) => e.type === GeminiEventType.ModelInfo)

View File

@@ -571,6 +571,7 @@ export class GeminiClient {
// Check for context window overflow
const modelForLimitCheck = this._getActiveModelForCurrentTurn();
yield { type: GeminiEventType.ChatCompressing };
const compressed = await this.tryCompressChat(prompt_id, false);
if (compressed.compressionStatus === CompressionStatus.COMPRESSED) {

View File

@@ -68,6 +68,7 @@ export enum GeminiEventType {
ModelInfo = 'model_info',
AgentExecutionStopped = 'agent_execution_stopped',
AgentExecutionBlocked = 'agent_execution_blocked',
ChatCompressing = 'chat_compressing',
}
export type ServerGeminiRetryEvent = {
@@ -192,6 +193,10 @@ export type ServerGeminiChatCompressedEvent = {
value: ChatCompressionInfo | null;
};
/**
 * Payload-free stream event tagged with `GeminiEventType.ChatCompressing`.
 *
 * Emitted just before a chat-compression attempt so the UI can show a
 * loading state. It carries no value; consumers discriminate on `type` only.
 */
export type ServerGeminiChatCompressingEvent = {
type: GeminiEventType.ChatCompressing;
};
/**
 * Payload-free stream event tagged with `GeminiEventType.MaxSessionTurns`.
 *
 * NOTE(review): presumably signals that the session's turn limit was
 * reached — confirm against the code that emits it.
 */
export type ServerGeminiMaxSessionTurnsEvent = {
type: GeminiEventType.MaxSessionTurns;
};
@@ -213,6 +218,7 @@ export type ServerGeminiCitationEvent = {
// The original union type, now composed of the individual types
export type ServerGeminiStreamEvent =
| ServerGeminiChatCompressedEvent
| ServerGeminiChatCompressingEvent
| ServerGeminiCitationEvent
| ServerGeminiContentEvent
| ServerGeminiErrorEvent

View File

@@ -36,7 +36,7 @@ import { PreCompressTrigger } from '../hooks/types.js';
* Default threshold for compression token count as a fraction of the model's
* token limit. If the chat history exceeds this threshold, it will be compressed.
*/
export const DEFAULT_COMPRESSION_TOKEN_THRESHOLD = 0.5;
export const DEFAULT_COMPRESSION_TOKEN_THRESHOLD = 0.2;
/**
* The fraction of the latest chat history to keep. A value of 0.3