Fix automatic compression bug (#8599)

This commit is contained in:
Tommaso Sciortino
2025-09-17 13:12:06 -07:00
committed by GitHub
parent 8c0c156cbd
commit 0559040c0f
3 changed files with 28 additions and 55 deletions
+13 -5
View File
@@ -591,8 +591,15 @@ export const useGeminiStream = (
); );
const handleChatCompressionEvent = useCallback( const handleChatCompressionEvent = useCallback(
(eventValue: ServerGeminiChatCompressedEvent['value']) => (
addItem( eventValue: ServerGeminiChatCompressedEvent['value'],
userMessageTimestamp: number,
) => {
if (pendingHistoryItemRef.current) {
addItem(pendingHistoryItemRef.current, userMessageTimestamp);
setPendingHistoryItem(null);
}
return addItem(
{ {
type: 'info', type: 'info',
text: text:
@@ -602,8 +609,9 @@ export const useGeminiStream = (
`${eventValue?.newTokenCount ?? 'unknown'} tokens).`, `${eventValue?.newTokenCount ?? 'unknown'} tokens).`,
}, },
Date.now(), Date.now(),
), );
[addItem, config], },
[addItem, config, pendingHistoryItemRef, setPendingHistoryItem],
); );
const handleMaxSessionTurnsEvent = useCallback( const handleMaxSessionTurnsEvent = useCallback(
@@ -683,7 +691,7 @@ export const useGeminiStream = (
handleErrorEvent(event.value, userMessageTimestamp); handleErrorEvent(event.value, userMessageTimestamp);
break; break;
case ServerGeminiEventType.ChatCompressed: case ServerGeminiEventType.ChatCompressed:
handleChatCompressionEvent(event.value); handleChatCompressionEvent(event.value, userMessageTimestamp);
break; break;
case ServerGeminiEventType.ToolCallConfirmation: case ServerGeminiEventType.ToolCallConfirmation:
case ServerGeminiEventType.ToolCallResponse: case ServerGeminiEventType.ToolCallResponse:
+14 -43
View File
@@ -15,7 +15,6 @@ import {
} from 'vitest'; } from 'vitest';
import type { Content, GenerateContentResponse, Part } from '@google/genai'; import type { Content, GenerateContentResponse, Part } from '@google/genai';
import { createUserContent } from '@google/genai';
import { import {
findIndexAfterFraction, findIndexAfterFraction,
isThinkingDefault, isThinkingDefault,
@@ -399,12 +398,8 @@ describe('Gemini Client (client.ts)', () => {
vi.mocked(mockContentGenerator.countTokens).mockResolvedValue({ vi.mocked(mockContentGenerator.countTokens).mockResolvedValue({
totalTokens: 1000, totalTokens: 1000,
}); });
await client.tryCompressChat('prompt-id-4', false, [ await client.tryCompressChat('prompt-id-4', false); // Fails
{ text: 'request' }, const result = await client.tryCompressChat('prompt-id-4', true);
]); // Fails
const result = await client.tryCompressChat('prompt-id-4', true, [
{ text: 'request' },
]);
expect(result).toEqual({ expect(result).toEqual({
compressionStatus: CompressionStatus.COMPRESSED, compressionStatus: CompressionStatus.COMPRESSED,
@@ -418,9 +413,7 @@ describe('Gemini Client (client.ts)', () => {
vi.mocked(mockContentGenerator.countTokens).mockResolvedValue({ vi.mocked(mockContentGenerator.countTokens).mockResolvedValue({
totalTokens: 1000, totalTokens: 1000,
}); });
const result = await client.tryCompressChat('prompt-id-4', false, [ const result = await client.tryCompressChat('prompt-id-4', false);
{ text: 'request' },
]);
expect(result).toEqual({ expect(result).toEqual({
compressionStatus: compressionStatus:
@@ -432,9 +425,7 @@ describe('Gemini Client (client.ts)', () => {
it('does not manipulate the source chat', async () => { it('does not manipulate the source chat', async () => {
const { client, mockChat } = setup(); const { client, mockChat } = setup();
await client.tryCompressChat('prompt-id-4', false, [ await client.tryCompressChat('prompt-id-4', false);
{ text: 'request' },
]);
expect(client['chat']).toBe(mockChat); // a new chat session was not created expect(client['chat']).toBe(mockChat); // a new chat session was not created
}); });
@@ -457,7 +448,6 @@ describe('Gemini Client (client.ts)', () => {
const { compressionStatus } = await client.tryCompressChat( const { compressionStatus } = await client.tryCompressChat(
'prompt-id-4', 'prompt-id-4',
false, false,
[{ text: 'what is your wisdom?' }],
); );
expect(compressionStatus).toBe( expect(compressionStatus).toBe(
@@ -470,13 +460,9 @@ describe('Gemini Client (client.ts)', () => {
it('will not attempt to compress context after a failure', async () => { it('will not attempt to compress context after a failure', async () => {
const { client } = setup(); const { client } = setup();
await client.tryCompressChat('prompt-id-4', false, [ await client.tryCompressChat('prompt-id-4', false);
{ text: 'request' },
]);
const result = await client.tryCompressChat('prompt-id-5', false, [ const result = await client.tryCompressChat('prompt-id-5', false);
{ text: 'request' },
]);
// it counts tokens for {original, compressed} and then never again // it counts tokens for {original, compressed} and then never again
expect(mockContentGenerator.countTokens).toHaveBeenCalledTimes(2); expect(mockContentGenerator.countTokens).toHaveBeenCalledTimes(2);
@@ -499,9 +485,7 @@ describe('Gemini Client (client.ts)', () => {
}); });
const initialChat = client.getChat(); const initialChat = client.getChat();
const result = await client.tryCompressChat('prompt-id-2', false, [ const result = await client.tryCompressChat('prompt-id-2', false);
{ text: '...history...' },
]);
const newChat = client.getChat(); const newChat = client.getChat();
expect(tokenLimit).toHaveBeenCalled(); expect(tokenLimit).toHaveBeenCalled();
@@ -546,9 +530,7 @@ describe('Gemini Client (client.ts)', () => {
], ],
} as unknown as GenerateContentResponse); } as unknown as GenerateContentResponse);
await client.tryCompressChat('prompt-id-3', false, [ await client.tryCompressChat('prompt-id-3', false);
{ text: '...history...' },
]);
expect( expect(
ClearcutLogger.prototype.logChatCompressionEvent, ClearcutLogger.prototype.logChatCompressionEvent,
@@ -592,9 +574,7 @@ describe('Gemini Client (client.ts)', () => {
} as unknown as GenerateContentResponse); } as unknown as GenerateContentResponse);
const initialChat = client.getChat(); const initialChat = client.getChat();
const result = await client.tryCompressChat('prompt-id-3', false, [ const result = await client.tryCompressChat('prompt-id-3', false);
{ text: '...history...' },
]);
const newChat = client.getChat(); const newChat = client.getChat();
expect(tokenLimit).toHaveBeenCalled(); expect(tokenLimit).toHaveBeenCalled();
@@ -653,9 +633,7 @@ describe('Gemini Client (client.ts)', () => {
} as unknown as GenerateContentResponse); } as unknown as GenerateContentResponse);
const initialChat = client.getChat(); const initialChat = client.getChat();
const result = await client.tryCompressChat('prompt-id-3', false, [ const result = await client.tryCompressChat('prompt-id-3', false);
{ text: '...history...' },
]);
const newChat = client.getChat(); const newChat = client.getChat();
expect(tokenLimit).toHaveBeenCalled(); expect(tokenLimit).toHaveBeenCalled();
@@ -675,7 +653,7 @@ describe('Gemini Client (client.ts)', () => {
// 3. compressed summary message // 3. compressed summary message
// 4. standard canned user summary message // 4. standard canned user summary message
// 5. The last user message (not the last 3 because that would start with a function response) // 5. The last user message (not the last 3 because that would start with a function response)
expect(newChat.getHistory().length).toEqual(6); expect(newChat.getHistory().length).toEqual(5);
}); });
it('should always trigger summarization when force is true, regardless of token count', async () => { it('should always trigger summarization when force is true, regardless of token count', async () => {
@@ -703,9 +681,7 @@ describe('Gemini Client (client.ts)', () => {
} as unknown as GenerateContentResponse); } as unknown as GenerateContentResponse);
const initialChat = client.getChat(); const initialChat = client.getChat();
const result = await client.tryCompressChat('prompt-id-1', false, [ const result = await client.tryCompressChat('prompt-id-1', false); // force = true
{ text: '...history...' },
]); // force = true
const newChat = client.getChat(); const newChat = client.getChat();
expect(mockGenerateContentFn).toHaveBeenCalled(); expect(mockGenerateContentFn).toHaveBeenCalled();
@@ -750,17 +726,12 @@ describe('Gemini Client (client.ts)', () => {
client['chat'] = mockChat; client['chat'] = mockChat;
client['startChat'] = vi.fn().mockResolvedValue(mockChat); client['startChat'] = vi.fn().mockResolvedValue(mockChat);
const request = [{ text: 'Long conversation' }]; const result = await client.tryCompressChat('prompt-id-4', false);
const result = await client.tryCompressChat(
'prompt-id-4',
false,
request,
);
expect(mockContentGenerator.countTokens).toHaveBeenCalledTimes(2); expect(mockContentGenerator.countTokens).toHaveBeenCalledTimes(2);
expect(mockContentGenerator.countTokens).toHaveBeenNthCalledWith(1, { expect(mockContentGenerator.countTokens).toHaveBeenNthCalledWith(1, {
model: firstCurrentModel, model: firstCurrentModel,
contents: [...mockChatHistory, createUserContent(request)], contents: [...mockChatHistory],
}); });
expect(mockContentGenerator.countTokens).toHaveBeenNthCalledWith(2, { expect(mockContentGenerator.countTokens).toHaveBeenNthCalledWith(2, {
model: secondCurrentModel, model: secondCurrentModel,
+1 -7
View File
@@ -11,7 +11,6 @@ import type {
Tool, Tool,
GenerateContentResponse, GenerateContentResponse,
} from '@google/genai'; } from '@google/genai';
import { createUserContent } from '@google/genai';
import { import {
getDirectoryContextString, getDirectoryContextString,
getEnvironmentContext, getEnvironmentContext,
@@ -451,7 +450,7 @@ export class GeminiClient {
return new Turn(this.getChat(), prompt_id); return new Turn(this.getChat(), prompt_id);
} }
const compressed = await this.tryCompressChat(prompt_id, false, request); const compressed = await this.tryCompressChat(prompt_id, false);
if (compressed.compressionStatus === CompressionStatus.COMPRESSED) { if (compressed.compressionStatus === CompressionStatus.COMPRESSED) {
yield { type: GeminiEventType.ChatCompressed, value: compressed }; yield { type: GeminiEventType.ChatCompressed, value: compressed };
@@ -638,7 +637,6 @@ export class GeminiClient {
async tryCompressChat( async tryCompressChat(
prompt_id: string, prompt_id: string,
force: boolean = false, force: boolean = false,
request?: PartListUnion,
): Promise<ChatCompressionInfo> { ): Promise<ChatCompressionInfo> {
// If the model is 'auto', we will use a placeholder model to check. // If the model is 'auto', we will use a placeholder model to check.
// Compression occurs before we choose a model, so calling `count_tokens` // Compression occurs before we choose a model, so calling `count_tokens`
@@ -654,10 +652,6 @@ export class GeminiClient {
const curatedHistory = this.getChat().getHistory(true); const curatedHistory = this.getChat().getHistory(true);
if (request) {
curatedHistory.push(createUserContent(request));
}
// Regardless of `force`, don't do anything if the history is empty. // Regardless of `force`, don't do anything if the history is empty.
if ( if (
curatedHistory.length === 0 || curatedHistory.length === 0 ||