diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts
index 10e954fd2e..13c50de848 100644
--- a/packages/core/src/core/client.test.ts
+++ b/packages/core/src/core/client.test.ts
@@ -1143,9 +1143,8 @@ ${JSON.stringify(
     // A string of length 400 is roughly 100 tokens.
     const longText = 'a'.repeat(400);
     const request: Part[] = [{ text: longText }];
-    const estimatedRequestTokenCount = Math.floor(
-      JSON.stringify(request).length / 4,
-    );
+    // estimateTextOnlyLength counts only text content (400 chars), not JSON structure
+    const estimatedRequestTokenCount = Math.floor(longText.length / 4);
     const remainingTokenCount = MOCKED_TOKEN_LIMIT - lastPromptTokenCount;
 
     // Mock tryCompressChat to not compress
@@ -1203,9 +1202,8 @@ ${JSON.stringify(
     // We need a request > 95 tokens.
     const longText = 'a'.repeat(400);
     const request: Part[] = [{ text: longText }];
-    const estimatedRequestTokenCount = Math.floor(
-      JSON.stringify(request).length / 4,
-    );
+    // estimateTextOnlyLength counts only text content (400 chars), not JSON structure
+    const estimatedRequestTokenCount = Math.floor(longText.length / 4);
     const remainingTokenCount = STICKY_MODEL_LIMIT - lastPromptTokenCount;
 
     vi.spyOn(client, 'tryCompressChat').mockResolvedValue({
@@ -1236,6 +1234,66 @@ ${JSON.stringify(
     expect(mockTurnRunFn).not.toHaveBeenCalled();
   });
 
+  it('should not trigger overflow warning for requests with large binary data (PDFs/images)', async () => {
+    // Arrange
+    const MOCKED_TOKEN_LIMIT = 1000000; // 1M tokens
+    vi.mocked(tokenLimit).mockReturnValue(MOCKED_TOKEN_LIMIT);
+
+    const lastPromptTokenCount = 10000;
+    const mockChat: Partial<GeminiChat> = {
+      getLastPromptTokenCount: vi.fn().mockReturnValue(lastPromptTokenCount),
+      getHistory: vi.fn().mockReturnValue([]),
+    };
+    client['chat'] = mockChat as GeminiChat;
+
+    // Simulate a PDF file with large base64 data (11MB when encoded)
+    // In the old implementation, this would incorrectly estimate ~2.7M tokens
+    // In the new implementation, only the text part is counted
+    const largePdfBase64 = 'A'.repeat(11 * 1024 * 1024);
+    const request: Part[] = [
+      { text: 'Please analyze this PDF document' }, // ~35 chars = ~8 tokens
+      {
+        inlineData: {
+          mimeType: 'application/pdf',
+          data: largePdfBase64, // This should be ignored in token estimation
+        },
+      },
+    ];
+
+    // Mock tryCompressChat to not compress
+    vi.spyOn(client, 'tryCompressChat').mockResolvedValue({
+      originalTokenCount: lastPromptTokenCount,
+      newTokenCount: lastPromptTokenCount,
+      compressionStatus: CompressionStatus.NOOP,
+    });
+
+    // Mock Turn.run to simulate successful processing
+    const mockStream = (async function* () {
+      yield { type: 'content', value: 'Analysis complete' };
+    })();
+    mockTurnRunFn.mockReturnValue(mockStream);
+
+    // Act
+    const stream = client.sendMessageStream(
+      request,
+      new AbortController().signal,
+      'prompt-id-pdf-test',
+    );
+
+    const events = await fromAsync(stream);
+
+    // Assert
+    // Should NOT contain overflow warning
+    expect(events).not.toContainEqual(
+      expect.objectContaining({
+        type: GeminiEventType.ContextWindowWillOverflow,
+      }),
+    );
+
+    // Turn.run should be called (processing should continue)
+    expect(mockTurnRunFn).toHaveBeenCalled();
+  });
+
   describe('Model Routing', () => {
     let mockRouterService: { route: Mock };
 
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index e2addd0925..a8f777bd41 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -55,6 +55,44 @@ import type { ModelConfigKey } from '../services/modelConfigService.js';
 
 const MAX_TURNS = 100;
 
+/**
+ * Estimates the character length of text-only parts in a request.
+ * Binary data (inline_data, fileData) is excluded from the estimation
+ * because Gemini counts these as fixed token values, not based on their size.
+ * @param request The request to estimate tokens for
+ * @returns Estimated character length of text content
+ */
+function estimateTextOnlyLength(request: PartListUnion): number {
+  if (typeof request === 'string') {
+    return request.length;
+  }
+
+  // Ensure request is an array before iterating
+  if (!Array.isArray(request)) {
+    return 0;
+  }
+
+  let textLength = 0;
+  for (const part of request) {
+    // Handle string elements in the array
+    if (typeof part === 'string') {
+      textLength += part.length;
+    }
+    // Handle object elements with text property
+    else if (
+      typeof part === 'object' &&
+      part !== null &&
+      'text' in part &&
+      part.text
+    ) {
+      textLength += part.text.length;
+    }
+    // inlineData, fileData, and other binary parts are ignored
+    // as they are counted as fixed tokens by Gemini
+  }
+  return textLength;
+}
+
 export class GeminiClient {
   private chat?: GeminiChat;
   private sessionTurnCount = 0;
@@ -422,8 +460,11 @@ export class GeminiClient {
 
     // Check for context window overflow
     const modelForLimitCheck = this._getEffectiveModelForCurrentTurn();
+    // Estimate tokens based on text content only.
+    // Binary data (PDFs, images) are counted as fixed tokens by Gemini,
+    // not based on their base64-encoded size.
     const estimatedRequestTokenCount = Math.floor(
-      JSON.stringify(request).length / 4,
+      estimateTextOnlyLength(request) / 4,
     );
 
     const remainingTokenCount =