fix(core): Fix context window overflow warning for PDF files (#13548)

2026-05-12 21:03:05 -07:00 · 2025-11-24 16:46:03 +09:00
parent c2a741ee36
commit 7350399a50
2 changed files with 106 additions and 7 deletions
@@ -1143,9 +1143,8 @@ ${JSON.stringify(
      // A string of length 400 is roughly 100 tokens.
      const longText = 'a'.repeat(400);
      const request: Part[] = [{ text: longText }];
-      const estimatedRequestTokenCount = Math.floor(
+      // estimateTextOnlyLength counts only text content (400 chars), not JSON structure
-        JSON.stringify(request).length / 4,
+      const estimatedRequestTokenCount = Math.floor(longText.length / 4);
      );
      const remainingTokenCount = MOCKED_TOKEN_LIMIT - lastPromptTokenCount;
      // Mock tryCompressChat to not compress
@@ -1203,9 +1202,8 @@ ${JSON.stringify(
      // We need a request > 95 tokens.
      const longText = 'a'.repeat(400);
      const request: Part[] = [{ text: longText }];
-      const estimatedRequestTokenCount = Math.floor(
+      // estimateTextOnlyLength counts only text content (400 chars), not JSON structure
-        JSON.stringify(request).length / 4,
+      const estimatedRequestTokenCount = Math.floor(longText.length / 4);
      );
      const remainingTokenCount = STICKY_MODEL_LIMIT - lastPromptTokenCount;
      vi.spyOn(client, 'tryCompressChat').mockResolvedValue({
@@ -1236,6 +1234,66 @@ ${JSON.stringify(
      expect(mockTurnRunFn).not.toHaveBeenCalled();
    });
    it('should not trigger overflow warning for requests with large binary data (PDFs/images)', async () => {
      // Arrange: the model allows 1M tokens and the previous prompt used 10k.
      const tokenCeiling = 1000000;
      const tokensAlreadyUsed = 10000;
      vi.mocked(tokenLimit).mockReturnValue(tokenCeiling);

      const chatStub: Partial<GeminiChat> = {
        getLastPromptTokenCount: vi.fn().mockReturnValue(tokensAlreadyUsed),
        getHistory: vi.fn().mockReturnValue([]),
      };
      client['chat'] = chatStub as GeminiChat;

      // A short prompt (32 chars, i.e. 8 estimated tokens) ...
      const promptPart: Part = { text: 'Please analyze this PDF document' };
      // ... accompanied by an 11 MiB base64 payload standing in for a PDF.
      // Dividing the JSON-serialized request length by 4 would put this near
      // 2.9M tokens, well above the 1M ceiling; a text-only estimate must
      // leave the payload out entirely.
      const pdfPart: Part = {
        inlineData: {
          mimeType: 'application/pdf',
          data: 'A'.repeat(11 * 1024 * 1024),
        },
      };
      const request: Part[] = [promptPart, pdfPart];

      // Compression is a no-op, so the token count going in is unchanged.
      vi.spyOn(client, 'tryCompressChat').mockResolvedValue({
        originalTokenCount: tokensAlreadyUsed,
        newTokenCount: tokensAlreadyUsed,
        compressionStatus: CompressionStatus.NOOP,
      });

      // Turn.run yields a single content event, i.e. the turn succeeds.
      async function* successfulTurn() {
        yield { type: 'content', value: 'Analysis complete' };
      }
      mockTurnRunFn.mockReturnValue(successfulTurn());

      // Act
      const events = await fromAsync(
        client.sendMessageStream(
          request,
          new AbortController().signal,
          'prompt-id-pdf-test',
        ),
      );

      // Assert: no overflow warning was emitted ...
      const overflowWarning = expect.objectContaining({
        type: GeminiEventType.ContextWindowWillOverflow,
      });
      expect(events).not.toContainEqual(overflowWarning);
      // ... and the request went on to be processed by Turn.run.
      expect(mockTurnRunFn).toHaveBeenCalled();
    });
    describe('Model Routing', () => {
      let mockRouterService: { route: Mock };
@@ -55,6 +55,44 @@ import type { ModelConfigKey } from '../services/modelConfigService.js';
 const MAX_TURNS = 100;
 /**
 * Estimates the character length of the text content in a request.
 *
 * Only text is measured: string entries and the `text` field of object
 * parts. Binary parts (`inlineData`, `fileData`) and any other part kinds
 * contribute nothing, because Gemini counts these as fixed token values
 * rather than by the size of their base64-encoded payload.
 *
 * All shapes of `PartListUnion` are handled: a bare string, a single part
 * object, or an array mixing strings and part objects.
 *
 * @param request The request to estimate tokens for
 * @returns Total number of characters of text content; 0 when there is none
 */
 function estimateTextOnlyLength(request: PartListUnion): number {
  // A bare string or a lone part object is treated as a one-element list so
  // that a single `{ text }` part is measured instead of being reported as 0.
  const parts = Array.isArray(request) ? request : [request];

  let textLength = 0;
  for (const part of parts) {
    if (typeof part === 'string') {
      textLength += part.length;
    } else if (
      typeof part === 'object' &&
      part !== null &&
      'text' in part &&
      // Only string text is measurable; this also keeps a malformed,
      // non-string `text` value from turning the total into NaN.
      typeof part.text === 'string'
    ) {
      textLength += part.text.length;
    }
    // inlineData, fileData, and other non-text parts are intentionally
    // skipped (see the function documentation).
  }
  return textLength;
 }
 export class GeminiClient {
  private chat?: GeminiChat;
  private sessionTurnCount = 0;
@@ -422,8 +460,11 @@ export class GeminiClient {
    // Check for context window overflow
    const modelForLimitCheck = this._getEffectiveModelForCurrentTurn();
    // Estimate tokens based on text content only.
    // Binary data (PDFs, images) are counted as fixed tokens by Gemini,
    // not based on their base64-encoded size.
    const estimatedRequestTokenCount = Math.floor(
-      JSON.stringify(request).length / 4,
+      estimateTextOnlyLength(request) / 4,
    );
    const remainingTokenCount =