fix(core): Fix context window overflow warning for PDF files (#13548)

This commit is contained in:
Kimihiko Kitase
2025-11-24 16:46:03 +09:00
committed by GitHub
parent c2a741ee36
commit 7350399a50
2 changed files with 106 additions and 7 deletions

View File

@@ -1143,9 +1143,8 @@ ${JSON.stringify(
// A string of length 400 is roughly 100 tokens.
const longText = 'a'.repeat(400);
const request: Part[] = [{ text: longText }];
const estimatedRequestTokenCount = Math.floor(
JSON.stringify(request).length / 4,
);
// estimateTextOnlyLength counts only text content (400 chars), not JSON structure
const estimatedRequestTokenCount = Math.floor(longText.length / 4);
const remainingTokenCount = MOCKED_TOKEN_LIMIT - lastPromptTokenCount;
// Mock tryCompressChat to not compress
@@ -1203,9 +1202,8 @@ ${JSON.stringify(
// We need a request > 95 tokens.
const longText = 'a'.repeat(400);
const request: Part[] = [{ text: longText }];
const estimatedRequestTokenCount = Math.floor(
JSON.stringify(request).length / 4,
);
// estimateTextOnlyLength counts only text content (400 chars), not JSON structure
const estimatedRequestTokenCount = Math.floor(longText.length / 4);
const remainingTokenCount = STICKY_MODEL_LIMIT - lastPromptTokenCount;
vi.spyOn(client, 'tryCompressChat').mockResolvedValue({
@@ -1236,6 +1234,66 @@ ${JSON.stringify(
expect(mockTurnRunFn).not.toHaveBeenCalled();
});
it('should not trigger overflow warning for requests with large binary data (PDFs/images)', async () => {
// Arrange
const MOCKED_TOKEN_LIMIT = 1000000; // 1M tokens
vi.mocked(tokenLimit).mockReturnValue(MOCKED_TOKEN_LIMIT);
const lastPromptTokenCount = 10000;
const mockChat: Partial<GeminiChat> = {
getLastPromptTokenCount: vi.fn().mockReturnValue(lastPromptTokenCount),
getHistory: vi.fn().mockReturnValue([]),
};
client['chat'] = mockChat as GeminiChat;
// Simulate a PDF file with large base64 data (11MB when encoded)
// In the old implementation, this would incorrectly estimate ~2.7M tokens
// In the new implementation, only the text part is counted
const largePdfBase64 = 'A'.repeat(11 * 1024 * 1024);
const request: Part[] = [
{ text: 'Please analyze this PDF document' }, // ~35 chars = ~8 tokens
{
inlineData: {
mimeType: 'application/pdf',
data: largePdfBase64, // This should be ignored in token estimation
},
},
];
// Mock tryCompressChat to not compress
vi.spyOn(client, 'tryCompressChat').mockResolvedValue({
originalTokenCount: lastPromptTokenCount,
newTokenCount: lastPromptTokenCount,
compressionStatus: CompressionStatus.NOOP,
});
// Mock Turn.run to simulate successful processing
const mockStream = (async function* () {
yield { type: 'content', value: 'Analysis complete' };
})();
mockTurnRunFn.mockReturnValue(mockStream);
// Act
const stream = client.sendMessageStream(
request,
new AbortController().signal,
'prompt-id-pdf-test',
);
const events = await fromAsync(stream);
// Assert
// Should NOT contain overflow warning
expect(events).not.toContainEqual(
expect.objectContaining({
type: GeminiEventType.ContextWindowWillOverflow,
}),
);
// Turn.run should be called (processing should continue)
expect(mockTurnRunFn).toHaveBeenCalled();
});
describe('Model Routing', () => {
let mockRouterService: { route: Mock };

View File

@@ -55,6 +55,44 @@ import type { ModelConfigKey } from '../services/modelConfigService.js';
const MAX_TURNS = 100;
/**
* Estimates the character length of text-only parts in a request.
* Binary data (inline_data, fileData) is excluded from the estimation
* because Gemini counts these as fixed token values, not based on their size.
* @param request The request to estimate tokens for
* @returns Estimated character length of text content
*/
/**
 * Estimates the character length of text-only parts in a request.
 *
 * Binary data (inlineData, fileData) is deliberately excluded: Gemini
 * bills such parts at fixed token values, not proportionally to their
 * base64-encoded size.
 *
 * @param request The request to estimate tokens for
 * @returns Estimated character length of text content
 */
function estimateTextOnlyLength(request: PartListUnion): number {
  // A bare string request is all text.
  if (typeof request === 'string') {
    return request.length;
  }
  // Anything that is neither a string nor an array contributes no text.
  if (!Array.isArray(request)) {
    return 0;
  }
  return request.reduce((total: number, part) => {
    // Plain string entries count in full.
    if (typeof part === 'string') {
      return total + part.length;
    }
    // Object entries count only their (non-empty) text property.
    if (part && typeof part === 'object' && 'text' in part && part.text) {
      return total + part.text.length;
    }
    // inlineData, fileData, and other binary parts are skipped — Gemini
    // counts them as fixed tokens regardless of payload size.
    return total;
  }, 0);
}
export class GeminiClient {
private chat?: GeminiChat;
private sessionTurnCount = 0;
@@ -422,8 +460,11 @@ export class GeminiClient {
// Check for context window overflow
const modelForLimitCheck = this._getEffectiveModelForCurrentTurn();
// Estimate tokens based on text content only.
// Binary data (PDFs, images) are counted as fixed tokens by Gemini,
// not based on their base64-encoded size.
const estimatedRequestTokenCount = Math.floor(
JSON.stringify(request).length / 4,
estimateTextOnlyLength(request) / 4,
);
const remainingTokenCount =