mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-18 10:01:29 -07:00
fix(core): Fix context window overflow warning for PDF files (#13548)
This commit is contained in:
@@ -1143,9 +1143,8 @@ ${JSON.stringify(
|
||||
// A string of length 400 is roughly 100 tokens.
|
||||
const longText = 'a'.repeat(400);
|
||||
const request: Part[] = [{ text: longText }];
|
||||
const estimatedRequestTokenCount = Math.floor(
|
||||
JSON.stringify(request).length / 4,
|
||||
);
|
||||
// estimateTextOnlyLength counts only text content (400 chars), not JSON structure
|
||||
const estimatedRequestTokenCount = Math.floor(longText.length / 4);
|
||||
const remainingTokenCount = MOCKED_TOKEN_LIMIT - lastPromptTokenCount;
|
||||
|
||||
// Mock tryCompressChat to not compress
|
||||
@@ -1203,9 +1202,8 @@ ${JSON.stringify(
|
||||
// We need a request > 95 tokens.
|
||||
const longText = 'a'.repeat(400);
|
||||
const request: Part[] = [{ text: longText }];
|
||||
const estimatedRequestTokenCount = Math.floor(
|
||||
JSON.stringify(request).length / 4,
|
||||
);
|
||||
// estimateTextOnlyLength counts only text content (400 chars), not JSON structure
|
||||
const estimatedRequestTokenCount = Math.floor(longText.length / 4);
|
||||
const remainingTokenCount = STICKY_MODEL_LIMIT - lastPromptTokenCount;
|
||||
|
||||
vi.spyOn(client, 'tryCompressChat').mockResolvedValue({
|
||||
@@ -1236,6 +1234,66 @@ ${JSON.stringify(
|
||||
expect(mockTurnRunFn).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should not trigger overflow warning for requests with large binary data (PDFs/images)', async () => {
  // Arrange: a 1M-token context window with 10k tokens already used by the
  // previous prompt, so there is ample room for a short text request.
  const contextLimit = 1000000;
  const promptTokensUsed = 10000;
  vi.mocked(tokenLimit).mockReturnValue(contextLimit);

  const chatStub: Partial<GeminiChat> = {
    getLastPromptTokenCount: vi.fn().mockReturnValue(promptTokensUsed),
    getHistory: vi.fn().mockReturnValue([]),
  };
  client['chat'] = chatStub as GeminiChat;

  // An 11 MiB base64 payload standing in for a PDF attachment. Estimating
  // from the serialized request at 4 chars per token would give roughly
  // 2.9M tokens, well above the 1M limit; only the text part should count.
  const pdfPayload = 'A'.repeat(11 * 1024 * 1024);
  // 32 characters of text, i.e. about 8 estimated tokens.
  const promptPart: Part = { text: 'Please analyze this PDF document' };
  // The binary attachment, which must not contribute to the estimate.
  const pdfPart: Part = {
    inlineData: {
      mimeType: 'application/pdf',
      data: pdfPayload,
    },
  };
  const request: Part[] = [promptPart, pdfPart];

  // Compression is a no-op, leaving the token counts unchanged.
  vi.spyOn(client, 'tryCompressChat').mockResolvedValue({
    originalTokenCount: promptTokensUsed,
    newTokenCount: promptTokensUsed,
    compressionStatus: CompressionStatus.NOOP,
  });

  // Turn.run yields a single content event to simulate a successful turn.
  async function* successfulTurn() {
    yield { type: 'content', value: 'Analysis complete' };
  }
  mockTurnRunFn.mockReturnValue(successfulTurn());

  // Act
  const abortSignal = new AbortController().signal;
  const events = await fromAsync(
    client.sendMessageStream(request, abortSignal, 'prompt-id-pdf-test'),
  );

  // Assert: no overflow warning was emitted...
  const overflowEvent = expect.objectContaining({
    type: GeminiEventType.ContextWindowWillOverflow,
  });
  expect(events).not.toContainEqual(overflowEvent);

  // ...and processing continued into Turn.run.
  expect(mockTurnRunFn).toHaveBeenCalled();
});
|
||||
|
||||
describe('Model Routing', () => {
|
||||
let mockRouterService: { route: Mock };
|
||||
|
||||
|
||||
@@ -55,6 +55,44 @@ import type { ModelConfigKey } from '../services/modelConfigService.js';
|
||||
|
||||
const MAX_TURNS = 100;
|
||||
|
||||
/**
 * Estimates the character length of the text content in a request.
 *
 * Only text is counted: plain strings and the string `text` field of part
 * objects. Parts that carry no text (for example `inlineData` or `fileData`)
 * contribute nothing, so a large base64 payload does not inflate the result.
 *
 * @param request A string, a single part object, or an array mixing strings
 *   and part objects.
 * @returns Total number of text characters found in the request; 0 when the
 *   request holds no text.
 */
function estimateTextOnlyLength(request: PartListUnion): number {
  // A request may be one bare string or part rather than a list. Wrapping it
  // lets a lone `{ text }` part be counted instead of silently yielding 0.
  const parts: unknown[] = Array.isArray(request) ? request : [request];

  let textLength = 0;
  for (const part of parts) {
    if (typeof part === 'string') {
      textLength += part.length;
    } else if (typeof part === 'object' && part !== null) {
      const text = (part as { text?: unknown }).text;
      // Only string text is measurable; anything else (missing, empty, or a
      // non-string value) adds nothing rather than corrupting the sum.
      if (typeof text === 'string') {
        textLength += text.length;
      }
    }
    // Every other kind of part is deliberately ignored.
  }
  return textLength;
}
|
||||
|
||||
export class GeminiClient {
|
||||
private chat?: GeminiChat;
|
||||
private sessionTurnCount = 0;
|
||||
@@ -422,8 +460,11 @@ export class GeminiClient {
|
||||
// Check for context window overflow
|
||||
const modelForLimitCheck = this._getEffectiveModelForCurrentTurn();
|
||||
|
||||
// Estimate tokens based on text content only.
|
||||
// Binary data (PDFs, images) are counted as fixed tokens by Gemini,
|
||||
// not based on their base64-encoded size.
|
||||
const estimatedRequestTokenCount = Math.floor(
|
||||
JSON.stringify(request).length / 4,
|
||||
estimateTextOnlyLength(request) / 4,
|
||||
);
|
||||
|
||||
const remainingTokenCount =
|
||||
|
||||
Reference in New Issue
Block a user