mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-12 21:03:05 -07:00
fix(core): Fix context window overflow warning for PDF files (#13548)
This commit is contained in:
@@ -1143,9 +1143,8 @@ ${JSON.stringify(
|
|||||||
// A string of length 400 is roughly 100 tokens.
|
// A string of length 400 is roughly 100 tokens.
|
||||||
const longText = 'a'.repeat(400);
|
const longText = 'a'.repeat(400);
|
||||||
const request: Part[] = [{ text: longText }];
|
const request: Part[] = [{ text: longText }];
|
||||||
const estimatedRequestTokenCount = Math.floor(
|
// estimateTextOnlyLength counts only text content (400 chars), not JSON structure
|
||||||
JSON.stringify(request).length / 4,
|
const estimatedRequestTokenCount = Math.floor(longText.length / 4);
|
||||||
);
|
|
||||||
const remainingTokenCount = MOCKED_TOKEN_LIMIT - lastPromptTokenCount;
|
const remainingTokenCount = MOCKED_TOKEN_LIMIT - lastPromptTokenCount;
|
||||||
|
|
||||||
// Mock tryCompressChat to not compress
|
// Mock tryCompressChat to not compress
|
||||||
@@ -1203,9 +1202,8 @@ ${JSON.stringify(
|
|||||||
// We need a request > 95 tokens.
|
// We need a request > 95 tokens.
|
||||||
const longText = 'a'.repeat(400);
|
const longText = 'a'.repeat(400);
|
||||||
const request: Part[] = [{ text: longText }];
|
const request: Part[] = [{ text: longText }];
|
||||||
const estimatedRequestTokenCount = Math.floor(
|
// estimateTextOnlyLength counts only text content (400 chars), not JSON structure
|
||||||
JSON.stringify(request).length / 4,
|
const estimatedRequestTokenCount = Math.floor(longText.length / 4);
|
||||||
);
|
|
||||||
const remainingTokenCount = STICKY_MODEL_LIMIT - lastPromptTokenCount;
|
const remainingTokenCount = STICKY_MODEL_LIMIT - lastPromptTokenCount;
|
||||||
|
|
||||||
vi.spyOn(client, 'tryCompressChat').mockResolvedValue({
|
vi.spyOn(client, 'tryCompressChat').mockResolvedValue({
|
||||||
@@ -1236,6 +1234,66 @@ ${JSON.stringify(
|
|||||||
expect(mockTurnRunFn).not.toHaveBeenCalled();
|
expect(mockTurnRunFn).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should not trigger overflow warning for requests with large binary data (PDFs/images)', async () => {
  // Arrange: a 1M-token context window with 10k tokens already consumed by
  // the previous prompt, leaving 990k tokens of headroom.
  const contextWindowTokens = 1000000;
  const usedPromptTokens = 10000;
  vi.mocked(tokenLimit).mockReturnValue(contextWindowTokens);

  const chatStub: Partial<GeminiChat> = {
    getLastPromptTokenCount: vi.fn().mockReturnValue(usedPromptTokens),
    getHistory: vi.fn().mockReturnValue([]),
  };
  client['chat'] = chatStub as GeminiChat;

  // An 11 MiB base64 payload standing in for a PDF attachment. Estimating
  // from the serialized request (length / 4) would put this at roughly 2.9M
  // tokens and exceed the headroom; a text-only estimate sees just the
  // 32-character prompt (~8 tokens).
  const pdfPayload = 'A'.repeat(11 * 1024 * 1024);
  const textPart: Part = { text: 'Please analyze this PDF document' };
  const pdfPart: Part = {
    inlineData: {
      mimeType: 'application/pdf',
      data: pdfPayload, // must not contribute to the token estimate
    },
  };
  const request: Part[] = [textPart, pdfPart];

  // Compression is stubbed to a no-op so the token counts stay unchanged.
  vi.spyOn(client, 'tryCompressChat').mockResolvedValue({
    originalTokenCount: usedPromptTokens,
    newTokenCount: usedPromptTokens,
    compressionStatus: CompressionStatus.NOOP,
  });

  // Turn.run yields a single content event to simulate a successful turn.
  async function* successfulTurn() {
    yield { type: 'content', value: 'Analysis complete' };
  }
  mockTurnRunFn.mockReturnValue(successfulTurn());

  // Act
  const abortSignal = new AbortController().signal;
  const events = await fromAsync(
    client.sendMessageStream(request, abortSignal, 'prompt-id-pdf-test'),
  );

  // Assert: no overflow warning was emitted...
  const overflowEvent = expect.objectContaining({
    type: GeminiEventType.ContextWindowWillOverflow,
  });
  expect(events).not.toContainEqual(overflowEvent);

  // ...and the turn actually ran instead of being short-circuited.
  expect(mockTurnRunFn).toHaveBeenCalled();
});
|
||||||
|
|
||||||
describe('Model Routing', () => {
|
describe('Model Routing', () => {
|
||||||
let mockRouterService: { route: Mock };
|
let mockRouterService: { route: Mock };
|
||||||
|
|
||||||
|
|||||||
@@ -55,6 +55,44 @@ import type { ModelConfigKey } from '../services/modelConfigService.js';
|
|||||||
|
|
||||||
const MAX_TURNS = 100;
|
const MAX_TURNS = 100;
|
||||||
|
|
||||||
|
/**
 * Estimates the character length of the text content in a request.
 *
 * Only text is measured: bare strings and the `text` field of part objects.
 * Parts carrying binary data (`inlineData`, `fileData`) and any other
 * non-text parts contribute nothing, on the rationale that Gemini bills
 * those as fixed token values rather than by their base64-encoded size.
 *
 * The request may be a bare string, a single part object, or an array mixing
 * both; all three shapes are measured the same way.
 *
 * @param request The request whose text content should be measured.
 * @returns Total number of characters across all text content (0 if none).
 */
function estimateTextOnlyLength(request: PartListUnion): number {
  // Normalize to an array so a lone string or a lone part object goes through
  // the same accounting as array elements. Without this, a single
  // `{ text: '...' }` request would be counted as zero characters.
  const parts = Array.isArray(request) ? request : [request];

  let textLength = 0;
  for (const part of parts) {
    if (typeof part === 'string') {
      textLength += part.length;
    } else if (
      typeof part === 'object' &&
      part !== null &&
      'text' in part &&
      // Require an actual string so a malformed `text` value cannot add
      // `undefined` and turn the total into NaN.
      typeof part.text === 'string'
    ) {
      textLength += part.text.length;
    }
    // inlineData, fileData, and other non-text parts are intentionally
    // skipped; see the function-level comment for the rationale.
  }
  return textLength;
}
|
||||||
|
|
||||||
export class GeminiClient {
|
export class GeminiClient {
|
||||||
private chat?: GeminiChat;
|
private chat?: GeminiChat;
|
||||||
private sessionTurnCount = 0;
|
private sessionTurnCount = 0;
|
||||||
@@ -422,8 +460,11 @@ export class GeminiClient {
|
|||||||
// Check for context window overflow
|
// Check for context window overflow
|
||||||
const modelForLimitCheck = this._getEffectiveModelForCurrentTurn();
|
const modelForLimitCheck = this._getEffectiveModelForCurrentTurn();
|
||||||
|
|
||||||
|
// Estimate tokens based on text content only.
|
||||||
|
// Binary data (PDFs, images) are counted as fixed tokens by Gemini,
|
||||||
|
// not based on their base64-encoded size.
|
||||||
const estimatedRequestTokenCount = Math.floor(
|
const estimatedRequestTokenCount = Math.floor(
|
||||||
JSON.stringify(request).length / 4,
|
estimateTextOnlyLength(request) / 4,
|
||||||
);
|
);
|
||||||
|
|
||||||
const remainingTokenCount =
|
const remainingTokenCount =
|
||||||
|
|||||||
Reference in New Issue
Block a user