feat(core): Improve request token calculation accuracy (#13824)

This commit is contained in:
Sandy Tao
2025-11-26 12:20:46 +08:00
committed by GitHub
parent 36a0a3d37b
commit e1d2653a7a
8 changed files with 307 additions and 56 deletions
@@ -14,6 +14,7 @@ import { getResponseText } from '../utils/partUtils.js';
import { logChatCompression } from '../telemetry/loggers.js';
import { makeChatCompressionEvent } from '../telemetry/types.js';
import { getInitialChatHistory } from '../utils/environmentContext.js';
import { calculateRequestTokenCount } from '../utils/tokenCalculation.js';
import {
DEFAULT_GEMINI_FLASH_LITE_MODEL,
DEFAULT_GEMINI_FLASH_MODEL,
@@ -195,12 +196,10 @@ export class ChatCompressionService {
// Use a shared utility to construct the initial history for an accurate token count.
const fullNewHistory = await getInitialChatHistory(config, extraHistory);
// Count tokens via the content generator for an accurate total (replaces the old 1 token ≈ 4 characters estimate).
const newTokenCount = Math.floor(
fullNewHistory.reduce(
(total, content) => total + JSON.stringify(content).length,
0,
) / 4,
const newTokenCount = await calculateRequestTokenCount(
fullNewHistory.flatMap((c) => c.parts || []),
config.getContentGenerator(),
model,
);
logChatCompression(