feat(core): Improve request token calculation accuracy (#13824)

This commit is contained in:
Sandy Tao
2025-11-26 12:20:46 +08:00
committed by GitHub
parent 36a0a3d37b
commit e1d2653a7a
8 changed files with 307 additions and 56 deletions
@@ -14,6 +14,7 @@ import { getResponseText } from '../utils/partUtils.js';
import { logChatCompression } from '../telemetry/loggers.js';
import { makeChatCompressionEvent } from '../telemetry/types.js';
import { getInitialChatHistory } from '../utils/environmentContext.js';
import { calculateRequestTokenCount } from '../utils/tokenCalculation.js';
import {
DEFAULT_GEMINI_FLASH_LITE_MODEL,
DEFAULT_GEMINI_FLASH_MODEL,
@@ -195,12 +196,10 @@ export class ChatCompressionService {
// Use a shared utility to construct the initial history for an accurate token count.
const fullNewHistory = await getInitialChatHistory(config, extraHistory);
// Count tokens via the content generator for an accurate total (replaces the old 1 token ≈ 4 characters estimate).
const newTokenCount = Math.floor(
fullNewHistory.reduce(
(total, content) => total + JSON.stringify(content).length,
0,
) / 4,
const newTokenCount = await calculateRequestTokenCount(
fullNewHistory.flatMap((c) => c.parts || []),
config.getContentGenerator(),
model,
);
logChatCompression(