Show raw input token counts in json output. (#15021)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Author: Jacob Richman
Date: 2025-12-15 18:47:39 -08:00 (committed by GitHub)
parent bb0c0d8ee3
commit 79f664d593
17 changed files with 189 additions and 129 deletions
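
The diff below adds a raw "input" token count alongside the existing prompt/cached/candidates counts and surfaces it (together with "cached") in the stream JSON stats. As a sketch of the relationship the change encodes (field names taken from the diff, surrounding types simplified for illustration only), the raw input is the portion of the prompt that was not served from cache, clamped at zero:

    // Sketch only: simplified shape of the token counters touched by this commit.
    interface TokenCounts {
      input: number;   // raw (non-cached) input tokens
      prompt: number;  // total prompt tokens sent
      cached: number;  // prompt tokens served from the cache
    }

    // input is the non-cached portion of the prompt, never negative.
    const rawInput = (t: Pick<TokenCounts, 'prompt' | 'cached'>): number =>
      Math.max(0, t.prompt - t.cached);
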
@@ -71,6 +71,7 @@ describe('JsonFormatter', () => {
totalLatencyMs: 5672,
},
tokens: {
input: 13745,
prompt: 24401,
candidates: 215,
total: 24719,
@@ -86,6 +87,7 @@ describe('JsonFormatter', () => {
totalLatencyMs: 5914,
},
tokens: {
input: 20803,
prompt: 20803,
candidates: 716,
total: 21657,
@@ -150,6 +150,8 @@ describe('StreamJsonFormatter', () => {
total_tokens: 100,
input_tokens: 50,
output_tokens: 50,
cached: 0,
input: 50,
duration_ms: 1200,
tool_calls: 2,
},
@@ -174,6 +176,8 @@ describe('StreamJsonFormatter', () => {
total_tokens: 100,
input_tokens: 50,
output_tokens: 50,
cached: 0,
input: 50,
duration_ms: 1200,
tool_calls: 0,
},
@@ -247,41 +251,114 @@ describe('StreamJsonFormatter', () => {
});
describe('convertToStreamStats', () => {
const createMockMetrics = (): SessionMetrics => ({
models: {},
tools: {
totalCalls: 0,
totalSuccess: 0,
totalFail: 0,
totalDurationMs: 0,
totalDecisions: {
[ToolCallDecision.ACCEPT]: 0,
[ToolCallDecision.REJECT]: 0,
[ToolCallDecision.MODIFY]: 0,
[ToolCallDecision.AUTO_ACCEPT]: 0,
},
byName: {},
},
files: {
totalLinesAdded: 0,
totalLinesRemoved: 0,
},
});
it('should aggregate token counts from single model', () => {
const metrics: SessionMetrics = {
models: {
'gemini-2.0-flash': {
api: {
totalRequests: 1,
totalErrors: 0,
totalLatencyMs: 1000,
},
tokens: {
prompt: 50,
candidates: 30,
total: 80,
cached: 0,
thoughts: 0,
tool: 0,
},
},
const metrics = createMockMetrics();
metrics.models['gemini-2.0-flash'] = {
api: {
totalRequests: 1,
totalErrors: 0,
totalLatencyMs: 1000,
},
tools: {
totalCalls: 2,
totalSuccess: 2,
totalFail: 0,
totalDurationMs: 500,
totalDecisions: {
[ToolCallDecision.ACCEPT]: 0,
[ToolCallDecision.REJECT]: 0,
[ToolCallDecision.MODIFY]: 0,
[ToolCallDecision.AUTO_ACCEPT]: 2,
},
byName: {},
tokens: {
input: 50,
prompt: 50,
candidates: 30,
total: 80,
cached: 0,
thoughts: 0,
tool: 0,
},
files: {
totalLinesAdded: 0,
totalLinesRemoved: 0,
};
metrics.tools.totalCalls = 2;
metrics.tools.totalDecisions[ToolCallDecision.AUTO_ACCEPT] = 2;
const result = formatter.convertToStreamStats(metrics, 1200);
expect(result).toEqual({
total_tokens: 80,
input_tokens: 50,
output_tokens: 30,
cached: 0,
input: 50,
duration_ms: 1200,
tool_calls: 2,
});
});
it('should aggregate token counts from multiple models', () => {
const metrics = createMockMetrics();
metrics.models['gemini-pro'] = {
api: { totalRequests: 1, totalErrors: 0, totalLatencyMs: 1000 },
tokens: {
input: 50,
prompt: 50,
candidates: 30,
total: 80,
cached: 0,
thoughts: 0,
tool: 0,
},
};
metrics.models['gemini-ultra'] = {
api: { totalRequests: 1, totalErrors: 0, totalLatencyMs: 2000 },
tokens: {
input: 100,
prompt: 100,
candidates: 70,
total: 170,
cached: 0,
thoughts: 0,
tool: 0,
},
};
metrics.tools.totalCalls = 5;
const result = formatter.convertToStreamStats(metrics, 3000);
expect(result).toEqual({
total_tokens: 250, // 80 + 170
input_tokens: 150, // 50 + 100
output_tokens: 100, // 30 + 70
cached: 0,
input: 150,
duration_ms: 3000,
tool_calls: 5,
});
});
it('should aggregate cached token counts correctly', () => {
const metrics = createMockMetrics();
metrics.models['gemini-pro'] = {
api: { totalRequests: 1, totalErrors: 0, totalLatencyMs: 1000 },
tokens: {
input: 20, // 50 prompt - 30 cached
prompt: 50,
candidates: 30,
total: 80,
cached: 30,
thoughts: 0,
tool: 0,
},
};
@@ -291,96 +368,15 @@ describe('StreamJsonFormatter', () => {
total_tokens: 80,
input_tokens: 50,
output_tokens: 30,
cached: 30,
input: 20,
duration_ms: 1200,
tool_calls: 2,
});
});
it('should aggregate token counts from multiple models', () => {
const metrics: SessionMetrics = {
models: {
'gemini-2.0-flash': {
api: {
totalRequests: 1,
totalErrors: 0,
totalLatencyMs: 1000,
},
tokens: {
prompt: 50,
candidates: 30,
total: 80,
cached: 0,
thoughts: 0,
tool: 0,
},
},
'gemini-1.5-pro': {
api: {
totalRequests: 1,
totalErrors: 0,
totalLatencyMs: 2000,
},
tokens: {
prompt: 100,
candidates: 70,
total: 170,
cached: 0,
thoughts: 0,
tool: 0,
},
},
},
tools: {
totalCalls: 5,
totalSuccess: 5,
totalFail: 0,
totalDurationMs: 1000,
totalDecisions: {
[ToolCallDecision.ACCEPT]: 0,
[ToolCallDecision.REJECT]: 0,
[ToolCallDecision.MODIFY]: 0,
[ToolCallDecision.AUTO_ACCEPT]: 5,
},
byName: {},
},
files: {
totalLinesAdded: 0,
totalLinesRemoved: 0,
},
};
const result = formatter.convertToStreamStats(metrics, 3000);
expect(result).toEqual({
total_tokens: 250, // 80 + 170
input_tokens: 150, // 50 + 100
output_tokens: 100, // 30 + 70
duration_ms: 3000,
tool_calls: 5,
tool_calls: 0,
});
});
it('should handle empty metrics', () => {
const metrics: SessionMetrics = {
models: {},
tools: {
totalCalls: 0,
totalSuccess: 0,
totalFail: 0,
totalDurationMs: 0,
totalDecisions: {
[ToolCallDecision.ACCEPT]: 0,
[ToolCallDecision.REJECT]: 0,
[ToolCallDecision.MODIFY]: 0,
[ToolCallDecision.AUTO_ACCEPT]: 0,
},
byName: {},
},
files: {
totalLinesAdded: 0,
totalLinesRemoved: 0,
},
};
const metrics = createMockMetrics();
const result = formatter.convertToStreamStats(metrics, 100);
@@ -388,6 +384,8 @@ describe('StreamJsonFormatter', () => {
total_tokens: 0,
input_tokens: 0,
output_tokens: 0,
cached: 0,
input: 0,
duration_ms: 100,
tool_calls: 0,
});
@@ -515,6 +513,8 @@ describe('StreamJsonFormatter', () => {
total_tokens: 0,
input_tokens: 0,
output_tokens: 0,
cached: 0,
input: 0,
duration_ms: 0,
tool_calls: 0,
},
@@ -536,6 +536,8 @@ describe('StreamJsonFormatter', () => {
total_tokens: 100,
input_tokens: 50,
output_tokens: 50,
cached: 0,
input: 50,
duration_ms: 1200,
tool_calls: 2,
},
@@ -43,18 +43,24 @@ export class StreamJsonFormatter {
let totalTokens = 0;
let inputTokens = 0;
let outputTokens = 0;
let cached = 0;
let input = 0;
// Aggregate token counts across all models
for (const modelMetrics of Object.values(metrics.models)) {
totalTokens += modelMetrics.tokens.total;
inputTokens += modelMetrics.tokens.prompt;
outputTokens += modelMetrics.tokens.candidates;
cached += modelMetrics.tokens.cached;
input += modelMetrics.tokens.input;
}
return {
total_tokens: totalTokens,
input_tokens: inputTokens,
output_tokens: outputTokens,
cached,
input,
duration_ms: durationMs,
tool_calls: metrics.tools.totalCalls,
};
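
Fed the two-model metrics from the tests above, this aggregation loop produces the expected stats. A usage sketch, assuming a StreamJsonFormatter instance named formatter as in the tests; the numbers mirror the "multiple models" test expectations:

    // Illustration only:
    // gemini-pro:   prompt 50,  candidates 30,  total 80,  input 50
    // gemini-ultra: prompt 100, candidates 70,  total 170, input 100
    const stats = formatter.convertToStreamStats(metrics, 3000);
    // => { total_tokens: 250, input_tokens: 150, output_tokens: 100,
    //      cached: 0, input: 150, duration_ms: 3000, tool_calls: 5 }
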
@@ -81,6 +81,9 @@ export interface StreamStats {
total_tokens: number;
input_tokens: number;
output_tokens: number;
// Breakdown of input_tokens
cached: number;
input: number;
duration_ms: number;
tool_calls: number;
}
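
With these fields, cached plus input accounts for input_tokens. An illustrative literal matching the "aggregate cached token counts" test above (prompt 50 with 30 served from cache):

    // Illustration only: values copied from the cached-token test expectations.
    const stats: StreamStats = {
      total_tokens: 80,
      input_tokens: 50,   // full prompt
      output_tokens: 30,
      cached: 30,         // prompt tokens served from cache
      input: 20,          // raw input = 50 - 30
      duration_ms: 1200,
      tool_calls: 2,
    };
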
@@ -173,6 +173,7 @@ describe('UiTelemetryService', () => {
totalLatencyMs: 500,
},
tokens: {
input: 5,
prompt: 10,
candidates: 20,
total: 30,
@@ -227,6 +228,7 @@ describe('UiTelemetryService', () => {
totalLatencyMs: 1100,
},
tokens: {
input: 10,
prompt: 25,
candidates: 45,
total: 70,
@@ -301,6 +303,7 @@ describe('UiTelemetryService', () => {
totalLatencyMs: 300,
},
tokens: {
input: 0,
prompt: 0,
candidates: 0,
total: 0,
@@ -345,6 +348,7 @@ describe('UiTelemetryService', () => {
totalLatencyMs: 800,
},
tokens: {
input: 5,
prompt: 10,
candidates: 20,
total: 30,
@@ -43,6 +43,7 @@ export interface ModelMetrics {
totalLatencyMs: number;
};
tokens: {
input: number;
prompt: number;
candidates: number;
total: number;
@@ -80,6 +81,7 @@ const createInitialModelMetrics = (): ModelMetrics => ({
totalLatencyMs: 0,
},
tokens: {
input: 0,
prompt: 0,
candidates: 0,
total: 0,
@@ -171,6 +173,10 @@ export class UiTelemetryService extends EventEmitter {
modelMetrics.tokens.cached += event.usage.cached_content_token_count;
modelMetrics.tokens.thoughts += event.usage.thoughts_token_count;
modelMetrics.tokens.tool += event.usage.tool_token_count;
modelMetrics.tokens.input = Math.max(
0,
modelMetrics.tokens.prompt - modelMetrics.tokens.cached,
);
}
private processApiError(event: ApiErrorEvent) {
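
Because the prompt and cached counters are cumulative, input is recomputed from the running totals after every API response rather than summed per event. A hedged walk-through with illustrative per-event numbers (the cached deltas are assumptions, not taken from the diff):

    // Hypothetical events, for illustration only.
    // Event 1: prompt +10, cached +5  -> totals: prompt 10, cached 5;  input = Math.max(0, 10 - 5)  = 5
    // Event 2: prompt +15, cached +10 -> totals: prompt 25, cached 15; input = Math.max(0, 25 - 15) = 10
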