fix(ui): Track last prompt token count in processStream (#8650)

2026-06-29 04:37:12 -07:00 · 2025-09-18 11:46:56 -07:00
parent bcfd50b45b
commit 2c754d71e3
8 changed files with 72 additions and 24 deletions
@@ -16,7 +16,7 @@ vi.mock('@google/gemini-cli-core', async () => {
  return {
    ...actual,
    uiTelemetryService: {
-      resetLastPromptTokenCount: vi.fn(),
+      setLastPromptTokenCount: vi.fn(),
    },
  };
 });
@@ -57,9 +57,8 @@ describe('clearCommand', () => {
    expect(mockContext.ui.setDebugMessage).toHaveBeenCalledTimes(1);

    expect(mockResetChat).toHaveBeenCalledTimes(1);
-    expect(uiTelemetryService.resetLastPromptTokenCount).toHaveBeenCalledTimes(
-      1,
-    );
+    expect(uiTelemetryService.setLastPromptTokenCount).toHaveBeenCalledWith(0);
+    expect(uiTelemetryService.setLastPromptTokenCount).toHaveBeenCalledTimes(1);
    expect(mockContext.ui.clear).toHaveBeenCalledTimes(1);

    // Check the order of operations.
@@ -67,7 +66,7 @@ describe('clearCommand', () => {
      .invocationCallOrder[0];
    const resetChatOrder = mockResetChat.mock.invocationCallOrder[0];
    const resetTelemetryOrder = (
-      uiTelemetryService.resetLastPromptTokenCount as Mock
+      uiTelemetryService.setLastPromptTokenCount as Mock
    ).mock.invocationCallOrder[0];
    const clearOrder = (mockContext.ui.clear as Mock).mock
      .invocationCallOrder[0];
@@ -94,9 +93,8 @@ describe('clearCommand', () => {
      'Clearing terminal.',
    );
    expect(mockResetChat).not.toHaveBeenCalled();
-    expect(uiTelemetryService.resetLastPromptTokenCount).toHaveBeenCalledTimes(
-      1,
-    );
+    expect(uiTelemetryService.setLastPromptTokenCount).toHaveBeenCalledWith(0);
+    expect(uiTelemetryService.setLastPromptTokenCount).toHaveBeenCalledTimes(1);
    expect(nullConfigContext.ui.clear).toHaveBeenCalledTimes(1);
  });
 });
@@ -24,7 +24,7 @@ export const clearCommand: SlashCommand = {
      context.ui.setDebugMessage('Clearing terminal.');
    }

-    uiTelemetryService.resetLastPromptTokenCount();
+    uiTelemetryService.setLastPromptTokenCount(0);
    context.ui.clear();
  },
 };
@@ -42,6 +42,7 @@ import { tokenLimit } from './tokenLimits.js';
 import { ideContextStore } from '../ide/ideContext.js';
 import { ClearcutLogger } from '../telemetry/clearcut-logger/clearcut-logger.js';
 import type { ModelRouterService } from '../routing/modelRouterService.js';
+import { uiTelemetryService } from '../telemetry/uiTelemetry.js';

 // Mock fs module to prevent actual file system operations during tests
 const mockFileSystem = new Map<string, string>();
@@ -111,6 +112,11 @@ vi.mock('../telemetry/index.js', () => ({
  logApiError: vi.fn(),
 }));
 vi.mock('../ide/ideContext.js');
+vi.mock('../telemetry/uiTelemetry.js', () => ({
+  uiTelemetryService: {
+    setLastPromptTokenCount: vi.fn(),
+  },
+}));

 /**
 * Array.fromAsync ponyfill, which will be available in es 2024.
@@ -243,6 +249,7 @@ describe('Gemini Client (client.ts)', () => {
  let mockGenerateContentFn: Mock;
  beforeEach(async () => {
    vi.resetAllMocks();
+    vi.mocked(uiTelemetryService.setLastPromptTokenCount).mockClear();

    mockGenerateContentFn = vi.fn().mockResolvedValue({
      candidates: [{ content: { parts: [{ text: '{"key": "value"}' }] } }],
@@ -440,6 +447,12 @@ describe('Gemini Client (client.ts)', () => {
          newTokenCount: 5000,
          originalTokenCount: 1000,
        });
+        expect(uiTelemetryService.setLastPromptTokenCount).toHaveBeenCalledWith(
+          5000,
+        );
+        expect(
+          uiTelemetryService.setLastPromptTokenCount,
+        ).toHaveBeenCalledTimes(1);
      });

      it('does not manipulate the source chat', async () => {
@@ -559,6 +572,12 @@ describe('Gemini Client (client.ts)', () => {
          tokens_after: newTokenCount,
        }),
      );
+      expect(uiTelemetryService.setLastPromptTokenCount).toHaveBeenCalledWith(
+        newTokenCount,
+      );
+      expect(uiTelemetryService.setLastPromptTokenCount).toHaveBeenCalledTimes(
+        1,
+      );
    });

    it('should trigger summarization if token count is at threshold with contextPercentageThreshold setting', async () => {
@@ -49,6 +49,7 @@ import {
 import type { IdeContext, File } from '../ide/types.js';
 import { handleFallback } from '../fallback/handler.js';
 import type { RoutingContext } from '../routing/routingStrategy.js';
+import { uiTelemetryService } from '../telemetry/uiTelemetry.js';

 export function isThinkingSupported(model: string) {
  return model.startsWith('gemini-2.5') || model === DEFAULT_GEMINI_MODEL_AUTO;
@@ -772,6 +773,8 @@ export class GeminiClient {
      };
    }

+    uiTelemetryService.setLastPromptTokenCount(newTokenCount);
+
    logChatCompression(
      this.config,
      makeChatCompressionEvent({
@@ -24,6 +24,7 @@ import { AuthType } from './contentGenerator.js';
 import { type RetryOptions } from '../utils/retry.js';
 import type { ToolRegistry } from '../tools/tool-registry.js';
 import { Kind } from '../tools/tools.js';
+import { uiTelemetryService } from '../telemetry/uiTelemetry.js';

 // Mock fs module to prevent actual file system operations during tests
 const mockFileSystem = new Map<string, string>();
@@ -81,6 +82,12 @@ vi.mock('../telemetry/loggers.js', () => ({
  logContentRetryFailure: mockLogContentRetryFailure,
 }));

+vi.mock('../telemetry/uiTelemetry.js', () => ({
+  uiTelemetryService: {
+    setLastPromptTokenCount: vi.fn(),
+  },
+}));
+
 describe('GeminiChat', () => {
  let mockContentGenerator: ContentGenerator;
  let chat: GeminiChat;
@@ -89,6 +96,7 @@ describe('GeminiChat', () => {

  beforeEach(() => {
    vi.clearAllMocks();
+    vi.mocked(uiTelemetryService.setLastPromptTokenCount).mockClear();
    mockContentGenerator = {
      generateContent: vi.fn(),
      generateContentStream: vi.fn(),
@@ -529,6 +537,11 @@ describe('GeminiChat', () => {
            },
          ],
          text: () => 'response',
+          usageMetadata: {
+            promptTokenCount: 42,
+            candidatesTokenCount: 15,
+            totalTokenCount: 57,
+          },
        } as unknown as GenerateContentResponse;
      })();
      vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue(
@@ -557,6 +570,14 @@ describe('GeminiChat', () => {
        },
        'prompt-id-1',
      );
+
+      // Verify the prompt token count is recorded when usageMetadata is present
+      expect(uiTelemetryService.setLastPromptTokenCount).toHaveBeenCalledWith(
+        42,
+      );
+      expect(uiTelemetryService.setLastPromptTokenCount).toHaveBeenCalledTimes(
+        1,
+      );
    });
  });

@@ -700,6 +721,9 @@ describe('GeminiChat', () => {
        role: 'model',
        parts: [{ text: 'Successful response' }],
      });
+
+      // Verify no prompt token count is recorded when usageMetadata is missing
+      expect(uiTelemetryService.setLastPromptTokenCount).not.toHaveBeenCalled();
    });

    it('should fail after all retries on persistent invalid content and report metrics', async () => {
@@ -40,6 +40,7 @@ import {
 import { handleFallback } from '../fallback/handler.js';
 import { isFunctionResponse } from '../utils/messageInspectors.js';
 import { partListUnionToString } from './geminiRequest.js';
+import { uiTelemetryService } from '../telemetry/uiTelemetry.js';

 export enum StreamEventType {
  /** A regular content chunk from the API. */
@@ -528,6 +529,11 @@ export class GeminiChat {
      // Record token usage if this chunk has usageMetadata
      if (chunk.usageMetadata) {
        this.chatRecordingService.recordMessageTokens(chunk.usageMetadata);
+        if (chunk.usageMetadata.promptTokenCount !== undefined) {
+          uiTelemetryService.setLastPromptTokenCount(
+            chunk.usageMetadata.promptTokenCount,
+          );
+        }
      }

      yield chunk; // Yield every chunk to the UI immediately.
@@ -142,7 +142,7 @@ describe('UiTelemetryService', () => {
    expect(spy).toHaveBeenCalledOnce();
    const { metrics, lastPromptTokenCount } = spy.mock.calls[0][0];
    expect(metrics).toBeDefined();
-    expect(lastPromptTokenCount).toBe(10);
+    expect(lastPromptTokenCount).toBe(0);
  });

  describe('API Response Event Processing', () => {
@@ -177,7 +177,7 @@ describe('UiTelemetryService', () => {
          tool: 3,
        },
      });
-      expect(service.getLastPromptTokenCount()).toBe(10);
+      expect(service.getLastPromptTokenCount()).toBe(0);
    });

    it('should aggregate multiple ApiResponseEvents for the same model', () => {
@@ -227,7 +227,7 @@ describe('UiTelemetryService', () => {
          tool: 9,
        },
      });
-      expect(service.getLastPromptTokenCount()).toBe(15);
+      expect(service.getLastPromptTokenCount()).toBe(0);
    });

    it('should handle ApiResponseEvents for different models', () => {
@@ -266,7 +266,7 @@ describe('UiTelemetryService', () => {
      expect(metrics.models['gemini-2.5-flash']).toBeDefined();
      expect(metrics.models['gemini-2.5-pro'].api.totalRequests).toBe(1);
      expect(metrics.models['gemini-2.5-flash'].api.totalRequests).toBe(1);
-      expect(service.getLastPromptTokenCount()).toBe(100);
+      expect(service.getLastPromptTokenCount()).toBe(0);
    });
  });

@@ -543,10 +543,10 @@ describe('UiTelemetryService', () => {
      } as ApiResponseEvent & { 'event.name': typeof EVENT_API_RESPONSE };

      service.addEvent(event);
-      expect(service.getLastPromptTokenCount()).toBe(100);
+      expect(service.getLastPromptTokenCount()).toBe(0);

      // Now reset the token count
-      service.resetLastPromptTokenCount();
+      service.setLastPromptTokenCount(0);
      expect(service.getLastPromptTokenCount()).toBe(0);
    });

@@ -570,7 +570,7 @@ describe('UiTelemetryService', () => {
      service.addEvent(event);
      spy.mockClear(); // Clear the spy to focus on the reset call

-      service.resetLastPromptTokenCount();
+      service.setLastPromptTokenCount(0);

      expect(spy).toHaveBeenCalledOnce();
      const { metrics, lastPromptTokenCount } = spy.mock.calls[0][0];
@@ -596,7 +596,7 @@ describe('UiTelemetryService', () => {

      const metricsBefore = service.getMetrics();

-      service.resetLastPromptTokenCount();
+      service.setLastPromptTokenCount(0);

      const metricsAfter = service.getMetrics();

@@ -625,15 +625,15 @@ describe('UiTelemetryService', () => {
      } as ApiResponseEvent & { 'event.name': typeof EVENT_API_RESPONSE };

      service.addEvent(event);
-      expect(service.getLastPromptTokenCount()).toBe(100);
+      expect(service.getLastPromptTokenCount()).toBe(0);

      // Reset once
-      service.resetLastPromptTokenCount();
+      service.setLastPromptTokenCount(0);
      expect(service.getLastPromptTokenCount()).toBe(0);

      // Reset again - should still be 0 and still emit event
      spy.mockClear();
-      service.resetLastPromptTokenCount();
+      service.setLastPromptTokenCount(0);
      expect(service.getLastPromptTokenCount()).toBe(0);
      expect(spy).toHaveBeenCalledOnce();
    });
@@ -144,8 +144,8 @@ export class UiTelemetryService extends EventEmitter {
    return this.#lastPromptTokenCount;
  }

-  resetLastPromptTokenCount(): void {
-    this.#lastPromptTokenCount = 0;
+  setLastPromptTokenCount(lastPromptTokenCount: number): void {
+    this.#lastPromptTokenCount = lastPromptTokenCount;
    this.emit('update', {
      metrics: this.#metrics,
      lastPromptTokenCount: this.#lastPromptTokenCount,
@@ -171,8 +171,6 @@ export class UiTelemetryService extends EventEmitter {
    modelMetrics.tokens.cached += event.cached_content_token_count;
    modelMetrics.tokens.thoughts += event.thoughts_token_count;
    modelMetrics.tokens.tool += event.tool_token_count;
-
-    this.#lastPromptTokenCount = event.input_token_count;
  }

  private processApiError(event: ApiErrorEvent) {