Revert "Hotfix/metrics stream error (#7156)"

This reverts commit 0910b35812.
2026-04-29 14:34:55 -07:00 · 2025-08-26 23:30:27 -07:00
parent 3a8ec2d83c
commit 2f6cf270ec
8 changed files with 7 additions and 400 deletions
@@ -25,19 +25,6 @@ const mockModelsModule = {
  batchEmbedContents: vi.fn(),
 } as unknown as Models;

-const { mockLogInvalidChunk, mockLogContentRetry, mockLogContentRetryFailure } =
-  vi.hoisted(() => ({
-    mockLogInvalidChunk: vi.fn(),
-    mockLogContentRetry: vi.fn(),
-    mockLogContentRetryFailure: vi.fn(),
-  }));
-
-vi.mock('../telemetry/loggers.js', () => ({
-  logInvalidChunk: mockLogInvalidChunk,
-  logContentRetry: mockLogContentRetry,
-  logContentRetryFailure: mockLogContentRetryFailure,
-}));
-
 describe('GeminiChat', () => {
  let chat: GeminiChat;
  let mockConfig: Config;
@@ -496,7 +483,7 @@ describe('GeminiChat', () => {
  });

  describe('sendMessageStream with retries', () => {
-    it('should retry on invalid content, succeed, and report metrics', async () => {
+    it('should retry on invalid content and succeed on the second attempt', async () => {
      // Use mockImplementationOnce to provide a fresh, promise-wrapped generator for each attempt.
      vi.mocked(mockModelsModule.generateContentStream)
        .mockImplementationOnce(async () =>
@@ -528,9 +515,6 @@ describe('GeminiChat', () => {
      }

      // Assertions
-      expect(mockLogInvalidChunk).toHaveBeenCalledTimes(1);
-      expect(mockLogContentRetry).toHaveBeenCalledTimes(1);
-      expect(mockLogContentRetryFailure).not.toHaveBeenCalled();
      expect(mockModelsModule.generateContentStream).toHaveBeenCalledTimes(2);
      expect(
        chunks.some(
@@ -553,7 +537,7 @@ describe('GeminiChat', () => {
      });
    });

-    it('should fail after all retries on persistent invalid content and report metrics', async () => {
+    it('should fail after all retries on persistent invalid content', async () => {
      vi.mocked(mockModelsModule.generateContentStream).mockImplementation(
        async () =>
          (async function* () {
@@ -587,9 +571,6 @@ describe('GeminiChat', () => {

      // Should be called 3 times (initial + 2 retries)
      expect(mockModelsModule.generateContentStream).toHaveBeenCalledTimes(3);
-      expect(mockLogInvalidChunk).toHaveBeenCalledTimes(3);
-      expect(mockLogContentRetry).toHaveBeenCalledTimes(2);
-      expect(mockLogContentRetryFailure).toHaveBeenCalledTimes(1);

      // History should be clean, as if the failed turn never happened.
      const history = chat.getHistory();
@@ -604,7 +585,7 @@ describe('GeminiChat', () => {
    ];
    chat.setHistory(initialHistory);

-    // 2. Mock the API to fail once with an empty stream, then succeed.
+    // 2. Mock the API
    vi.mocked(mockModelsModule.generateContentStream)
      .mockImplementationOnce(async () =>
        (async function* () {
@@ -614,7 +595,6 @@ describe('GeminiChat', () => {
        })(),
      )
      .mockImplementationOnce(async () =>
-        // Second attempt succeeds
        (async function* () {
          yield {
            candidates: [{ content: { parts: [{ text: 'Second answer' }] } }],
@@ -631,13 +611,10 @@ describe('GeminiChat', () => {
      // consume stream
    }

-    // 4. Assert the final history and metrics
+    // 4. Assert the final history
    const history = chat.getHistory();
    expect(history.length).toBe(4);

-    // Assert that the correct metrics were reported for one empty-stream retry
-    expect(mockLogContentRetry).toHaveBeenCalledTimes(1);
-
    // Explicitly verify the structure of each part to satisfy TypeScript
    const turn1 = history[0];
    if (!turn1?.parts?.[0] || !('text' in turn1.parts[0])) {
@@ -23,16 +23,6 @@ import { Config } from '../config/config.js';
 import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
 import { hasCycleInSchema } from '../tools/tools.js';
 import { StructuredError } from './turn.js';
-import {
-  logContentRetry,
-  logContentRetryFailure,
-  logInvalidChunk,
-} from '../telemetry/loggers.js';
-import {
-  ContentRetryEvent,
-  ContentRetryFailureEvent,
-  InvalidChunkEvent,
-} from '../telemetry/types.js';

 /**
 * Options for retrying due to invalid content from the model.
@@ -48,6 +38,7 @@ const INVALID_CONTENT_RETRY_OPTIONS: ContentRetryOptions = {
  maxAttempts: 3, // 1 initial call + 2 retries
  initialDelayMs: 500,
 };
+
 /**
 * Returns true if the response is valid, false otherwise.
 */
@@ -358,7 +349,7 @@ export class GeminiChat {

        for (
          let attempt = 0;
-          attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts;
+          attempt <= INVALID_CONTENT_RETRY_OPTIONS.maxAttempts;
          attempt++
        ) {
          try {
@@ -382,14 +373,6 @@ export class GeminiChat {
            if (isContentError) {
              // Check if we have more attempts left.
              if (attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts - 1) {
-                logContentRetry(
-                  self.config,
-                  new ContentRetryEvent(
-                    attempt,
-                    'EmptyStreamError',
-                    INVALID_CONTENT_RETRY_OPTIONS.initialDelayMs,
-                  ),
-                );
                await new Promise((res) =>
                  setTimeout(
                    res,
@@ -405,15 +388,6 @@ export class GeminiChat {
        }

        if (lastError) {
-          if (lastError instanceof EmptyStreamError) {
-            logContentRetryFailure(
-              self.config,
-              new ContentRetryFailureEvent(
-                INVALID_CONTENT_RETRY_OPTIONS.maxAttempts,
-                'EmptyStreamError',
-              ),
-            );
-          }
          // If the stream fails, remove the user message that was added.
          if (self.history[self.history.length - 1] === userContent) {
            self.history.pop();
@@ -571,10 +545,6 @@ export class GeminiChat {
          }
        }
      } else {
-        logInvalidChunk(
-          this.config,
-          new InvalidChunkEvent('Invalid chunk received from stream.'),
-        );
        isStreamInvalid = true;
      }
      yield chunk; // Yield every chunk to the UI immediately.
@@ -19,9 +19,6 @@ import {
  IdeConnectionEvent,
  KittySequenceOverflowEvent,
  ChatCompressionEvent,
-  InvalidChunkEvent,
-  ContentRetryEvent,
-  ContentRetryFailureEvent,
 } from '../types.js';
 import { EventMetadataKey } from './event-metadata-key.js';
 import { Config } from '../../config/config.js';
@@ -51,9 +48,6 @@ export enum EventNames {
  IDE_CONNECTION = 'ide_connection',
  KITTY_SEQUENCE_OVERFLOW = 'kitty_sequence_overflow',
  CHAT_COMPRESSION = 'chat_compression',
-  INVALID_CHUNK = 'invalid_chunk',
-  CONTENT_RETRY = 'content_retry',
-  CONTENT_RETRY_FAILURE = 'content_retry_failure',
 }

 export interface LogResponse {
@@ -681,69 +675,6 @@ export class ClearcutLogger {
    });
  }

-  logInvalidChunkEvent(event: InvalidChunkEvent): void {
-    const data: EventValue[] = [];
-
-    if (event.error_message) {
-      data.push({
-        gemini_cli_key: EventMetadataKey.GEMINI_CLI_INVALID_CHUNK_ERROR_MESSAGE,
-        value: event.error_message,
-      });
-    }
-
-    this.enqueueLogEvent(this.createLogEvent(EventNames.INVALID_CHUNK, data));
-    this.flushIfNeeded();
-  }
-
-  logContentRetryEvent(event: ContentRetryEvent): void {
-    const data: EventValue[] = [
-      {
-        gemini_cli_key:
-          EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_ATTEMPT_NUMBER,
-        value: String(event.attempt_number),
-      },
-      {
-        gemini_cli_key: EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_ERROR_TYPE,
-        value: event.error_type,
-      },
-      {
-        gemini_cli_key: EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_DELAY_MS,
-        value: String(event.retry_delay_ms),
-      },
-    ];
-
-    this.enqueueLogEvent(this.createLogEvent(EventNames.CONTENT_RETRY, data));
-    this.flushIfNeeded();
-  }
-
-  logContentRetryFailureEvent(event: ContentRetryFailureEvent): void {
-    const data: EventValue[] = [
-      {
-        gemini_cli_key:
-          EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_ATTEMPTS,
-        value: String(event.total_attempts),
-      },
-      {
-        gemini_cli_key:
-          EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_FAILURE_FINAL_ERROR_TYPE,
-        value: event.final_error_type,
-      },
-    ];
-
-    if (event.total_duration_ms) {
-      data.push({
-        gemini_cli_key:
-          EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_DURATION_MS,
-        value: String(event.total_duration_ms),
-      });
-    }
-
-    this.enqueueLogEvent(
-      this.createLogEvent(EventNames.CONTENT_RETRY_FAILURE, data),
-    );
-    this.flushIfNeeded();
-  }
-
  /**
   * Adds default fields to data, and returns a new data array.  This fields
   * should exist on all log events.
@@ -229,16 +229,6 @@ export enum EventMetadataKey {
  // Logs the length of the kitty sequence that overflowed.
  GEMINI_CLI_KITTY_SEQUENCE_LENGTH = 53,

-  // ==========================================================================
-  // Conversation Finished Event Keys
-  // ===========================================================================
-
-  // Logs the approval mode of the session.
-  GEMINI_CLI_APPROVAL_MODE = 58,
-
-  // Logs the number of turns
-  GEMINI_CLI_CONVERSATION_TURN_COUNT = 59,
-
  // Logs the number of tokens before context window compression.
  GEMINI_CLI_COMPRESSION_TOKENS_BEFORE = 60,

@@ -247,72 +237,4 @@ export enum EventMetadataKey {

  // Logs tool type whether it is mcp or native.
  GEMINI_CLI_TOOL_TYPE = 62,
-  // Logs name of MCP tools as comma separated string
-  GEMINI_CLI_START_SESSION_MCP_TOOLS = 65,
-
-  // ==========================================================================
-  // Research Event Keys
-  // ===========================================================================
-
-  // Logs the research opt-in status (true/false)
-  GEMINI_CLI_RESEARCH_OPT_IN_STATUS = 66,
-
-  // Logs the contact email for research participation
-  GEMINI_CLI_RESEARCH_CONTACT_EMAIL = 67,
-
-  // Logs the user ID for research events
-  GEMINI_CLI_RESEARCH_USER_ID = 68,
-
-  // Logs the type of research feedback
-  GEMINI_CLI_RESEARCH_FEEDBACK_TYPE = 69,
-
-  // Logs the content of research feedback
-  GEMINI_CLI_RESEARCH_FEEDBACK_CONTENT = 70,
-
-  // Logs survey responses for research feedback (JSON stringified)
-  GEMINI_CLI_RESEARCH_SURVEY_RESPONSES = 71,
-
-  // ==========================================================================
-  // File Operation Event Keys
-  // ===========================================================================
-
-  // Logs the programming language of the project.
-  GEMINI_CLI_PROGRAMMING_LANGUAGE = 56,
-
-  // Logs the operation type of the file operation.
-  GEMINI_CLI_FILE_OPERATION_TYPE = 57,
-
-  // Logs the number of lines in the file operation.
-  GEMINI_CLI_FILE_OPERATION_LINES = 72,
-
-  // Logs the mimetype of the file in the file operation.
-  GEMINI_CLI_FILE_OPERATION_MIMETYPE = 73,
-
-  // Logs the extension of the file in the file operation.
-  GEMINI_CLI_FILE_OPERATION_EXTENSION = 74,
-
-  // ==========================================================================
-  // Content Streaming Event Keys
-  // ===========================================================================
-
-  // Logs the error message for an invalid chunk.
-  GEMINI_CLI_INVALID_CHUNK_ERROR_MESSAGE = 75,
-
-  // Logs the attempt number for a content retry.
-  GEMINI_CLI_CONTENT_RETRY_ATTEMPT_NUMBER = 76,
-
-  // Logs the error type for a content retry.
-  GEMINI_CLI_CONTENT_RETRY_ERROR_TYPE = 77,
-
-  // Logs the delay in milliseconds for a content retry.
-  GEMINI_CLI_CONTENT_RETRY_DELAY_MS = 78,
-
-  // Logs the total number of attempts for a content retry failure.
-  GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_ATTEMPTS = 79,
-
-  // Logs the final error type for a content retry failure.
-  GEMINI_CLI_CONTENT_RETRY_FAILURE_FINAL_ERROR_TYPE = 80,
-
-  // Logs the total duration in milliseconds for a content retry failure.
-  GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_DURATION_MS = 81,
 }
@@ -17,11 +17,6 @@ export const EVENT_NEXT_SPEAKER_CHECK = 'gemini_cli.next_speaker_check';
 export const EVENT_SLASH_COMMAND = 'gemini_cli.slash_command';
 export const EVENT_IDE_CONNECTION = 'gemini_cli.ide_connection';
 export const EVENT_CHAT_COMPRESSION = 'gemini_cli.chat_compression';
-export const EVENT_INVALID_CHUNK = 'gemini_cli.chat.invalid_chunk';
-export const EVENT_CONTENT_RETRY = 'gemini_cli.chat.content_retry';
-export const EVENT_CONTENT_RETRY_FAILURE =
-  'gemini_cli.chat.content_retry_failure';
-
 export const METRIC_TOOL_CALL_COUNT = 'gemini_cli.tool.call.count';
 export const METRIC_TOOL_CALL_LATENCY = 'gemini_cli.tool.call.latency';
 export const METRIC_API_REQUEST_COUNT = 'gemini_cli.api.request.count';
@@ -29,7 +24,3 @@ export const METRIC_API_REQUEST_LATENCY = 'gemini_cli.api.request.latency';
 export const METRIC_TOKEN_USAGE = 'gemini_cli.token.usage';
 export const METRIC_SESSION_COUNT = 'gemini_cli.session.count';
 export const METRIC_FILE_OPERATION_COUNT = 'gemini_cli.file.operation.count';
-export const METRIC_INVALID_CHUNK_COUNT = 'gemini_cli.chat.invalid_chunk.count';
-export const METRIC_CONTENT_RETRY_COUNT = 'gemini_cli.chat.content_retry.count';
-export const METRIC_CONTENT_RETRY_FAILURE_COUNT =
-  'gemini_cli.chat.content_retry_failure.count';
@@ -20,9 +20,6 @@ import {
  SERVICE_NAME,
  EVENT_SLASH_COMMAND,
  EVENT_CHAT_COMPRESSION,
-  EVENT_INVALID_CHUNK,
-  EVENT_CONTENT_RETRY,
-  EVENT_CONTENT_RETRY_FAILURE,
 } from './constants.js';
 import {
  ApiErrorEvent,
@@ -38,9 +35,6 @@ import {
  SlashCommandEvent,
  KittySequenceOverflowEvent,
  ChatCompressionEvent,
-  InvalidChunkEvent,
-  ContentRetryEvent,
-  ContentRetryFailureEvent,
 } from './types.js';
 import {
  recordApiErrorMetrics,
@@ -48,9 +42,6 @@ import {
  recordApiResponseMetrics,
  recordToolCallMetrics,
  recordChatCompressionMetrics,
-  recordInvalidChunk,
-  recordContentRetry,
-  recordContentRetryFailure,
 } from './metrics.js';
 import { isTelemetrySdkInitialized } from './sdk.js';
 import { uiTelemetryService, UiEvent } from './uiTelemetry.js';
@@ -435,72 +426,3 @@ export function logKittySequenceOverflow(
  };
  logger.emit(logRecord);
 }
-export function logInvalidChunk(
-  config: Config,
-  event: InvalidChunkEvent,
-): void {
-  ClearcutLogger.getInstance(config)?.logInvalidChunkEvent(event);
-  if (!isTelemetrySdkInitialized()) return;
-
-  const attributes: LogAttributes = {
-    ...getCommonAttributes(config),
-    'event.name': EVENT_INVALID_CHUNK,
-    'event.timestamp': event['event.timestamp'],
-  };
-
-  if (event.error_message) {
-    attributes['error.message'] = event.error_message;
-  }
-
-  const logger = logs.getLogger(SERVICE_NAME);
-  const logRecord: LogRecord = {
-    body: `Invalid chunk received from stream.`,
-    attributes,
-  };
-  logger.emit(logRecord);
-  recordInvalidChunk(config);
-}
-
-export function logContentRetry(
-  config: Config,
-  event: ContentRetryEvent,
-): void {
-  ClearcutLogger.getInstance(config)?.logContentRetryEvent(event);
-  if (!isTelemetrySdkInitialized()) return;
-
-  const attributes: LogAttributes = {
-    ...getCommonAttributes(config),
-    ...event,
-    'event.name': EVENT_CONTENT_RETRY,
-  };
-
-  const logger = logs.getLogger(SERVICE_NAME);
-  const logRecord: LogRecord = {
-    body: `Content retry attempt ${event.attempt_number} due to ${event.error_type}.`,
-    attributes,
-  };
-  logger.emit(logRecord);
-  recordContentRetry(config);
-}
-
-export function logContentRetryFailure(
-  config: Config,
-  event: ContentRetryFailureEvent,
-): void {
-  ClearcutLogger.getInstance(config)?.logContentRetryFailureEvent(event);
-  if (!isTelemetrySdkInitialized()) return;
-
-  const attributes: LogAttributes = {
-    ...getCommonAttributes(config),
-    ...event,
-    'event.name': EVENT_CONTENT_RETRY_FAILURE,
-  };
-
-  const logger = logs.getLogger(SERVICE_NAME);
-  const logRecord: LogRecord = {
-    body: `All content retries failed after ${event.total_attempts} attempts.`,
-    attributes,
-  };
-  logger.emit(logRecord);
-  recordContentRetryFailure(config);
-}
@@ -22,9 +22,6 @@ import {
  METRIC_SESSION_COUNT,
  METRIC_FILE_OPERATION_COUNT,
  EVENT_CHAT_COMPRESSION,
-  METRIC_INVALID_CHUNK_COUNT,
-  METRIC_CONTENT_RETRY_COUNT,
-  METRIC_CONTENT_RETRY_FAILURE_COUNT,
 } from './constants.js';
 import { Config } from '../config/config.js';
 import { DiffStat } from '../tools/tools.js';
@@ -43,9 +40,6 @@ let apiRequestLatencyHistogram: Histogram | undefined;
 let tokenUsageCounter: Counter | undefined;
 let fileOperationCounter: Counter | undefined;
 let chatCompressionCounter: Counter | undefined;
-let invalidChunkCounter: Counter | undefined;
-let contentRetryCounter: Counter | undefined;
-let contentRetryFailureCounter: Counter | undefined;
 let isMetricsInitialized = false;

 function getCommonAttributes(config: Config): Attributes {
@@ -100,24 +94,6 @@ export function initializeMetrics(config: Config): void {
    description: 'Counts chat compression events.',
    valueType: ValueType.INT,
  });
-
-  // New counters for content errors
-  invalidChunkCounter = meter.createCounter(METRIC_INVALID_CHUNK_COUNT, {
-    description: 'Counts invalid chunks received from a stream.',
-    valueType: ValueType.INT,
-  });
-  contentRetryCounter = meter.createCounter(METRIC_CONTENT_RETRY_COUNT, {
-    description: 'Counts retries due to content errors (e.g., empty stream).',
-    valueType: ValueType.INT,
-  });
-  contentRetryFailureCounter = meter.createCounter(
-    METRIC_CONTENT_RETRY_FAILURE_COUNT,
-    {
-      description: 'Counts occurrences of all content retries failing.',
-      valueType: ValueType.INT,
-    },
-  );
-
  const sessionCounter = meter.createCounter(METRIC_SESSION_COUNT, {
    description: 'Count of CLI sessions started.',
    valueType: ValueType.INT,
@@ -251,29 +227,3 @@ export function recordFileOperationMetric(
  }
  fileOperationCounter.add(1, attributes);
 }
-
-// --- New Metric Recording Functions ---
-
-/**
- * Records a metric for when an invalid chunk is received from a stream.
- */
-export function recordInvalidChunk(config: Config): void {
-  if (!invalidChunkCounter || !isMetricsInitialized) return;
-  invalidChunkCounter.add(1, getCommonAttributes(config));
-}
-
-/**
- * Records a metric for when a retry is triggered due to a content error.
- */
-export function recordContentRetry(config: Config): void {
-  if (!contentRetryCounter || !isMetricsInitialized) return;
-  contentRetryCounter.add(1, getCommonAttributes(config));
-}
-
-/**
- * Records a metric for when all content error retries have failed for a request.
- */
-export function recordContentRetryFailure(config: Config): void {
-  if (!contentRetryFailureCounter || !isMetricsInitialized) return;
-  contentRetryFailureCounter.add(1, getCommonAttributes(config));
-}
@@ -385,59 +385,6 @@ export class KittySequenceOverflowEvent {
  }
 }

-// Add these new event interfaces
-export class InvalidChunkEvent implements BaseTelemetryEvent {
-  'event.name': 'invalid_chunk';
-  'event.timestamp': string;
-  error_message?: string; // Optional: validation error details
-
-  constructor(error_message?: string) {
-    this['event.name'] = 'invalid_chunk';
-    this['event.timestamp'] = new Date().toISOString();
-    this.error_message = error_message;
-  }
-}
-
-export class ContentRetryEvent implements BaseTelemetryEvent {
-  'event.name': 'content_retry';
-  'event.timestamp': string;
-  attempt_number: number;
-  error_type: string; // e.g., 'EmptyStreamError'
-  retry_delay_ms: number;
-
-  constructor(
-    attempt_number: number,
-    error_type: string,
-    retry_delay_ms: number,
-  ) {
-    this['event.name'] = 'content_retry';
-    this['event.timestamp'] = new Date().toISOString();
-    this.attempt_number = attempt_number;
-    this.error_type = error_type;
-    this.retry_delay_ms = retry_delay_ms;
-  }
-}
-
-export class ContentRetryFailureEvent implements BaseTelemetryEvent {
-  'event.name': 'content_retry_failure';
-  'event.timestamp': string;
-  total_attempts: number;
-  final_error_type: string;
-  total_duration_ms?: number; // Optional: total time spent retrying
-
-  constructor(
-    total_attempts: number,
-    final_error_type: string,
-    total_duration_ms?: number,
-  ) {
-    this['event.name'] = 'content_retry_failure';
-    this['event.timestamp'] = new Date().toISOString();
-    this.total_attempts = total_attempts;
-    this.final_error_type = final_error_type;
-    this.total_duration_ms = total_duration_ms;
-  }
-}
-
 export type TelemetryEvent =
  | StartSessionEvent
  | EndSessionEvent
@@ -452,7 +399,4 @@ export type TelemetryEvent =
  | KittySequenceOverflowEvent
  | MalformedJsonResponseEvent
  | IdeConnectionEvent
-  | SlashCommandEvent
-  | InvalidChunkEvent
-  | ContentRetryEvent
-  | ContentRetryFailureEvent;
+  | SlashCommandEvent;