From 2f6cf270ec3a5c23226e046792739cf7bb925ad3 Mon Sep 17 00:00:00 2001 From: Taylor Mullen Date: Tue, 26 Aug 2025 23:30:27 -0700 Subject: [PATCH] Revert "Hotfix/metrics stream error (#7156)" This reverts commit 0910b35812a6ba5578277015b642d29c2853c9d5. --- packages/core/src/core/geminiChat.test.ts | 31 +------- packages/core/src/core/geminiChat.ts | 34 +------- .../clearcut-logger/clearcut-logger.ts | 69 ---------------- .../clearcut-logger/event-metadata-key.ts | 78 ------------------- packages/core/src/telemetry/constants.ts | 9 --- packages/core/src/telemetry/loggers.ts | 78 ------------------- packages/core/src/telemetry/metrics.ts | 50 ------------ packages/core/src/telemetry/types.ts | 58 +------------- 8 files changed, 7 insertions(+), 400 deletions(-) diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index 1bafb38cdc..c4fb7f0f6f 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -25,19 +25,6 @@ const mockModelsModule = { batchEmbedContents: vi.fn(), } as unknown as Models; -const { mockLogInvalidChunk, mockLogContentRetry, mockLogContentRetryFailure } = - vi.hoisted(() => ({ - mockLogInvalidChunk: vi.fn(), - mockLogContentRetry: vi.fn(), - mockLogContentRetryFailure: vi.fn(), - })); - -vi.mock('../telemetry/loggers.js', () => ({ - logInvalidChunk: mockLogInvalidChunk, - logContentRetry: mockLogContentRetry, - logContentRetryFailure: mockLogContentRetryFailure, -})); - describe('GeminiChat', () => { let chat: GeminiChat; let mockConfig: Config; @@ -496,7 +483,7 @@ describe('GeminiChat', () => { }); describe('sendMessageStream with retries', () => { - it('should retry on invalid content, succeed, and report metrics', async () => { + it('should retry on invalid content and succeed on the second attempt', async () => { // Use mockImplementationOnce to provide a fresh, promise-wrapped generator for each attempt. vi.mocked(mockModelsModule.generateContentStream) .mockImplementationOnce(async () => @@ -528,9 +515,6 @@ describe('GeminiChat', () => { } // Assertions - expect(mockLogInvalidChunk).toHaveBeenCalledTimes(1); - expect(mockLogContentRetry).toHaveBeenCalledTimes(1); - expect(mockLogContentRetryFailure).not.toHaveBeenCalled(); expect(mockModelsModule.generateContentStream).toHaveBeenCalledTimes(2); expect( chunks.some( @@ -553,7 +537,7 @@ describe('GeminiChat', () => { }); }); - it('should fail after all retries on persistent invalid content and report metrics', async () => { + it('should fail after all retries on persistent invalid content', async () => { vi.mocked(mockModelsModule.generateContentStream).mockImplementation( async () => (async function* () { @@ -587,9 +571,6 @@ describe('GeminiChat', () => { // Should be called 3 times (initial + 2 retries) expect(mockModelsModule.generateContentStream).toHaveBeenCalledTimes(3); - expect(mockLogInvalidChunk).toHaveBeenCalledTimes(3); - expect(mockLogContentRetry).toHaveBeenCalledTimes(2); - expect(mockLogContentRetryFailure).toHaveBeenCalledTimes(1); // History should be clean, as if the failed turn never happened. const history = chat.getHistory(); @@ -604,7 +585,7 @@ describe('GeminiChat', () => { ]; chat.setHistory(initialHistory); - // 2. Mock the API to fail once with an empty stream, then succeed. + // 2. Mock the API vi.mocked(mockModelsModule.generateContentStream) .mockImplementationOnce(async () => (async function* () { @@ -614,7 +595,6 @@ describe('GeminiChat', () => { })(), ) .mockImplementationOnce(async () => - // Second attempt succeeds (async function* () { yield { candidates: [{ content: { parts: [{ text: 'Second answer' }] } }], @@ -631,13 +611,10 @@ describe('GeminiChat', () => { // consume stream } - // 4. Assert the final history and metrics + // 4. Assert the final history const history = chat.getHistory(); expect(history.length).toBe(4); - // Assert that the correct metrics were reported for one empty-stream retry - expect(mockLogContentRetry).toHaveBeenCalledTimes(1); - // Explicitly verify the structure of each part to satisfy TypeScript const turn1 = history[0]; if (!turn1?.parts?.[0] || !('text' in turn1.parts[0])) { diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index a46522fa32..93428684ba 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -23,16 +23,6 @@ import { Config } from '../config/config.js'; import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js'; import { hasCycleInSchema } from '../tools/tools.js'; import { StructuredError } from './turn.js'; -import { - logContentRetry, - logContentRetryFailure, - logInvalidChunk, -} from '../telemetry/loggers.js'; -import { - ContentRetryEvent, - ContentRetryFailureEvent, - InvalidChunkEvent, -} from '../telemetry/types.js'; /** * Options for retrying due to invalid content from the model. @@ -48,6 +38,7 @@ const INVALID_CONTENT_RETRY_OPTIONS: ContentRetryOptions = { maxAttempts: 3, // 1 initial call + 2 retries initialDelayMs: 500, }; + /** * Returns true if the response is valid, false otherwise. */ @@ -358,7 +349,7 @@ export class GeminiChat { for ( let attempt = 0; - attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts; + attempt <= INVALID_CONTENT_RETRY_OPTIONS.maxAttempts; attempt++ ) { try { @@ -382,14 +373,6 @@ export class GeminiChat { if (isContentError) { // Check if we have more attempts left. if (attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts - 1) { - logContentRetry( - self.config, - new ContentRetryEvent( - attempt, - 'EmptyStreamError', - INVALID_CONTENT_RETRY_OPTIONS.initialDelayMs, - ), - ); await new Promise((res) => setTimeout( res, @@ -405,15 +388,6 @@ export class GeminiChat { } if (lastError) { - if (lastError instanceof EmptyStreamError) { - logContentRetryFailure( - self.config, - new ContentRetryFailureEvent( - INVALID_CONTENT_RETRY_OPTIONS.maxAttempts, - 'EmptyStreamError', - ), - ); - } // If the stream fails, remove the user message that was added. if (self.history[self.history.length - 1] === userContent) { self.history.pop(); @@ -571,10 +545,6 @@ export class GeminiChat { } } } else { - logInvalidChunk( - this.config, - new InvalidChunkEvent('Invalid chunk received from stream.'), - ); isStreamInvalid = true; } yield chunk; // Yield every chunk to the UI immediately. diff --git a/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts b/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts index 221b051438..7369bc1b09 100644 --- a/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts +++ b/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts @@ -19,9 +19,6 @@ import { IdeConnectionEvent, KittySequenceOverflowEvent, ChatCompressionEvent, - InvalidChunkEvent, - ContentRetryEvent, - ContentRetryFailureEvent, } from '../types.js'; import { EventMetadataKey } from './event-metadata-key.js'; import { Config } from '../../config/config.js'; @@ -51,9 +48,6 @@ export enum EventNames { IDE_CONNECTION = 'ide_connection', KITTY_SEQUENCE_OVERFLOW = 'kitty_sequence_overflow', CHAT_COMPRESSION = 'chat_compression', - INVALID_CHUNK = 'invalid_chunk', - CONTENT_RETRY = 'content_retry', - CONTENT_RETRY_FAILURE = 'content_retry_failure', } export interface LogResponse { @@ -681,69 +675,6 @@ export class ClearcutLogger { }); } - logInvalidChunkEvent(event: InvalidChunkEvent): void { - const data: EventValue[] = []; - - if (event.error_message) { - data.push({ - gemini_cli_key: EventMetadataKey.GEMINI_CLI_INVALID_CHUNK_ERROR_MESSAGE, - value: event.error_message, - }); - } - - this.enqueueLogEvent(this.createLogEvent(EventNames.INVALID_CHUNK, data)); - this.flushIfNeeded(); - } - - logContentRetryEvent(event: ContentRetryEvent): void { - const data: EventValue[] = [ - { - gemini_cli_key: - EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_ATTEMPT_NUMBER, - value: String(event.attempt_number), - }, - { - gemini_cli_key: EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_ERROR_TYPE, - value: event.error_type, - }, - { - gemini_cli_key: EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_DELAY_MS, - value: String(event.retry_delay_ms), - }, - ]; - - this.enqueueLogEvent(this.createLogEvent(EventNames.CONTENT_RETRY, data)); - this.flushIfNeeded(); - } - - logContentRetryFailureEvent(event: ContentRetryFailureEvent): void { - const data: EventValue[] = [ - { - gemini_cli_key: - EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_ATTEMPTS, - value: String(event.total_attempts), - }, - { - gemini_cli_key: - EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_FAILURE_FINAL_ERROR_TYPE, - value: event.final_error_type, - }, - ]; - - if (event.total_duration_ms) { - data.push({ - gemini_cli_key: - EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_DURATION_MS, - value: String(event.total_duration_ms), - }); - } - - this.enqueueLogEvent( - this.createLogEvent(EventNames.CONTENT_RETRY_FAILURE, data), - ); - this.flushIfNeeded(); - } - /** * Adds default fields to data, and returns a new data array. This fields * should exist on all log events. diff --git a/packages/core/src/telemetry/clearcut-logger/event-metadata-key.ts b/packages/core/src/telemetry/clearcut-logger/event-metadata-key.ts index b7090bef57..34c8d9b024 100644 --- a/packages/core/src/telemetry/clearcut-logger/event-metadata-key.ts +++ b/packages/core/src/telemetry/clearcut-logger/event-metadata-key.ts @@ -229,16 +229,6 @@ export enum EventMetadataKey { // Logs the length of the kitty sequence that overflowed. GEMINI_CLI_KITTY_SEQUENCE_LENGTH = 53, - // ========================================================================== - // Conversation Finished Event Keys - // =========================================================================== - - // Logs the approval mode of the session. - GEMINI_CLI_APPROVAL_MODE = 58, - - // Logs the number of turns - GEMINI_CLI_CONVERSATION_TURN_COUNT = 59, - // Logs the number of tokens before context window compression. GEMINI_CLI_COMPRESSION_TOKENS_BEFORE = 60, @@ -247,72 +237,4 @@ export enum EventMetadataKey { // Logs tool type whether it is mcp or native. GEMINI_CLI_TOOL_TYPE = 62, - // Logs name of MCP tools as comma separated string - GEMINI_CLI_START_SESSION_MCP_TOOLS = 65, - - // ========================================================================== - // Research Event Keys - // =========================================================================== - - // Logs the research opt-in status (true/false) - GEMINI_CLI_RESEARCH_OPT_IN_STATUS = 66, - - // Logs the contact email for research participation - GEMINI_CLI_RESEARCH_CONTACT_EMAIL = 67, - - // Logs the user ID for research events - GEMINI_CLI_RESEARCH_USER_ID = 68, - - // Logs the type of research feedback - GEMINI_CLI_RESEARCH_FEEDBACK_TYPE = 69, - - // Logs the content of research feedback - GEMINI_CLI_RESEARCH_FEEDBACK_CONTENT = 70, - - // Logs survey responses for research feedback (JSON stringified) - GEMINI_CLI_RESEARCH_SURVEY_RESPONSES = 71, - - // ========================================================================== - // File Operation Event Keys - // =========================================================================== - - // Logs the programming language of the project. - GEMINI_CLI_PROGRAMMING_LANGUAGE = 56, - - // Logs the operation type of the file operation. - GEMINI_CLI_FILE_OPERATION_TYPE = 57, - - // Logs the number of lines in the file operation. - GEMINI_CLI_FILE_OPERATION_LINES = 72, - - // Logs the mimetype of the file in the file operation. - GEMINI_CLI_FILE_OPERATION_MIMETYPE = 73, - - // Logs the extension of the file in the file operation. - GEMINI_CLI_FILE_OPERATION_EXTENSION = 74, - - // ========================================================================== - // Content Streaming Event Keys - // =========================================================================== - - // Logs the error message for an invalid chunk. - GEMINI_CLI_INVALID_CHUNK_ERROR_MESSAGE = 75, - - // Logs the attempt number for a content retry. - GEMINI_CLI_CONTENT_RETRY_ATTEMPT_NUMBER = 76, - - // Logs the error type for a content retry. - GEMINI_CLI_CONTENT_RETRY_ERROR_TYPE = 77, - - // Logs the delay in milliseconds for a content retry. - GEMINI_CLI_CONTENT_RETRY_DELAY_MS = 78, - - // Logs the total number of attempts for a content retry failure. - GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_ATTEMPTS = 79, - - // Logs the final error type for a content retry failure. - GEMINI_CLI_CONTENT_RETRY_FAILURE_FINAL_ERROR_TYPE = 80, - - // Logs the total duration in milliseconds for a content retry failure. - GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_DURATION_MS = 81, } diff --git a/packages/core/src/telemetry/constants.ts b/packages/core/src/telemetry/constants.ts index 6420a3d125..0d978fb2f5 100644 --- a/packages/core/src/telemetry/constants.ts +++ b/packages/core/src/telemetry/constants.ts @@ -17,11 +17,6 @@ export const EVENT_NEXT_SPEAKER_CHECK = 'gemini_cli.next_speaker_check'; export const EVENT_SLASH_COMMAND = 'gemini_cli.slash_command'; export const EVENT_IDE_CONNECTION = 'gemini_cli.ide_connection'; export const EVENT_CHAT_COMPRESSION = 'gemini_cli.chat_compression'; -export const EVENT_INVALID_CHUNK = 'gemini_cli.chat.invalid_chunk'; -export const EVENT_CONTENT_RETRY = 'gemini_cli.chat.content_retry'; -export const EVENT_CONTENT_RETRY_FAILURE = - 'gemini_cli.chat.content_retry_failure'; - export const METRIC_TOOL_CALL_COUNT = 'gemini_cli.tool.call.count'; export const METRIC_TOOL_CALL_LATENCY = 'gemini_cli.tool.call.latency'; export const METRIC_API_REQUEST_COUNT = 'gemini_cli.api.request.count'; @@ -29,7 +24,3 @@ export const METRIC_API_REQUEST_LATENCY = 'gemini_cli.api.request.latency'; export const METRIC_TOKEN_USAGE = 'gemini_cli.token.usage'; export const METRIC_SESSION_COUNT = 'gemini_cli.session.count'; export const METRIC_FILE_OPERATION_COUNT = 'gemini_cli.file.operation.count'; -export const METRIC_INVALID_CHUNK_COUNT = 'gemini_cli.chat.invalid_chunk.count'; -export const METRIC_CONTENT_RETRY_COUNT = 'gemini_cli.chat.content_retry.count'; -export const METRIC_CONTENT_RETRY_FAILURE_COUNT = - 'gemini_cli.chat.content_retry_failure.count'; diff --git a/packages/core/src/telemetry/loggers.ts b/packages/core/src/telemetry/loggers.ts index f292403f16..9b9faf792f 100644 --- a/packages/core/src/telemetry/loggers.ts +++ b/packages/core/src/telemetry/loggers.ts @@ -20,9 +20,6 @@ import { SERVICE_NAME, EVENT_SLASH_COMMAND, EVENT_CHAT_COMPRESSION, - EVENT_INVALID_CHUNK, - EVENT_CONTENT_RETRY, - EVENT_CONTENT_RETRY_FAILURE, } from './constants.js'; import { ApiErrorEvent, @@ -38,9 +35,6 @@ import { SlashCommandEvent, KittySequenceOverflowEvent, ChatCompressionEvent, - InvalidChunkEvent, - ContentRetryEvent, - ContentRetryFailureEvent, } from './types.js'; import { recordApiErrorMetrics, @@ -48,9 +42,6 @@ import { recordApiResponseMetrics, recordToolCallMetrics, recordChatCompressionMetrics, - recordInvalidChunk, - recordContentRetry, - recordContentRetryFailure, } from './metrics.js'; import { isTelemetrySdkInitialized } from './sdk.js'; import { uiTelemetryService, UiEvent } from './uiTelemetry.js'; @@ -435,72 +426,3 @@ export function logKittySequenceOverflow( }; logger.emit(logRecord); } -export function logInvalidChunk( - config: Config, - event: InvalidChunkEvent, -): void { - ClearcutLogger.getInstance(config)?.logInvalidChunkEvent(event); - if (!isTelemetrySdkInitialized()) return; - - const attributes: LogAttributes = { - ...getCommonAttributes(config), - 'event.name': EVENT_INVALID_CHUNK, - 'event.timestamp': event['event.timestamp'], - }; - - if (event.error_message) { - attributes['error.message'] = event.error_message; - } - - const logger = logs.getLogger(SERVICE_NAME); - const logRecord: LogRecord = { - body: `Invalid chunk received from stream.`, - attributes, - }; - logger.emit(logRecord); - recordInvalidChunk(config); -} - -export function logContentRetry( - config: Config, - event: ContentRetryEvent, -): void { - ClearcutLogger.getInstance(config)?.logContentRetryEvent(event); - if (!isTelemetrySdkInitialized()) return; - - const attributes: LogAttributes = { - ...getCommonAttributes(config), - ...event, - 'event.name': EVENT_CONTENT_RETRY, - }; - - const logger = logs.getLogger(SERVICE_NAME); - const logRecord: LogRecord = { - body: `Content retry attempt ${event.attempt_number} due to ${event.error_type}.`, - attributes, - }; - logger.emit(logRecord); - recordContentRetry(config); -} - -export function logContentRetryFailure( - config: Config, - event: ContentRetryFailureEvent, -): void { - ClearcutLogger.getInstance(config)?.logContentRetryFailureEvent(event); - if (!isTelemetrySdkInitialized()) return; - - const attributes: LogAttributes = { - ...getCommonAttributes(config), - ...event, - 'event.name': EVENT_CONTENT_RETRY_FAILURE, - }; - - const logger = logs.getLogger(SERVICE_NAME); - const logRecord: LogRecord = { - body: `All content retries failed after ${event.total_attempts} attempts.`, - attributes, - }; - logger.emit(logRecord); - recordContentRetryFailure(config); -} diff --git a/packages/core/src/telemetry/metrics.ts b/packages/core/src/telemetry/metrics.ts index a84abbff90..4abcb4f0cd 100644 --- a/packages/core/src/telemetry/metrics.ts +++ b/packages/core/src/telemetry/metrics.ts @@ -22,9 +22,6 @@ import { METRIC_SESSION_COUNT, METRIC_FILE_OPERATION_COUNT, EVENT_CHAT_COMPRESSION, - METRIC_INVALID_CHUNK_COUNT, - METRIC_CONTENT_RETRY_COUNT, - METRIC_CONTENT_RETRY_FAILURE_COUNT, } from './constants.js'; import { Config } from '../config/config.js'; import { DiffStat } from '../tools/tools.js'; @@ -43,9 +40,6 @@ let apiRequestLatencyHistogram: Histogram | undefined; let tokenUsageCounter: Counter | undefined; let fileOperationCounter: Counter | undefined; let chatCompressionCounter: Counter | undefined; -let invalidChunkCounter: Counter | undefined; -let contentRetryCounter: Counter | undefined; -let contentRetryFailureCounter: Counter | undefined; let isMetricsInitialized = false; function getCommonAttributes(config: Config): Attributes { @@ -100,24 +94,6 @@ export function initializeMetrics(config: Config): void { description: 'Counts chat compression events.', valueType: ValueType.INT, }); - - // New counters for content errors - invalidChunkCounter = meter.createCounter(METRIC_INVALID_CHUNK_COUNT, { - description: 'Counts invalid chunks received from a stream.', - valueType: ValueType.INT, - }); - contentRetryCounter = meter.createCounter(METRIC_CONTENT_RETRY_COUNT, { - description: 'Counts retries due to content errors (e.g., empty stream).', - valueType: ValueType.INT, - }); - contentRetryFailureCounter = meter.createCounter( - METRIC_CONTENT_RETRY_FAILURE_COUNT, - { - description: 'Counts occurrences of all content retries failing.', - valueType: ValueType.INT, - }, - ); - const sessionCounter = meter.createCounter(METRIC_SESSION_COUNT, { description: 'Count of CLI sessions started.', valueType: ValueType.INT, @@ -251,29 +227,3 @@ export function recordFileOperationMetric( } fileOperationCounter.add(1, attributes); } - -// --- New Metric Recording Functions --- - -/** - * Records a metric for when an invalid chunk is received from a stream. - */ -export function recordInvalidChunk(config: Config): void { - if (!invalidChunkCounter || !isMetricsInitialized) return; - invalidChunkCounter.add(1, getCommonAttributes(config)); -} - -/** - * Records a metric for when a retry is triggered due to a content error. - */ -export function recordContentRetry(config: Config): void { - if (!contentRetryCounter || !isMetricsInitialized) return; - contentRetryCounter.add(1, getCommonAttributes(config)); -} - -/** - * Records a metric for when all content error retries have failed for a request. - */ -export function recordContentRetryFailure(config: Config): void { - if (!contentRetryFailureCounter || !isMetricsInitialized) return; - contentRetryFailureCounter.add(1, getCommonAttributes(config)); -} diff --git a/packages/core/src/telemetry/types.ts b/packages/core/src/telemetry/types.ts index 10ac5da1fb..0a49ca02f1 100644 --- a/packages/core/src/telemetry/types.ts +++ b/packages/core/src/telemetry/types.ts @@ -385,59 +385,6 @@ export class KittySequenceOverflowEvent { } } -// Add these new event interfaces -export class InvalidChunkEvent implements BaseTelemetryEvent { - 'event.name': 'invalid_chunk'; - 'event.timestamp': string; - error_message?: string; // Optional: validation error details - - constructor(error_message?: string) { - this['event.name'] = 'invalid_chunk'; - this['event.timestamp'] = new Date().toISOString(); - this.error_message = error_message; - } -} - -export class ContentRetryEvent implements BaseTelemetryEvent { - 'event.name': 'content_retry'; - 'event.timestamp': string; - attempt_number: number; - error_type: string; // e.g., 'EmptyStreamError' - retry_delay_ms: number; - - constructor( - attempt_number: number, - error_type: string, - retry_delay_ms: number, - ) { - this['event.name'] = 'content_retry'; - this['event.timestamp'] = new Date().toISOString(); - this.attempt_number = attempt_number; - this.error_type = error_type; - this.retry_delay_ms = retry_delay_ms; - } -} - -export class ContentRetryFailureEvent implements BaseTelemetryEvent { - 'event.name': 'content_retry_failure'; - 'event.timestamp': string; - total_attempts: number; - final_error_type: string; - total_duration_ms?: number; // Optional: total time spent retrying - - constructor( - total_attempts: number, - final_error_type: string, - total_duration_ms?: number, - ) { - this['event.name'] = 'content_retry_failure'; - this['event.timestamp'] = new Date().toISOString(); - this.total_attempts = total_attempts; - this.final_error_type = final_error_type; - this.total_duration_ms = total_duration_ms; - } -} - export type TelemetryEvent = | StartSessionEvent | EndSessionEvent @@ -452,7 +399,4 @@ export type TelemetryEvent = | KittySequenceOverflowEvent | MalformedJsonResponseEvent | IdeConnectionEvent - | SlashCommandEvent - | InvalidChunkEvent - | ContentRetryEvent - | ContentRetryFailureEvent; + | SlashCommandEvent;