Revert "Hotfix/metrics stream error (#7156)"

This reverts commit 0910b35812.
This commit is contained in:
Taylor Mullen
2025-08-26 23:30:27 -07:00
parent 3a8ec2d83c
commit 2f6cf270ec
8 changed files with 7 additions and 400 deletions

View File

@@ -25,19 +25,6 @@ const mockModelsModule = {
batchEmbedContents: vi.fn(),
} as unknown as Models;
const { mockLogInvalidChunk, mockLogContentRetry, mockLogContentRetryFailure } =
vi.hoisted(() => ({
mockLogInvalidChunk: vi.fn(),
mockLogContentRetry: vi.fn(),
mockLogContentRetryFailure: vi.fn(),
}));
vi.mock('../telemetry/loggers.js', () => ({
logInvalidChunk: mockLogInvalidChunk,
logContentRetry: mockLogContentRetry,
logContentRetryFailure: mockLogContentRetryFailure,
}));
describe('GeminiChat', () => {
let chat: GeminiChat;
let mockConfig: Config;
@@ -496,7 +483,7 @@ describe('GeminiChat', () => {
});
describe('sendMessageStream with retries', () => {
it('should retry on invalid content, succeed, and report metrics', async () => {
it('should retry on invalid content and succeed on the second attempt', async () => {
// Use mockImplementationOnce to provide a fresh, promise-wrapped generator for each attempt.
vi.mocked(mockModelsModule.generateContentStream)
.mockImplementationOnce(async () =>
@@ -528,9 +515,6 @@ describe('GeminiChat', () => {
}
// Assertions
expect(mockLogInvalidChunk).toHaveBeenCalledTimes(1);
expect(mockLogContentRetry).toHaveBeenCalledTimes(1);
expect(mockLogContentRetryFailure).not.toHaveBeenCalled();
expect(mockModelsModule.generateContentStream).toHaveBeenCalledTimes(2);
expect(
chunks.some(
@@ -553,7 +537,7 @@ describe('GeminiChat', () => {
});
});
it('should fail after all retries on persistent invalid content and report metrics', async () => {
it('should fail after all retries on persistent invalid content', async () => {
vi.mocked(mockModelsModule.generateContentStream).mockImplementation(
async () =>
(async function* () {
@@ -587,9 +571,6 @@ describe('GeminiChat', () => {
// Should be called 3 times (initial + 2 retries)
expect(mockModelsModule.generateContentStream).toHaveBeenCalledTimes(3);
expect(mockLogInvalidChunk).toHaveBeenCalledTimes(3);
expect(mockLogContentRetry).toHaveBeenCalledTimes(2);
expect(mockLogContentRetryFailure).toHaveBeenCalledTimes(1);
// History should be clean, as if the failed turn never happened.
const history = chat.getHistory();
@@ -604,7 +585,7 @@ describe('GeminiChat', () => {
];
chat.setHistory(initialHistory);
// 2. Mock the API to fail once with an empty stream, then succeed.
// 2. Mock the API
vi.mocked(mockModelsModule.generateContentStream)
.mockImplementationOnce(async () =>
(async function* () {
@@ -614,7 +595,6 @@ describe('GeminiChat', () => {
})(),
)
.mockImplementationOnce(async () =>
// Second attempt succeeds
(async function* () {
yield {
candidates: [{ content: { parts: [{ text: 'Second answer' }] } }],
@@ -631,13 +611,10 @@ describe('GeminiChat', () => {
// consume stream
}
// 4. Assert the final history and metrics
// 4. Assert the final history
const history = chat.getHistory();
expect(history.length).toBe(4);
// Assert that the correct metrics were reported for one empty-stream retry
expect(mockLogContentRetry).toHaveBeenCalledTimes(1);
// Explicitly verify the structure of each part to satisfy TypeScript
const turn1 = history[0];
if (!turn1?.parts?.[0] || !('text' in turn1.parts[0])) {

View File

@@ -23,16 +23,6 @@ import { Config } from '../config/config.js';
import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
import { hasCycleInSchema } from '../tools/tools.js';
import { StructuredError } from './turn.js';
import {
logContentRetry,
logContentRetryFailure,
logInvalidChunk,
} from '../telemetry/loggers.js';
import {
ContentRetryEvent,
ContentRetryFailureEvent,
InvalidChunkEvent,
} from '../telemetry/types.js';
/**
* Options for retrying due to invalid content from the model.
@@ -48,6 +38,7 @@ const INVALID_CONTENT_RETRY_OPTIONS: ContentRetryOptions = {
maxAttempts: 3, // 1 initial call + 2 retries
initialDelayMs: 500,
};
/**
* Returns true if the response is valid, false otherwise.
*/
@@ -358,7 +349,7 @@ export class GeminiChat {
for (
let attempt = 0;
attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts;
attempt <= INVALID_CONTENT_RETRY_OPTIONS.maxAttempts;
attempt++
) {
try {
@@ -382,14 +373,6 @@ export class GeminiChat {
if (isContentError) {
// Check if we have more attempts left.
if (attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts - 1) {
logContentRetry(
self.config,
new ContentRetryEvent(
attempt,
'EmptyStreamError',
INVALID_CONTENT_RETRY_OPTIONS.initialDelayMs,
),
);
await new Promise((res) =>
setTimeout(
res,
@@ -405,15 +388,6 @@ export class GeminiChat {
}
if (lastError) {
if (lastError instanceof EmptyStreamError) {
logContentRetryFailure(
self.config,
new ContentRetryFailureEvent(
INVALID_CONTENT_RETRY_OPTIONS.maxAttempts,
'EmptyStreamError',
),
);
}
// If the stream fails, remove the user message that was added.
if (self.history[self.history.length - 1] === userContent) {
self.history.pop();
@@ -571,10 +545,6 @@ export class GeminiChat {
}
}
} else {
logInvalidChunk(
this.config,
new InvalidChunkEvent('Invalid chunk received from stream.'),
);
isStreamInvalid = true;
}
yield chunk; // Yield every chunk to the UI immediately.

View File

@@ -19,9 +19,6 @@ import {
IdeConnectionEvent,
KittySequenceOverflowEvent,
ChatCompressionEvent,
InvalidChunkEvent,
ContentRetryEvent,
ContentRetryFailureEvent,
} from '../types.js';
import { EventMetadataKey } from './event-metadata-key.js';
import { Config } from '../../config/config.js';
@@ -51,9 +48,6 @@ export enum EventNames {
IDE_CONNECTION = 'ide_connection',
KITTY_SEQUENCE_OVERFLOW = 'kitty_sequence_overflow',
CHAT_COMPRESSION = 'chat_compression',
INVALID_CHUNK = 'invalid_chunk',
CONTENT_RETRY = 'content_retry',
CONTENT_RETRY_FAILURE = 'content_retry_failure',
}
export interface LogResponse {
@@ -681,69 +675,6 @@ export class ClearcutLogger {
});
}
logInvalidChunkEvent(event: InvalidChunkEvent): void {
const data: EventValue[] = [];
if (event.error_message) {
data.push({
gemini_cli_key: EventMetadataKey.GEMINI_CLI_INVALID_CHUNK_ERROR_MESSAGE,
value: event.error_message,
});
}
this.enqueueLogEvent(this.createLogEvent(EventNames.INVALID_CHUNK, data));
this.flushIfNeeded();
}
logContentRetryEvent(event: ContentRetryEvent): void {
const data: EventValue[] = [
{
gemini_cli_key:
EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_ATTEMPT_NUMBER,
value: String(event.attempt_number),
},
{
gemini_cli_key: EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_ERROR_TYPE,
value: event.error_type,
},
{
gemini_cli_key: EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_DELAY_MS,
value: String(event.retry_delay_ms),
},
];
this.enqueueLogEvent(this.createLogEvent(EventNames.CONTENT_RETRY, data));
this.flushIfNeeded();
}
logContentRetryFailureEvent(event: ContentRetryFailureEvent): void {
const data: EventValue[] = [
{
gemini_cli_key:
EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_ATTEMPTS,
value: String(event.total_attempts),
},
{
gemini_cli_key:
EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_FAILURE_FINAL_ERROR_TYPE,
value: event.final_error_type,
},
];
if (event.total_duration_ms) {
data.push({
gemini_cli_key:
EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_DURATION_MS,
value: String(event.total_duration_ms),
});
}
this.enqueueLogEvent(
this.createLogEvent(EventNames.CONTENT_RETRY_FAILURE, data),
);
this.flushIfNeeded();
}
/**
* Adds default fields to data, and returns a new data array. This fields
* should exist on all log events.

View File

@@ -229,16 +229,6 @@ export enum EventMetadataKey {
// Logs the length of the kitty sequence that overflowed.
GEMINI_CLI_KITTY_SEQUENCE_LENGTH = 53,
// ==========================================================================
// Conversation Finished Event Keys
// ===========================================================================
// Logs the approval mode of the session.
GEMINI_CLI_APPROVAL_MODE = 58,
// Logs the number of turns
GEMINI_CLI_CONVERSATION_TURN_COUNT = 59,
// Logs the number of tokens before context window compression.
GEMINI_CLI_COMPRESSION_TOKENS_BEFORE = 60,
@@ -247,72 +237,4 @@ export enum EventMetadataKey {
// Logs tool type whether it is mcp or native.
GEMINI_CLI_TOOL_TYPE = 62,
// Logs name of MCP tools as comma separated string
GEMINI_CLI_START_SESSION_MCP_TOOLS = 65,
// ==========================================================================
// Research Event Keys
// ===========================================================================
// Logs the research opt-in status (true/false)
GEMINI_CLI_RESEARCH_OPT_IN_STATUS = 66,
// Logs the contact email for research participation
GEMINI_CLI_RESEARCH_CONTACT_EMAIL = 67,
// Logs the user ID for research events
GEMINI_CLI_RESEARCH_USER_ID = 68,
// Logs the type of research feedback
GEMINI_CLI_RESEARCH_FEEDBACK_TYPE = 69,
// Logs the content of research feedback
GEMINI_CLI_RESEARCH_FEEDBACK_CONTENT = 70,
// Logs survey responses for research feedback (JSON stringified)
GEMINI_CLI_RESEARCH_SURVEY_RESPONSES = 71,
// ==========================================================================
// File Operation Event Keys
// ===========================================================================
// Logs the programming language of the project.
GEMINI_CLI_PROGRAMMING_LANGUAGE = 56,
// Logs the operation type of the file operation.
GEMINI_CLI_FILE_OPERATION_TYPE = 57,
// Logs the number of lines in the file operation.
GEMINI_CLI_FILE_OPERATION_LINES = 72,
// Logs the mimetype of the file in the file operation.
GEMINI_CLI_FILE_OPERATION_MIMETYPE = 73,
// Logs the extension of the file in the file operation.
GEMINI_CLI_FILE_OPERATION_EXTENSION = 74,
// ==========================================================================
// Content Streaming Event Keys
// ===========================================================================
// Logs the error message for an invalid chunk.
GEMINI_CLI_INVALID_CHUNK_ERROR_MESSAGE = 75,
// Logs the attempt number for a content retry.
GEMINI_CLI_CONTENT_RETRY_ATTEMPT_NUMBER = 76,
// Logs the error type for a content retry.
GEMINI_CLI_CONTENT_RETRY_ERROR_TYPE = 77,
// Logs the delay in milliseconds for a content retry.
GEMINI_CLI_CONTENT_RETRY_DELAY_MS = 78,
// Logs the total number of attempts for a content retry failure.
GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_ATTEMPTS = 79,
// Logs the final error type for a content retry failure.
GEMINI_CLI_CONTENT_RETRY_FAILURE_FINAL_ERROR_TYPE = 80,
// Logs the total duration in milliseconds for a content retry failure.
GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_DURATION_MS = 81,
}

View File

@@ -17,11 +17,6 @@ export const EVENT_NEXT_SPEAKER_CHECK = 'gemini_cli.next_speaker_check';
export const EVENT_SLASH_COMMAND = 'gemini_cli.slash_command';
export const EVENT_IDE_CONNECTION = 'gemini_cli.ide_connection';
export const EVENT_CHAT_COMPRESSION = 'gemini_cli.chat_compression';
export const EVENT_INVALID_CHUNK = 'gemini_cli.chat.invalid_chunk';
export const EVENT_CONTENT_RETRY = 'gemini_cli.chat.content_retry';
export const EVENT_CONTENT_RETRY_FAILURE =
'gemini_cli.chat.content_retry_failure';
export const METRIC_TOOL_CALL_COUNT = 'gemini_cli.tool.call.count';
export const METRIC_TOOL_CALL_LATENCY = 'gemini_cli.tool.call.latency';
export const METRIC_API_REQUEST_COUNT = 'gemini_cli.api.request.count';
@@ -29,7 +24,3 @@ export const METRIC_API_REQUEST_LATENCY = 'gemini_cli.api.request.latency';
export const METRIC_TOKEN_USAGE = 'gemini_cli.token.usage';
export const METRIC_SESSION_COUNT = 'gemini_cli.session.count';
export const METRIC_FILE_OPERATION_COUNT = 'gemini_cli.file.operation.count';
export const METRIC_INVALID_CHUNK_COUNT = 'gemini_cli.chat.invalid_chunk.count';
export const METRIC_CONTENT_RETRY_COUNT = 'gemini_cli.chat.content_retry.count';
export const METRIC_CONTENT_RETRY_FAILURE_COUNT =
'gemini_cli.chat.content_retry_failure.count';

View File

@@ -20,9 +20,6 @@ import {
SERVICE_NAME,
EVENT_SLASH_COMMAND,
EVENT_CHAT_COMPRESSION,
EVENT_INVALID_CHUNK,
EVENT_CONTENT_RETRY,
EVENT_CONTENT_RETRY_FAILURE,
} from './constants.js';
import {
ApiErrorEvent,
@@ -38,9 +35,6 @@ import {
SlashCommandEvent,
KittySequenceOverflowEvent,
ChatCompressionEvent,
InvalidChunkEvent,
ContentRetryEvent,
ContentRetryFailureEvent,
} from './types.js';
import {
recordApiErrorMetrics,
@@ -48,9 +42,6 @@ import {
recordApiResponseMetrics,
recordToolCallMetrics,
recordChatCompressionMetrics,
recordInvalidChunk,
recordContentRetry,
recordContentRetryFailure,
} from './metrics.js';
import { isTelemetrySdkInitialized } from './sdk.js';
import { uiTelemetryService, UiEvent } from './uiTelemetry.js';
@@ -435,72 +426,3 @@ export function logKittySequenceOverflow(
};
logger.emit(logRecord);
}
export function logInvalidChunk(
config: Config,
event: InvalidChunkEvent,
): void {
ClearcutLogger.getInstance(config)?.logInvalidChunkEvent(event);
if (!isTelemetrySdkInitialized()) return;
const attributes: LogAttributes = {
...getCommonAttributes(config),
'event.name': EVENT_INVALID_CHUNK,
'event.timestamp': event['event.timestamp'],
};
if (event.error_message) {
attributes['error.message'] = event.error_message;
}
const logger = logs.getLogger(SERVICE_NAME);
const logRecord: LogRecord = {
body: `Invalid chunk received from stream.`,
attributes,
};
logger.emit(logRecord);
recordInvalidChunk(config);
}
export function logContentRetry(
config: Config,
event: ContentRetryEvent,
): void {
ClearcutLogger.getInstance(config)?.logContentRetryEvent(event);
if (!isTelemetrySdkInitialized()) return;
const attributes: LogAttributes = {
...getCommonAttributes(config),
...event,
'event.name': EVENT_CONTENT_RETRY,
};
const logger = logs.getLogger(SERVICE_NAME);
const logRecord: LogRecord = {
body: `Content retry attempt ${event.attempt_number} due to ${event.error_type}.`,
attributes,
};
logger.emit(logRecord);
recordContentRetry(config);
}
export function logContentRetryFailure(
config: Config,
event: ContentRetryFailureEvent,
): void {
ClearcutLogger.getInstance(config)?.logContentRetryFailureEvent(event);
if (!isTelemetrySdkInitialized()) return;
const attributes: LogAttributes = {
...getCommonAttributes(config),
...event,
'event.name': EVENT_CONTENT_RETRY_FAILURE,
};
const logger = logs.getLogger(SERVICE_NAME);
const logRecord: LogRecord = {
body: `All content retries failed after ${event.total_attempts} attempts.`,
attributes,
};
logger.emit(logRecord);
recordContentRetryFailure(config);
}

View File

@@ -22,9 +22,6 @@ import {
METRIC_SESSION_COUNT,
METRIC_FILE_OPERATION_COUNT,
EVENT_CHAT_COMPRESSION,
METRIC_INVALID_CHUNK_COUNT,
METRIC_CONTENT_RETRY_COUNT,
METRIC_CONTENT_RETRY_FAILURE_COUNT,
} from './constants.js';
import { Config } from '../config/config.js';
import { DiffStat } from '../tools/tools.js';
@@ -43,9 +40,6 @@ let apiRequestLatencyHistogram: Histogram | undefined;
let tokenUsageCounter: Counter | undefined;
let fileOperationCounter: Counter | undefined;
let chatCompressionCounter: Counter | undefined;
let invalidChunkCounter: Counter | undefined;
let contentRetryCounter: Counter | undefined;
let contentRetryFailureCounter: Counter | undefined;
let isMetricsInitialized = false;
function getCommonAttributes(config: Config): Attributes {
@@ -100,24 +94,6 @@ export function initializeMetrics(config: Config): void {
description: 'Counts chat compression events.',
valueType: ValueType.INT,
});
// New counters for content errors
invalidChunkCounter = meter.createCounter(METRIC_INVALID_CHUNK_COUNT, {
description: 'Counts invalid chunks received from a stream.',
valueType: ValueType.INT,
});
contentRetryCounter = meter.createCounter(METRIC_CONTENT_RETRY_COUNT, {
description: 'Counts retries due to content errors (e.g., empty stream).',
valueType: ValueType.INT,
});
contentRetryFailureCounter = meter.createCounter(
METRIC_CONTENT_RETRY_FAILURE_COUNT,
{
description: 'Counts occurrences of all content retries failing.',
valueType: ValueType.INT,
},
);
const sessionCounter = meter.createCounter(METRIC_SESSION_COUNT, {
description: 'Count of CLI sessions started.',
valueType: ValueType.INT,
@@ -251,29 +227,3 @@ export function recordFileOperationMetric(
}
fileOperationCounter.add(1, attributes);
}
// --- New Metric Recording Functions ---
/**
* Records a metric for when an invalid chunk is received from a stream.
*/
export function recordInvalidChunk(config: Config): void {
if (!invalidChunkCounter || !isMetricsInitialized) return;
invalidChunkCounter.add(1, getCommonAttributes(config));
}
/**
* Records a metric for when a retry is triggered due to a content error.
*/
export function recordContentRetry(config: Config): void {
if (!contentRetryCounter || !isMetricsInitialized) return;
contentRetryCounter.add(1, getCommonAttributes(config));
}
/**
* Records a metric for when all content error retries have failed for a request.
*/
export function recordContentRetryFailure(config: Config): void {
if (!contentRetryFailureCounter || !isMetricsInitialized) return;
contentRetryFailureCounter.add(1, getCommonAttributes(config));
}

View File

@@ -385,59 +385,6 @@ export class KittySequenceOverflowEvent {
}
}
// Add these new event interfaces
export class InvalidChunkEvent implements BaseTelemetryEvent {
'event.name': 'invalid_chunk';
'event.timestamp': string;
error_message?: string; // Optional: validation error details
constructor(error_message?: string) {
this['event.name'] = 'invalid_chunk';
this['event.timestamp'] = new Date().toISOString();
this.error_message = error_message;
}
}
export class ContentRetryEvent implements BaseTelemetryEvent {
'event.name': 'content_retry';
'event.timestamp': string;
attempt_number: number;
error_type: string; // e.g., 'EmptyStreamError'
retry_delay_ms: number;
constructor(
attempt_number: number,
error_type: string,
retry_delay_ms: number,
) {
this['event.name'] = 'content_retry';
this['event.timestamp'] = new Date().toISOString();
this.attempt_number = attempt_number;
this.error_type = error_type;
this.retry_delay_ms = retry_delay_ms;
}
}
export class ContentRetryFailureEvent implements BaseTelemetryEvent {
'event.name': 'content_retry_failure';
'event.timestamp': string;
total_attempts: number;
final_error_type: string;
total_duration_ms?: number; // Optional: total time spent retrying
constructor(
total_attempts: number,
final_error_type: string,
total_duration_ms?: number,
) {
this['event.name'] = 'content_retry_failure';
this['event.timestamp'] = new Date().toISOString();
this.total_attempts = total_attempts;
this.final_error_type = final_error_type;
this.total_duration_ms = total_duration_ms;
}
}
export type TelemetryEvent =
| StartSessionEvent
| EndSessionEvent
@@ -452,7 +399,4 @@ export type TelemetryEvent =
| KittySequenceOverflowEvent
| MalformedJsonResponseEvent
| IdeConnectionEvent
| SlashCommandEvent
| InvalidChunkEvent
| ContentRetryEvent
| ContentRetryFailureEvent;
| SlashCommandEvent;