2025-06-05 16:04:25 -04:00
|
|
|
/**
|
|
|
|
|
* @license
|
|
|
|
|
* Copyright 2025 Google LLC
|
|
|
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
*/
|
|
|
|
|
|
2025-08-26 00:04:53 +02:00
|
|
|
import type { Attributes, Meter, Counter, Histogram } from '@opentelemetry/api';
|
|
|
|
|
import { metrics, ValueType } from '@opentelemetry/api';
|
2025-06-05 16:04:25 -04:00
|
|
|
import {
|
|
|
|
|
SERVICE_NAME,
|
|
|
|
|
METRIC_TOOL_CALL_COUNT,
|
|
|
|
|
METRIC_TOOL_CALL_LATENCY,
|
|
|
|
|
METRIC_API_REQUEST_COUNT,
|
|
|
|
|
METRIC_API_REQUEST_LATENCY,
|
2025-06-11 06:56:53 +00:00
|
|
|
METRIC_TOKEN_USAGE,
|
2025-06-05 16:04:25 -04:00
|
|
|
METRIC_SESSION_COUNT,
|
2025-06-15 16:24:53 -04:00
|
|
|
METRIC_FILE_OPERATION_COUNT,
|
2025-08-18 15:59:13 -04:00
|
|
|
EVENT_CHAT_COMPRESSION,
|
2025-08-22 19:06:29 -04:00
|
|
|
METRIC_INVALID_CHUNK_COUNT,
|
|
|
|
|
METRIC_CONTENT_RETRY_COUNT,
|
|
|
|
|
METRIC_CONTENT_RETRY_FAILURE_COUNT,
|
2025-09-16 16:53:58 -04:00
|
|
|
METRIC_MODEL_ROUTING_LATENCY,
|
|
|
|
|
METRIC_MODEL_ROUTING_FAILURE_COUNT,
|
2025-09-23 18:06:03 -04:00
|
|
|
METRIC_MODEL_SLASH_COMMAND_CALL_COUNT,
|
2025-06-05 16:04:25 -04:00
|
|
|
} from './constants.js';
|
2025-08-26 00:04:53 +02:00
|
|
|
import type { Config } from '../config/config.js';
|
2025-09-23 18:06:03 -04:00
|
|
|
import type { ModelRoutingEvent, ModelSlashCommandEvent } from './types.js';
|
2025-06-05 16:04:25 -04:00
|
|
|
|
2025-06-15 16:24:53 -04:00
|
|
|
/**
 * Kinds of file operation that can be reported through
 * `recordFileOperationMetric`. The string value of each member is what ends
 * up in the `operation` attribute of the file-operation counter.
 */
export enum FileOperation {
  /** Emitted as `'create'`. */
  CREATE = 'create',

  /** Emitted as `'read'`. */
  READ = 'read',

  /** Emitted as `'update'`. */
  UPDATE = 'update',
}
|
|
|
|
|
|
2025-06-05 16:04:25 -04:00
|
|
|
// ---------------------------------------------------------------------------
// Module-level instrument state.
//
// Every instrument starts out undefined and is assigned by
// `initializeMetrics`; the record* functions return early while an
// instrument is undefined or `isMetricsInitialized` is false.
// ---------------------------------------------------------------------------

// Meter cached by `getMeter`.
let cliMeter: Meter | undefined;

// Set to true at the end of `initializeMetrics`.
let isMetricsInitialized = false;

// Counters.
let toolCallCounter: Counter | undefined;
let apiRequestCounter: Counter | undefined;
let tokenUsageCounter: Counter | undefined;
let fileOperationCounter: Counter | undefined;
let chatCompressionCounter: Counter | undefined;
let invalidChunkCounter: Counter | undefined;
let contentRetryCounter: Counter | undefined;
let contentRetryFailureCounter: Counter | undefined;
let modelRoutingFailureCounter: Counter | undefined;
let modelSlashCommandCallCounter: Counter | undefined;

// Histograms (all created with unit 'ms').
let toolCallLatencyHistogram: Histogram | undefined;
let apiRequestLatencyHistogram: Histogram | undefined;
let modelRoutingLatencyHistogram: Histogram | undefined;
|
|
|
|
|
|
2025-06-11 16:50:24 +00:00
|
|
|
/**
 * Builds the attributes shared by every metric recorded in this module.
 *
 * @param config Configuration object the session id is read from.
 * @returns An attribute map containing only `session.id`.
 */
function getCommonAttributes(config: Config): Attributes {
  const sessionId = config.getSessionId();
  const common: Attributes = { 'session.id': sessionId };
  return common;
}
|
|
|
|
|
|
2025-06-05 16:04:25 -04:00
|
|
|
/**
 * Returns the meter used by this module, creating and caching it on first
 * use via `metrics.getMeter(SERVICE_NAME)`.
 *
 * @returns The cached meter. The declared type allows `undefined`, so callers
 *   are expected to check the result.
 */
export function getMeter(): Meter | undefined {
  // Only look the meter up once; later calls reuse the cached instance.
  cliMeter ??= metrics.getMeter(SERVICE_NAME);
  return cliMeter;
}
|
|
|
|
|
|
2025-06-11 16:50:24 +00:00
|
|
|
/**
 * Creates every counter and histogram used by this module and records one
 * session-start count.
 *
 * The function does nothing when metrics were already initialized or when no
 * meter is available. `isMetricsInitialized` is only set once all
 * instruments have been created and the session has been counted.
 *
 * @param config Configuration object used to derive the common attributes
 *   attached to the session count.
 */
export function initializeMetrics(config: Config): void {
  if (isMetricsInitialized) {
    return;
  }

  const meter = getMeter();
  if (!meter) {
    return;
  }

  // Every counter in this module is integer-valued.
  const intCounter = (name: string, description: string): Counter =>
    meter.createCounter(name, { description, valueType: ValueType.INT });

  // Every histogram in this module is an integer latency in milliseconds.
  const msHistogram = (name: string, description: string): Histogram =>
    meter.createHistogram(name, {
      description,
      unit: 'ms',
      valueType: ValueType.INT,
    });

  toolCallCounter = intCounter(
    METRIC_TOOL_CALL_COUNT,
    'Counts tool calls, tagged by function name and success.',
  );
  toolCallLatencyHistogram = msHistogram(
    METRIC_TOOL_CALL_LATENCY,
    'Latency of tool calls in milliseconds.',
  );
  apiRequestCounter = intCounter(
    METRIC_API_REQUEST_COUNT,
    'Counts API requests, tagged by model and status.',
  );
  apiRequestLatencyHistogram = msHistogram(
    METRIC_API_REQUEST_LATENCY,
    'Latency of API requests in milliseconds.',
  );
  tokenUsageCounter = intCounter(
    METRIC_TOKEN_USAGE,
    'Counts the total number of tokens used.',
  );
  fileOperationCounter = intCounter(
    METRIC_FILE_OPERATION_COUNT,
    'Counts file operations (create, read, update).',
  );
  chatCompressionCounter = intCounter(
    EVENT_CHAT_COMPRESSION,
    'Counts chat compression events.',
  );

  // Counters for stream content errors.
  invalidChunkCounter = intCounter(
    METRIC_INVALID_CHUNK_COUNT,
    'Counts invalid chunks received from a stream.',
  );
  contentRetryCounter = intCounter(
    METRIC_CONTENT_RETRY_COUNT,
    'Counts retries due to content errors (e.g., empty stream).',
  );
  contentRetryFailureCounter = intCounter(
    METRIC_CONTENT_RETRY_FAILURE_COUNT,
    'Counts occurrences of all content retries failing.',
  );

  // Model routing and the model slash command.
  modelRoutingLatencyHistogram = msHistogram(
    METRIC_MODEL_ROUTING_LATENCY,
    'Latency of model routing decisions in milliseconds.',
  );
  modelRoutingFailureCounter = intCounter(
    METRIC_MODEL_ROUTING_FAILURE_COUNT,
    'Counts model routing failures.',
  );
  modelSlashCommandCallCounter = intCounter(
    METRIC_MODEL_SLASH_COMMAND_CALL_COUNT,
    'Counts model slash command calls.',
  );

  // The session counter is only needed here, so it is not kept at module
  // level: it is incremented exactly once per successful initialization.
  const sessionCounter = intCounter(
    METRIC_SESSION_COUNT,
    'Count of CLI sessions started.',
  );
  sessionCounter.add(1, getCommonAttributes(config));

  isMetricsInitialized = true;
}
|
|
|
|
|
|
2025-08-18 15:59:13 -04:00
|
|
|
/**
 * Counts one chat compression event.
 *
 * Does nothing until `initializeMetrics` has created the chat compression
 * counter.
 *
 * @param config Configuration object used to derive the common attributes.
 * @param args Token counts before and after compression. Every property of
 *   this object is spread onto the data point as an attribute, after the
 *   common attributes.
 */
export function recordChatCompressionMetrics(
  config: Config,
  args: { tokens_before: number; tokens_after: number },
): void {
  if (!chatCompressionCounter || !isMetricsInitialized) return;

  chatCompressionCounter.add(1, {
    ...getCommonAttributes(config),
    ...args,
  });
}
|
|
|
|
|
|
2025-06-05 16:04:25 -04:00
|
|
|
/**
 * Records one tool call: increments the tool-call counter and records the
 * call's duration on the tool-call latency histogram.
 *
 * Does nothing until `initializeMetrics` has created both instruments.
 *
 * @param config Configuration object used to derive the common attributes.
 * @param functionName Name of the tool function, emitted as `function_name`.
 * @param durationMs Duration of the call, recorded on the latency histogram.
 * @param success Whether the call succeeded (counter attribute only).
 * @param decision Optional decision value (counter attribute only).
 * @param tool_type Optional tool kind (counter attribute only).
 */
export function recordToolCallMetrics(
  config: Config,
  functionName: string,
  durationMs: number,
  success: boolean,
  decision?: 'accept' | 'reject' | 'modify' | 'auto_accept',
  tool_type?: 'native' | 'mcp',
): void {
  const counter = toolCallCounter;
  const latency = toolCallLatencyHistogram;
  if (!isMetricsInitialized || !counter || !latency) {
    return;
  }

  // The counter carries the full attribute set, including keys whose value
  // is undefined when the optional arguments are omitted.
  const counterAttributes: Attributes = {
    ...getCommonAttributes(config),
    function_name: functionName,
    success,
    decision,
    tool_type,
  };
  counter.add(1, counterAttributes);

  // The latency histogram is only tagged with the function name.
  const latencyAttributes: Attributes = {
    ...getCommonAttributes(config),
    function_name: functionName,
  };
  latency.record(durationMs, latencyAttributes);
}
|
|
|
|
|
|
2025-06-11 06:56:53 +00:00
|
|
|
/**
 * Adds `tokenCount` to the token usage counter, tagged with the model and
 * the token type.
 *
 * Does nothing until `initializeMetrics` has created the counter.
 *
 * @param config Configuration object used to derive the common attributes.
 * @param model Model name, emitted as the `model` attribute.
 * @param tokenCount Amount added to the counter.
 * @param type Token category, emitted as the `type` attribute.
 */
export function recordTokenUsageMetrics(
  config: Config,
  model: string,
  tokenCount: number,
  type: 'input' | 'output' | 'thought' | 'cache' | 'tool',
): void {
  const counter = tokenUsageCounter;
  if (!isMetricsInitialized || !counter) {
    return;
  }

  const attributes: Attributes = {
    ...getCommonAttributes(config),
    model,
    type,
  };
  counter.add(tokenCount, attributes);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Records one API response: increments the API request counter and records
 * the request duration on the API latency histogram.
 *
 * Does nothing until `initializeMetrics` has created both instruments.
 *
 * @param config Configuration object used to derive the common attributes.
 * @param model Model name, emitted as the `model` attribute.
 * @param durationMs Duration of the request, recorded on the histogram.
 * @param statusCode Optional status; when null or undefined the counter is
 *   tagged with `status_code: 'ok'`.
 */
export function recordApiResponseMetrics(
  config: Config,
  model: string,
  durationMs: number,
  statusCode?: number | string,
): void {
  const counter = apiRequestCounter;
  const latency = apiRequestLatencyHistogram;
  if (!isMetricsInitialized || !counter || !latency) {
    return;
  }

  const counterAttributes: Attributes = {
    ...getCommonAttributes(config),
    model,
    status_code: statusCode ?? 'ok',
  };
  counter.add(1, counterAttributes);

  // The latency data point is tagged by model only, without a status code.
  const latencyAttributes: Attributes = {
    ...getCommonAttributes(config),
    model,
  };
  latency.record(durationMs, latencyAttributes);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Records one failed API request on the same counter and latency histogram
 * that `recordApiResponseMetrics` uses.
 *
 * Does nothing until `initializeMetrics` has created both instruments.
 *
 * @param config Configuration object used to derive the common attributes.
 * @param model Model name, emitted as the `model` attribute.
 * @param durationMs Duration of the request, recorded on the histogram.
 * @param statusCode Optional status; when null or undefined the counter is
 *   tagged with `status_code: 'error'`.
 * @param errorType Optional error classification; when null or undefined the
 *   counter is tagged with `error_type: 'unknown'`.
 */
export function recordApiErrorMetrics(
  config: Config,
  model: string,
  durationMs: number,
  statusCode?: number | string,
  errorType?: string,
): void {
  const counter = apiRequestCounter;
  const latency = apiRequestLatencyHistogram;
  if (!isMetricsInitialized || !counter || !latency) {
    return;
  }

  const counterAttributes: Attributes = {
    ...getCommonAttributes(config),
    model,
    status_code: statusCode ?? 'error',
    error_type: errorType ?? 'unknown',
  };
  counter.add(1, counterAttributes);

  // The latency data point is tagged by model only, without error details.
  const latencyAttributes: Attributes = {
    ...getCommonAttributes(config),
    model,
  };
  latency.record(durationMs, latencyAttributes);
}
|
2025-06-15 16:24:53 -04:00
|
|
|
|
|
|
|
|
/**
 * Counts one file operation.
 *
 * Does nothing until `initializeMetrics` has created the file-operation
 * counter. Optional arguments are only added as attributes when they are
 * not `undefined`.
 *
 * @param config Configuration object used to derive the common attributes.
 * @param operation Kind of operation, emitted as the `operation` attribute.
 * @param lines Optional value for the `lines` attribute.
 * @param mimetype Optional value for the `mimetype` attribute.
 * @param extension Optional value for the `extension` attribute.
 * @param programming_language Optional value for the
 *   `programming_language` attribute.
 */
export function recordFileOperationMetric(
  config: Config,
  operation: FileOperation,
  lines?: number,
  mimetype?: string,
  extension?: string,
  programming_language?: string,
): void {
  const counter = fileOperationCounter;
  if (!isMetricsInitialized || !counter) {
    return;
  }

  const attributes: Attributes = {
    ...getCommonAttributes(config),
    operation,
  };

  // Ordered so attribute keys are inserted in the same sequence as the
  // parameters are declared.
  const optionalAttributes: Array<[string, number | string | undefined]> = [
    ['lines', lines],
    ['mimetype', mimetype],
    ['extension', extension],
    ['programming_language', programming_language],
  ];
  for (const [key, value] of optionalAttributes) {
    if (value !== undefined) {
      attributes[key] = value;
    }
  }

  counter.add(1, attributes);
}
|
2025-08-22 19:06:29 -04:00
|
|
|
|
|
|
|
|
// --- Content error metric recording functions (invalid chunks and content retries) ---
|
|
|
|
|
|
|
|
|
|
/**
 * Counts one invalid chunk received from a stream.
 *
 * Does nothing until `initializeMetrics` has created the invalid-chunk
 * counter.
 *
 * @param config Configuration object used to derive the common attributes.
 */
export function recordInvalidChunk(config: Config): void {
  const counter = invalidChunkCounter;
  if (!isMetricsInitialized || !counter) {
    return;
  }

  const attributes = getCommonAttributes(config);
  counter.add(1, attributes);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Counts one retry that was triggered by a content error.
 *
 * Does nothing until `initializeMetrics` has created the content-retry
 * counter.
 *
 * @param config Configuration object used to derive the common attributes.
 */
export function recordContentRetry(config: Config): void {
  const counter = contentRetryCounter;
  if (!isMetricsInitialized || !counter) {
    return;
  }

  const attributes = getCommonAttributes(config);
  counter.add(1, attributes);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Counts one request for which every content-error retry failed.
 *
 * Does nothing until `initializeMetrics` has created the
 * content-retry-failure counter.
 *
 * @param config Configuration object used to derive the common attributes.
 */
export function recordContentRetryFailure(config: Config): void {
  const counter = contentRetryFailureCounter;
  if (!isMetricsInitialized || !counter) {
    return;
  }

  const attributes = getCommonAttributes(config);
  counter.add(1, attributes);
}
|
2025-09-16 16:53:58 -04:00
|
|
|
|
2025-09-23 18:06:03 -04:00
|
|
|
/**
 * Counts one model slash command call, tagged with the model name carried by
 * the event.
 *
 * Does nothing until `initializeMetrics` has created the counter.
 *
 * @param config Configuration object used to derive the common attributes.
 * @param event Event whose `model_name` is emitted as the
 *   `slash_command.model.model_name` attribute.
 */
export function recordModelSlashCommand(
  config: Config,
  event: ModelSlashCommandEvent,
): void {
  const counter = modelSlashCommandCallCounter;
  if (!isMetricsInitialized || !counter) {
    return;
  }

  const attributes: Attributes = {
    ...getCommonAttributes(config),
    'slash_command.model.model_name': event.model_name,
  };
  counter.add(1, attributes);
}
|
|
|
|
|
|
2025-09-16 16:53:58 -04:00
|
|
|
/**
 * Records one model routing decision.
 *
 * The routing latency is always recorded. The routing failure counter is
 * only incremented when `event.failed` is truthy. Does nothing until
 * `initializeMetrics` has created both instruments.
 *
 * @param config Configuration object used to derive the common attributes.
 * @param event Routing event supplying the latency, decision model, decision
 *   source, failure flag and error message.
 */
export function recordModelRoutingMetrics(
  config: Config,
  event: ModelRoutingEvent,
): void {
  const latency = modelRoutingLatencyHistogram;
  const failures = modelRoutingFailureCounter;
  if (!isMetricsInitialized || !latency || !failures) {
    return;
  }

  const latencyAttributes: Attributes = {
    ...getCommonAttributes(config),
    'routing.decision_model': event.decision_model,
    'routing.decision_source': event.decision_source,
  };
  latency.record(event.routing_latency_ms, latencyAttributes);

  if (!event.failed) {
    return;
  }

  // Failures are tagged with the source and the error message rather than
  // the decision model.
  const failureAttributes: Attributes = {
    ...getCommonAttributes(config),
    'routing.decision_source': event.decision_source,
    'routing.error_message': event.error_message,
  };
  failures.add(1, failureAttributes);
}
|