[Part 3/6] feat(telemetry): enhance metrics with performance monitoring APIs (#8113)

This commit is contained in:
Adrian Arribas
2025-09-25 18:12:46 +02:00
committed by GitHub
parent 135d3401cd
commit f80eb71068
4 changed files with 1015 additions and 3 deletions
+351 -1
View File
@@ -5,7 +5,7 @@
*/
import type { Attributes, Meter, Counter, Histogram } from '@opentelemetry/api';
import { metrics, ValueType } from '@opentelemetry/api';
import { diag, metrics, ValueType } from '@opentelemetry/api';
import {
SERVICE_NAME,
METRIC_TOOL_CALL_COUNT,
@@ -22,6 +22,18 @@ import {
METRIC_MODEL_ROUTING_LATENCY,
METRIC_MODEL_ROUTING_FAILURE_COUNT,
METRIC_MODEL_SLASH_COMMAND_CALL_COUNT,
// Performance Monitoring Metrics
METRIC_STARTUP_TIME,
METRIC_MEMORY_USAGE,
METRIC_CPU_USAGE,
METRIC_TOOL_QUEUE_DEPTH,
METRIC_TOOL_EXECUTION_BREAKDOWN,
METRIC_TOKEN_EFFICIENCY,
METRIC_API_REQUEST_BREAKDOWN,
METRIC_PERFORMANCE_SCORE,
METRIC_REGRESSION_DETECTION,
METRIC_REGRESSION_PERCENTAGE_CHANGE,
METRIC_BASELINE_COMPARISON,
} from './constants.js';
import type { Config } from '../config/config.js';
import type { ModelRoutingEvent, ModelSlashCommandEvent } from './types.js';
@@ -32,6 +44,36 @@ export enum FileOperation {
UPDATE = 'update',
}
/**
 * String identifiers for the broad categories of performance metrics.
 *
 * NOTE(review): no recorder visible in this part of the file takes this enum
 * as a parameter; confirm where it is consumed.
 */
export enum PerformanceMetricType {
STARTUP = 'startup',
MEMORY = 'memory',
CPU = 'cpu',
TOOL_EXECUTION = 'tool_execution',
API_REQUEST = 'api_request',
TOKEN_EFFICIENCY = 'token_efficiency',
}
/**
 * Kinds of memory measurement accepted by `recordMemoryUsage`, which attaches
 * the chosen value to the data point as the `memory_type` attribute.
 */
export enum MemoryMetricType {
HEAP_USED = 'heap_used',
HEAP_TOTAL = 'heap_total',
EXTERNAL = 'external',
RSS = 'rss',
}
/**
 * Phases of a tool execution accepted by `recordToolExecutionBreakdown`,
 * which attaches the chosen value to the data point as the `phase` attribute.
 */
export enum ToolExecutionPhase {
VALIDATION = 'validation',
PREPARATION = 'preparation',
EXECUTION = 'execution',
RESULT_PROCESSING = 'result_processing',
}
/**
 * Phases of an API request accepted by `recordApiRequestBreakdown`, which
 * attaches the chosen value to the data point as the `phase` attribute.
 */
export enum ApiRequestPhase {
REQUEST_PREPARATION = 'request_preparation',
NETWORK_LATENCY = 'network_latency',
RESPONSE_PROCESSING = 'response_processing',
TOKEN_PROCESSING = 'token_processing',
}
let cliMeter: Meter | undefined;
let toolCallCounter: Counter | undefined;
let toolCallLatencyHistogram: Histogram | undefined;
@@ -46,7 +88,21 @@ let contentRetryFailureCounter: Counter | undefined;
let modelRoutingLatencyHistogram: Histogram | undefined;
let modelRoutingFailureCounter: Counter | undefined;
let modelSlashCommandCallCounter: Counter | undefined;
// Performance Monitoring Metrics
// These instruments stay `undefined` until initializePerformanceMonitoring()
// creates them; every record* function returns early while its instrument is
// still undefined.
let startupTimeHistogram: Histogram | undefined;
// The "*Gauge" variables below hold Histogram instruments despite their names.
let memoryUsageGauge: Histogram | undefined; // Using Histogram until ObservableGauge is available
let cpuUsageGauge: Histogram | undefined;
let toolQueueDepthGauge: Histogram | undefined;
let toolExecutionBreakdownHistogram: Histogram | undefined;
let tokenEfficiencyHistogram: Histogram | undefined;
let apiRequestBreakdownHistogram: Histogram | undefined;
let performanceScoreGauge: Histogram | undefined;
let regressionDetectionCounter: Counter | undefined;
let regressionPercentageChangeHistogram: Histogram | undefined;
let baselineComparisonHistogram: Histogram | undefined;
// Set to true at the end of initializeMetrics().
let isMetricsInitialized = false;
// Assigned from config.getTelemetryEnabled() in initializePerformanceMonitoring();
// checked by every performance record* function before recording.
let isPerformanceMonitoringEnabled = false;
function getCommonAttributes(config: Config): Attributes {
return {
@@ -67,6 +123,7 @@ export function initializeMetrics(config: Config): void {
const meter = getMeter();
if (!meter) return;
// Initialize core metrics
toolCallCounter = meter.createCounter(METRIC_TOOL_CALL_COUNT, {
description: 'Counts tool calls, tagged by function name and success.',
valueType: ValueType.INT,
@@ -145,6 +202,10 @@ export function initializeMetrics(config: Config): void {
valueType: ValueType.INT,
});
sessionCounter.add(1, getCommonAttributes(config));
// Initialize performance monitoring metrics if enabled
initializePerformanceMonitoring(config);
isMetricsInitialized = true;
}
@@ -332,3 +393,292 @@ export function recordModelRoutingMetrics(
});
}
}
// Performance Monitoring Functions
/**
 * Creates the performance-monitoring instruments on the meter returned by
 * `getMeter()`.
 *
 * Returns immediately when no meter is available. Otherwise the module-level
 * enabled flag is refreshed from `config.getTelemetryEnabled()`, and no
 * instruments are created when that flag is false.
 *
 * Gauge-like values (memory, CPU, queue depth, performance score) are
 * registered as histograms.
 *
 * @param config Configuration whose telemetry setting gates performance
 *   monitoring.
 */
export function initializePerformanceMonitoring(config: Config): void {
  const activeMeter = getMeter();
  if (!activeMeter) {
    return;
  }

  // TODO: Add specific performance monitoring settings to config. Until then,
  // performance monitoring simply follows the general telemetry switch.
  isPerformanceMonitoringEnabled = config.getTelemetryEnabled();
  if (!isPerformanceMonitoringEnabled) {
    return;
  }

  // Unit / value-type pairs shared by more than one instrument below.
  const millisecondsInt = { unit: 'ms', valueType: ValueType.INT };
  const percentDouble = { unit: 'percent', valueType: ValueType.DOUBLE };

  // Startup timing.
  startupTimeHistogram = activeMeter.createHistogram(METRIC_STARTUP_TIME, {
    description:
      'CLI startup time in milliseconds, broken down by initialization phase.',
    unit: 'ms',
    valueType: ValueType.DOUBLE,
  });

  // Resource usage (histograms until ObservableGauge is available).
  memoryUsageGauge = activeMeter.createHistogram(METRIC_MEMORY_USAGE, {
    description: 'Memory usage in bytes.',
    unit: 'bytes',
    valueType: ValueType.INT,
  });
  cpuUsageGauge = activeMeter.createHistogram(METRIC_CPU_USAGE, {
    description: 'CPU usage percentage.',
    ...percentDouble,
  });
  toolQueueDepthGauge = activeMeter.createHistogram(METRIC_TOOL_QUEUE_DEPTH, {
    description: 'Number of tools in execution queue.',
    valueType: ValueType.INT,
  });

  // Per-phase breakdowns and token efficiency.
  toolExecutionBreakdownHistogram = activeMeter.createHistogram(
    METRIC_TOOL_EXECUTION_BREAKDOWN,
    {
      description: 'Tool execution time breakdown by phase in milliseconds.',
      ...millisecondsInt,
    },
  );
  tokenEfficiencyHistogram = activeMeter.createHistogram(
    METRIC_TOKEN_EFFICIENCY,
    {
      description:
        'Token efficiency metrics (tokens per operation, cache hit rate, etc.).',
      valueType: ValueType.DOUBLE,
    },
  );
  apiRequestBreakdownHistogram = activeMeter.createHistogram(
    METRIC_API_REQUEST_BREAKDOWN,
    {
      description: 'API request time breakdown by phase in milliseconds.',
      ...millisecondsInt,
    },
  );

  // Composite score, regression detection and baseline comparison.
  performanceScoreGauge = activeMeter.createHistogram(
    METRIC_PERFORMANCE_SCORE,
    {
      description: 'Composite performance score (0-100).',
      unit: 'score',
      valueType: ValueType.DOUBLE,
    },
  );
  regressionDetectionCounter = activeMeter.createCounter(
    METRIC_REGRESSION_DETECTION,
    {
      description: 'Performance regression detection events.',
      valueType: ValueType.INT,
    },
  );
  regressionPercentageChangeHistogram = activeMeter.createHistogram(
    METRIC_REGRESSION_PERCENTAGE_CHANGE,
    {
      description:
        'Percentage change compared to baseline for detected regressions.',
      ...percentDouble,
    },
  );
  baselineComparisonHistogram = activeMeter.createHistogram(
    METRIC_BASELINE_COMPARISON,
    {
      description:
        'Performance comparison to established baseline (percentage change).',
      ...percentDouble,
    },
  );
}
/**
 * Records how long a startup phase took on the startup-time histogram.
 *
 * Does nothing unless performance monitoring is enabled and the histogram has
 * been created.
 *
 * @param config Configuration used to derive the common attributes.
 * @param phase Name of the startup phase being measured; stored as the
 *   `phase` attribute.
 * @param durationMs Duration of the phase in milliseconds.
 * @param details Optional extra attributes attached to the data point. A key
 *   that collides with `phase` or with a common attribute is ignored in
 *   favour of the built-in value.
 */
export function recordStartupPerformance(
  config: Config,
  phase: string,
  durationMs: number,
  details?: Record<string, string | number | boolean>,
): void {
  if (!startupTimeHistogram || !isPerformanceMonitoringEnabled) return;

  // `details` is spread first so caller-supplied keys can never overwrite the
  // common attributes or the explicit `phase` label (later properties win).
  const attributes: Attributes = {
    ...details,
    ...getCommonAttributes(config),
    phase,
  };
  startupTimeHistogram.record(durationMs, attributes);
}
/**
 * Records a memory measurement on the memory-usage instrument.
 *
 * Does nothing unless performance monitoring is enabled and the instrument
 * has been created.
 *
 * @param config Configuration used to derive the common attributes.
 * @param memoryType Kind of memory measured; stored as `memory_type`.
 * @param bytes Measured value in bytes.
 * @param component Optional component label; stored as `component`.
 */
export function recordMemoryUsage(
  config: Config,
  memoryType: MemoryMetricType,
  bytes: number,
  component?: string,
): void {
  if (isPerformanceMonitoringEnabled && memoryUsageGauge) {
    memoryUsageGauge.record(bytes, {
      ...getCommonAttributes(config),
      memory_type: memoryType,
      component,
    });
  }
}
/**
 * Records a CPU usage sample on the CPU-usage instrument.
 *
 * Does nothing unless performance monitoring is enabled and the instrument
 * has been created.
 *
 * @param config Configuration used to derive the common attributes.
 * @param percentage CPU usage as a percentage.
 * @param component Optional component label; stored as `component`.
 */
export function recordCpuUsage(
  config: Config,
  percentage: number,
  component?: string,
): void {
  const cpuInstrument = cpuUsageGauge;
  if (!isPerformanceMonitoringEnabled || !cpuInstrument) {
    return;
  }

  const labels: Attributes = { ...getCommonAttributes(config), component };
  cpuInstrument.record(percentage, labels);
}
/**
 * Records the current tool queue depth, tagged only with the common
 * attributes.
 *
 * Does nothing unless performance monitoring is enabled and the instrument
 * has been created.
 *
 * @param config Configuration used to derive the common attributes.
 * @param queueDepth Number of tools currently queued.
 */
export function recordToolQueueDepth(config: Config, queueDepth: number): void {
  if (isPerformanceMonitoringEnabled && toolQueueDepthGauge) {
    // A fresh copy of the common attributes is handed to the instrument.
    toolQueueDepthGauge.record(queueDepth, { ...getCommonAttributes(config) });
  }
}
/**
 * Records the time one phase of a tool execution took.
 *
 * Does nothing unless performance monitoring is enabled and the breakdown
 * histogram has been created.
 *
 * @param config Configuration used to derive the common attributes.
 * @param functionName Tool function name; stored as `function_name`.
 * @param phase Execution phase being measured; stored as `phase`.
 * @param durationMs Duration of the phase in milliseconds.
 */
export function recordToolExecutionBreakdown(
  config: Config,
  functionName: string,
  phase: ToolExecutionPhase,
  durationMs: number,
): void {
  const breakdown = toolExecutionBreakdownHistogram;
  if (!isPerformanceMonitoringEnabled || !breakdown) {
    return;
  }

  breakdown.record(durationMs, {
    ...getCommonAttributes(config),
    function_name: functionName,
    phase,
  });
}
/**
 * Records a token-efficiency value for a model.
 *
 * Does nothing unless performance monitoring is enabled and the histogram has
 * been created.
 *
 * @param config Configuration used to derive the common attributes.
 * @param model Model identifier; stored as `model`.
 * @param metric Name of the efficiency metric; stored as `metric`.
 * @param value Measured value.
 * @param context Optional context label; stored as `context`.
 */
export function recordTokenEfficiency(
  config: Config,
  model: string,
  metric: string,
  value: number,
  context?: string,
): void {
  if (isPerformanceMonitoringEnabled && tokenEfficiencyHistogram) {
    const labels: Attributes = {
      ...getCommonAttributes(config),
      model,
      metric,
      context,
    };
    tokenEfficiencyHistogram.record(value, labels);
  }
}
/**
 * Records the time one phase of an API request took.
 *
 * Does nothing unless performance monitoring is enabled and the breakdown
 * histogram has been created.
 *
 * @param config Configuration used to derive the common attributes.
 * @param model Model identifier; stored as `model`.
 * @param phase Request phase being measured; stored as `phase`.
 * @param durationMs Duration of the phase in milliseconds.
 */
export function recordApiRequestBreakdown(
  config: Config,
  model: string,
  phase: ApiRequestPhase,
  durationMs: number,
): void {
  const breakdown = apiRequestBreakdownHistogram;
  if (!isPerformanceMonitoringEnabled || !breakdown) {
    return;
  }

  breakdown.record(durationMs, {
    ...getCommonAttributes(config),
    model,
    phase,
  });
}
/**
 * Records a composite performance score for a category.
 *
 * Does nothing unless performance monitoring is enabled and the score
 * instrument has been created.
 *
 * @param config Configuration used to derive the common attributes.
 * @param score Score value to record.
 * @param category Category the score belongs to; stored as `category`.
 * @param baseline Optional baseline value; stored as `baseline`.
 */
export function recordPerformanceScore(
  config: Config,
  score: number,
  category: string,
  baseline?: number,
): void {
  if (isPerformanceMonitoringEnabled && performanceScoreGauge) {
    performanceScoreGauge.record(score, {
      ...getCommonAttributes(config),
      category,
      baseline,
    });
  }
}
/**
 * Counts a detected performance regression and, when possible, records how
 * far the current value deviates from the baseline.
 *
 * The counter is always incremented by one. The percentage change is recorded
 * only when the baseline is non-zero and the percentage histogram exists.
 * Does nothing at all unless performance monitoring is enabled and the
 * regression counter has been created.
 *
 * @param config Configuration used to derive the common attributes.
 * @param metric Name of the regressed metric; stored as `metric`.
 * @param currentValue Observed value; stored as `current_value`.
 * @param baselineValue Baseline value; stored as `baseline_value`.
 * @param severity Severity of the regression; stored as `severity`.
 */
export function recordPerformanceRegression(
  config: Config,
  metric: string,
  currentValue: number,
  baselineValue: number,
  severity: 'low' | 'medium' | 'high',
): void {
  if (!isPerformanceMonitoringEnabled || !regressionDetectionCounter) {
    return;
  }

  const regressionLabels: Attributes = {
    ...getCommonAttributes(config),
    metric,
    severity,
    current_value: currentValue,
    baseline_value: baselineValue,
  };
  regressionDetectionCounter.add(1, regressionLabels);

  // A relative change is undefined for a zero baseline, so only the counter
  // is updated in that case.
  if (!regressionPercentageChangeHistogram || baselineValue === 0) {
    return;
  }

  const delta = currentValue - baselineValue;
  regressionPercentageChangeHistogram.record(
    (delta / baselineValue) * 100,
    regressionLabels,
  );
}
/**
 * Records the percentage change of a value relative to its baseline.
 *
 * Does nothing unless performance monitoring is enabled and the comparison
 * histogram has been created. A zero baseline is reported through
 * `diag.warn` and nothing is recorded.
 *
 * @param config Configuration used to derive the common attributes.
 * @param metric Name of the compared metric; stored as `metric`.
 * @param currentValue Observed value; stored as `current_value`.
 * @param baselineValue Baseline value; stored as `baseline_value`.
 * @param category Category of the comparison; stored as `category`.
 */
export function recordBaselineComparison(
  config: Config,
  metric: string,
  currentValue: number,
  baselineValue: number,
  category: string,
): void {
  const comparison = baselineComparisonHistogram;
  if (!isPerformanceMonitoringEnabled || !comparison) {
    return;
  }

  if (baselineValue !== 0) {
    const delta = currentValue - baselineValue;
    const changePercent = (delta / baselineValue) * 100;
    comparison.record(changePercent, {
      ...getCommonAttributes(config),
      metric,
      category,
      current_value: currentValue,
      baseline_value: baselineValue,
    });
    return;
  }

  // Division by a zero baseline is undefined; warn instead of recording.
  diag.warn('Baseline value is zero, skipping comparison.');
}
/**
 * Reports whether performance monitoring is currently active.
 *
 * @returns `true` only when performance monitoring is enabled and the metrics
 *   module has been marked as initialized.
 */
export function isPerformanceMonitoringActive(): boolean {
  if (!isPerformanceMonitoringEnabled) {
    return false;
  }
  return isMetricsInitialized;
}