mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-10 14:10:37 -07:00
feat(telemetry): add OpenTelemetry GenAI semantic convention metrics (#10343)
This commit is contained in:
@@ -16,6 +16,8 @@ Learn how to enable and setup OpenTelemetry for Gemini CLI.
|
||||
- [Logs and Metrics](#logs-and-metrics)
|
||||
- [Logs](#logs)
|
||||
- [Metrics](#metrics)
|
||||
- [Custom](#custom)
|
||||
- [GenAI Semantic Convention](#genai-semantic-convention)
|
||||
|
||||
## Key Benefits
|
||||
|
||||
@@ -322,7 +324,9 @@ for Gemini CLI:
|
||||
|
||||
### Metrics
|
||||
|
||||
Metrics are numerical measurements of behavior over time. The following metrics are collected for Gemini CLI:
|
||||
Metrics are numerical measurements of behavior over time.
|
||||
|
||||
#### Custom
|
||||
|
||||
- `gemini_cli.session.count` (Counter, Int): Incremented once per CLI startup.
|
||||
|
||||
@@ -347,11 +351,16 @@ Metrics are numerical measurements of behavior over time. The following metrics
|
||||
- `gemini_cli.api.request.latency` (Histogram, ms): Measures API request latency.
|
||||
- **Attributes**:
|
||||
- `model`
|
||||
- **Note**: This metric overlaps with `gen_ai.client.operation.duration` below,
|
||||
which complies with the GenAI Semantic Conventions.
|
||||
|
||||
- `gemini_cli.token.usage` (Counter, Int): Counts the number of tokens used.
|
||||
- **Attributes**:
|
||||
- `model`
|
||||
- `type` (string: "input", "output", "thought", "cache", or "tool")
|
||||
- **Note**: For the `input`/`output` token types, this metric overlaps with
|
||||
`gen_ai.client.token.usage` below, which complies with the GenAI Semantic
|
||||
Conventions.
|
||||
|
||||
- `gemini_cli.file.operation.count` (Counter, Int): Counts file operations.
|
||||
- **Attributes**:
|
||||
@@ -369,3 +378,30 @@ Metrics are numerical measurements of behavior over time. The following metrics
|
||||
- **Attributes**:
|
||||
- `tokens_before`: (Int): Number of tokens in context prior to compression
|
||||
- `tokens_after`: (Int): Number of tokens in context after compression
|
||||
|
||||
#### GenAI Semantic Convention
|
||||
|
||||
The following metrics comply with [OpenTelemetry GenAI semantic conventions]
|
||||
for standardized observability across GenAI applications:
|
||||
|
||||
- `gen_ai.client.token.usage` (Histogram, token): Number of input and output tokens used per operation.
|
||||
- **Attributes**:
|
||||
- `gen_ai.operation.name` (string): The operation type (e.g., "generate_content", "chat")
|
||||
- `gen_ai.provider.name` (string): The GenAI provider ("gcp.gen_ai" or "gcp.vertex_ai")
|
||||
- `gen_ai.token.type` (string): The token type ("input" or "output")
|
||||
- `gen_ai.request.model` (string, optional): The model name used for the request
|
||||
- `gen_ai.response.model` (string, optional): The model name that generated the response
|
||||
- `server.address` (string, optional): GenAI server address
|
||||
- `server.port` (int, optional): GenAI server port
|
||||
|
||||
- `gen_ai.client.operation.duration` (Histogram, s): GenAI operation duration in seconds.
|
||||
- **Attributes**:
|
||||
- `gen_ai.operation.name` (string): The operation type (e.g., "generate_content", "chat")
|
||||
- `gen_ai.provider.name` (string): The GenAI provider ("gcp.gen_ai" or "gcp.vertex_ai")
|
||||
- `gen_ai.request.model` (string, optional): The model name used for the request
|
||||
- `gen_ai.response.model` (string, optional): The model name that generated the response
|
||||
- `server.address` (string, optional): GenAI server address
|
||||
- `server.port` (int, optional): GenAI server port
|
||||
- `error.type` (string, optional): Error type if the operation failed
|
||||
|
||||
[OpenTelemetry GenAI semantic conventions]: https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/gen-ai-metrics.md
|
||||
|
||||
@@ -85,6 +85,13 @@ export {
|
||||
recordContentRetry,
|
||||
recordContentRetryFailure,
|
||||
recordModelRoutingMetrics,
|
||||
// Custom metrics for token usage and API responses
|
||||
recordCustomTokenUsageMetrics,
|
||||
recordCustomApiResponseMetrics,
|
||||
// OpenTelemetry GenAI semantic convention for token usage and operation duration
|
||||
recordGenAiClientTokenUsage,
|
||||
recordGenAiClientOperationDuration,
|
||||
getConventionAttributes,
|
||||
// Performance monitoring functions
|
||||
recordStartupPerformance,
|
||||
recordMemoryUsage,
|
||||
@@ -103,4 +110,8 @@ export {
|
||||
ToolExecutionPhase,
|
||||
ApiRequestPhase,
|
||||
FileOperation,
|
||||
// OpenTelemetry Semantic Convention types
|
||||
GenAiOperationName,
|
||||
GenAiProviderName,
|
||||
GenAiTokenType,
|
||||
} from './metrics.js';
|
||||
|
||||
@@ -76,7 +76,11 @@ import {
|
||||
ExtensionUninstallEvent,
|
||||
} from './types.js';
|
||||
import * as metrics from './metrics.js';
|
||||
import { FileOperation } from './metrics.js';
|
||||
import {
|
||||
FileOperation,
|
||||
GenAiOperationName,
|
||||
GenAiProviderName,
|
||||
} from './metrics.js';
|
||||
import * as sdk from './sdk.js';
|
||||
import { vi, describe, beforeEach, it, expect, afterEach } from 'vitest';
|
||||
import type {
|
||||
@@ -289,6 +293,12 @@ describe('loggers', () => {
|
||||
const mockMetrics = {
|
||||
recordApiResponseMetrics: vi.fn(),
|
||||
recordTokenUsageMetrics: vi.fn(),
|
||||
getConventionAttributes: vi.fn(() => ({
|
||||
'gen_ai.operation.name': GenAiOperationName.GENERATE_CONTENT,
|
||||
'gen_ai.provider.name': GenAiProviderName.GCP_VERTEX_AI,
|
||||
'gen_ai.request.model': 'test-model',
|
||||
'gen_ai.response.model': 'test-model',
|
||||
})),
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
@@ -298,6 +308,9 @@ describe('loggers', () => {
|
||||
vi.spyOn(metrics, 'recordTokenUsageMetrics').mockImplementation(
|
||||
mockMetrics.recordTokenUsageMetrics,
|
||||
);
|
||||
vi.spyOn(metrics, 'getConventionAttributes').mockImplementation(
|
||||
mockMetrics.getConventionAttributes,
|
||||
);
|
||||
});
|
||||
|
||||
it('should log an API response with all fields', () => {
|
||||
@@ -345,13 +358,47 @@ describe('loggers', () => {
|
||||
expect(mockMetrics.recordApiResponseMetrics).toHaveBeenCalledWith(
|
||||
mockConfig,
|
||||
100,
|
||||
{ model: 'test-model', status_code: 200 },
|
||||
{
|
||||
model: 'test-model',
|
||||
status_code: 200,
|
||||
genAiAttributes: {
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||
'gen_ai.request.model': 'test-model',
|
||||
'gen_ai.response.model': 'test-model',
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
// Verify token usage calls for all token types
|
||||
expect(mockMetrics.recordTokenUsageMetrics).toHaveBeenCalledWith(
|
||||
mockConfig,
|
||||
17,
|
||||
{
|
||||
model: 'test-model',
|
||||
type: 'input',
|
||||
genAiAttributes: {
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||
'gen_ai.request.model': 'test-model',
|
||||
'gen_ai.response.model': 'test-model',
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
expect(mockMetrics.recordTokenUsageMetrics).toHaveBeenCalledWith(
|
||||
mockConfig,
|
||||
50,
|
||||
{ model: 'test-model', type: 'output' },
|
||||
{
|
||||
model: 'test-model',
|
||||
type: 'output',
|
||||
genAiAttributes: {
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||
'gen_ai.request.model': 'test-model',
|
||||
'gen_ai.response.model': 'test-model',
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
expect(mockUiEvent.addEvent).toHaveBeenCalledWith({
|
||||
|
||||
@@ -67,8 +67,6 @@ import type {
|
||||
} from './types.js';
|
||||
import {
|
||||
recordApiErrorMetrics,
|
||||
recordTokenUsageMetrics,
|
||||
recordApiResponseMetrics,
|
||||
recordToolCallMetrics,
|
||||
recordChatCompressionMetrics,
|
||||
recordFileOperationMetric,
|
||||
@@ -77,6 +75,9 @@ import {
|
||||
recordContentRetryFailure,
|
||||
recordModelRoutingMetrics,
|
||||
recordModelSlashCommand,
|
||||
getConventionAttributes,
|
||||
recordTokenUsageMetrics,
|
||||
recordApiResponseMetrics,
|
||||
} from './metrics.js';
|
||||
import { isTelemetrySdkInitialized } from './sdk.js';
|
||||
import type { UiEvent } from './uiTelemetry.js';
|
||||
@@ -366,6 +367,17 @@ export function logApiError(config: Config, event: ApiErrorEvent): void {
|
||||
status_code: event.status_code,
|
||||
error_type: event.error_type,
|
||||
});
|
||||
|
||||
// Record GenAI operation duration for errors
|
||||
const conventionAttributes = getConventionAttributes(event);
|
||||
recordApiResponseMetrics(config, event.duration_ms, {
|
||||
model: event.model,
|
||||
status_code: event.status_code,
|
||||
genAiAttributes: {
|
||||
...conventionAttributes,
|
||||
'error.type': event.error_type || 'unknown',
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
export function logApiResponse(config: Config, event: ApiResponseEvent): void {
|
||||
@@ -398,30 +410,30 @@ export function logApiResponse(config: Config, event: ApiResponseEvent): void {
|
||||
attributes,
|
||||
};
|
||||
logger.emit(logRecord);
|
||||
|
||||
const conventionAttributes = getConventionAttributes(event);
|
||||
|
||||
recordApiResponseMetrics(config, event.duration_ms, {
|
||||
model: event.model,
|
||||
status_code: event.status_code,
|
||||
genAiAttributes: conventionAttributes,
|
||||
});
|
||||
recordTokenUsageMetrics(config, event.input_token_count, {
|
||||
model: event.model,
|
||||
type: 'input',
|
||||
});
|
||||
recordTokenUsageMetrics(config, event.output_token_count, {
|
||||
model: event.model,
|
||||
type: 'output',
|
||||
});
|
||||
recordTokenUsageMetrics(config, event.cached_content_token_count, {
|
||||
model: event.model,
|
||||
type: 'cache',
|
||||
});
|
||||
recordTokenUsageMetrics(config, event.thoughts_token_count, {
|
||||
model: event.model,
|
||||
type: 'thought',
|
||||
});
|
||||
recordTokenUsageMetrics(config, event.tool_token_count, {
|
||||
model: event.model,
|
||||
type: 'tool',
|
||||
});
|
||||
|
||||
const tokenUsageData = [
|
||||
{ count: event.input_token_count, type: 'input' as const },
|
||||
{ count: event.output_token_count, type: 'output' as const },
|
||||
{ count: event.cached_content_token_count, type: 'cache' as const },
|
||||
{ count: event.thoughts_token_count, type: 'thought' as const },
|
||||
{ count: event.tool_token_count, type: 'tool' as const },
|
||||
];
|
||||
|
||||
for (const { count, type } of tokenUsageData) {
|
||||
recordTokenUsageMetrics(config, count, {
|
||||
model: event.model,
|
||||
type,
|
||||
genAiAttributes: conventionAttributes,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
export function logLoopDetected(
|
||||
|
||||
@@ -61,6 +61,11 @@ function originalOtelMockFactory() {
|
||||
setLogger: vi.fn(),
|
||||
warn: vi.fn(),
|
||||
},
|
||||
DiagConsoleLogger: vi.fn(),
|
||||
DiagLogLevel: {
|
||||
NONE: 0,
|
||||
INFO: 1,
|
||||
},
|
||||
} as const;
|
||||
}
|
||||
|
||||
@@ -82,6 +87,8 @@ describe('Telemetry Metrics', () => {
|
||||
let recordPerformanceScoreModule: typeof import('./metrics.js').recordPerformanceScore;
|
||||
let recordPerformanceRegressionModule: typeof import('./metrics.js').recordPerformanceRegression;
|
||||
let recordBaselineComparisonModule: typeof import('./metrics.js').recordBaselineComparison;
|
||||
let recordGenAiClientTokenUsageModule: typeof import('./metrics.js').recordGenAiClientTokenUsage;
|
||||
let recordGenAiClientOperationDurationModule: typeof import('./metrics.js').recordGenAiClientOperationDuration;
|
||||
|
||||
beforeEach(async () => {
|
||||
vi.resetModules();
|
||||
@@ -110,6 +117,10 @@ describe('Telemetry Metrics', () => {
|
||||
recordPerformanceRegressionModule =
|
||||
metricsJsModule.recordPerformanceRegression;
|
||||
recordBaselineComparisonModule = metricsJsModule.recordBaselineComparison;
|
||||
recordGenAiClientTokenUsageModule =
|
||||
metricsJsModule.recordGenAiClientTokenUsage;
|
||||
recordGenAiClientOperationDurationModule =
|
||||
metricsJsModule.recordGenAiClientOperationDuration;
|
||||
|
||||
const otelApiModule = await import('@opentelemetry/api');
|
||||
|
||||
@@ -428,6 +439,182 @@ describe('Telemetry Metrics', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('OpenTelemetry GenAI Semantic Convention Metrics', () => {
|
||||
const mockConfig = {
|
||||
getSessionId: () => 'test-session-id',
|
||||
getTelemetryEnabled: () => true,
|
||||
} as unknown as Config;
|
||||
|
||||
describe('recordGenAiClientTokenUsage', () => {
|
||||
it('should not record metrics when not initialized', () => {
|
||||
recordGenAiClientTokenUsageModule(mockConfig, 100, {
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.gen_ai',
|
||||
'gen_ai.token.type': 'input',
|
||||
});
|
||||
|
||||
expect(mockHistogramRecordFn).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should record input token usage with correct attributes', () => {
|
||||
initializeMetricsModule(mockConfig);
|
||||
mockHistogramRecordFn.mockClear();
|
||||
|
||||
recordGenAiClientTokenUsageModule(mockConfig, 150, {
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.gen_ai',
|
||||
'gen_ai.token.type': 'input',
|
||||
'gen_ai.request.model': 'gemini-2.0-flash',
|
||||
'gen_ai.response.model': 'gemini-2.0-flash',
|
||||
});
|
||||
|
||||
expect(mockHistogramRecordFn).toHaveBeenCalledWith(150, {
|
||||
'session.id': 'test-session-id',
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.gen_ai',
|
||||
'gen_ai.token.type': 'input',
|
||||
'gen_ai.request.model': 'gemini-2.0-flash',
|
||||
'gen_ai.response.model': 'gemini-2.0-flash',
|
||||
});
|
||||
});
|
||||
|
||||
it('should record output token usage with correct attributes', () => {
|
||||
initializeMetricsModule(mockConfig);
|
||||
mockHistogramRecordFn.mockClear();
|
||||
|
||||
recordGenAiClientTokenUsageModule(mockConfig, 75, {
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||
'gen_ai.token.type': 'output',
|
||||
'gen_ai.request.model': 'gemini-pro',
|
||||
});
|
||||
|
||||
expect(mockHistogramRecordFn).toHaveBeenCalledWith(75, {
|
||||
'session.id': 'test-session-id',
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||
'gen_ai.token.type': 'output',
|
||||
'gen_ai.request.model': 'gemini-pro',
|
||||
});
|
||||
});
|
||||
|
||||
it('should record token usage with optional attributes', () => {
|
||||
initializeMetricsModule(mockConfig);
|
||||
mockHistogramRecordFn.mockClear();
|
||||
|
||||
recordGenAiClientTokenUsageModule(mockConfig, 200, {
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||
'gen_ai.token.type': 'input',
|
||||
'gen_ai.request.model': 'text-embedding-004',
|
||||
'server.address': 'aiplatform.googleapis.com',
|
||||
'server.port': 443,
|
||||
});
|
||||
|
||||
expect(mockHistogramRecordFn).toHaveBeenCalledWith(200, {
|
||||
'session.id': 'test-session-id',
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||
'gen_ai.token.type': 'input',
|
||||
'gen_ai.request.model': 'text-embedding-004',
|
||||
'server.address': 'aiplatform.googleapis.com',
|
||||
'server.port': 443,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('recordGenAiClientOperationDuration', () => {
|
||||
it('should not record metrics when not initialized', () => {
|
||||
recordGenAiClientOperationDurationModule(mockConfig, 2.5, {
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.gen_ai',
|
||||
});
|
||||
|
||||
expect(mockHistogramRecordFn).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should record successful operation duration with correct attributes', () => {
|
||||
initializeMetricsModule(mockConfig);
|
||||
mockHistogramRecordFn.mockClear();
|
||||
|
||||
recordGenAiClientOperationDurationModule(mockConfig, 1.25, {
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.gen_ai',
|
||||
'gen_ai.request.model': 'gemini-2.0-flash',
|
||||
'gen_ai.response.model': 'gemini-2.0-flash',
|
||||
});
|
||||
|
||||
expect(mockHistogramRecordFn).toHaveBeenCalledWith(1.25, {
|
||||
'session.id': 'test-session-id',
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.gen_ai',
|
||||
'gen_ai.request.model': 'gemini-2.0-flash',
|
||||
'gen_ai.response.model': 'gemini-2.0-flash',
|
||||
});
|
||||
});
|
||||
|
||||
it('should record failed operation duration with error type', () => {
|
||||
initializeMetricsModule(mockConfig);
|
||||
mockHistogramRecordFn.mockClear();
|
||||
|
||||
recordGenAiClientOperationDurationModule(mockConfig, 3.75, {
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||
'gen_ai.request.model': 'gemini-pro',
|
||||
'error.type': 'quota_exceeded',
|
||||
});
|
||||
|
||||
expect(mockHistogramRecordFn).toHaveBeenCalledWith(3.75, {
|
||||
'session.id': 'test-session-id',
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||
'gen_ai.request.model': 'gemini-pro',
|
||||
'error.type': 'quota_exceeded',
|
||||
});
|
||||
});
|
||||
|
||||
it('should record operation duration with server details', () => {
|
||||
initializeMetricsModule(mockConfig);
|
||||
mockHistogramRecordFn.mockClear();
|
||||
|
||||
recordGenAiClientOperationDurationModule(mockConfig, 0.95, {
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||
'gen_ai.request.model': 'gemini-1.5-pro',
|
||||
'gen_ai.response.model': 'gemini-1.5-pro-001',
|
||||
'server.address': 'us-central1-aiplatform.googleapis.com',
|
||||
'server.port': 443,
|
||||
});
|
||||
|
||||
expect(mockHistogramRecordFn).toHaveBeenCalledWith(0.95, {
|
||||
'session.id': 'test-session-id',
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||
'gen_ai.request.model': 'gemini-1.5-pro',
|
||||
'gen_ai.response.model': 'gemini-1.5-pro-001',
|
||||
'server.address': 'us-central1-aiplatform.googleapis.com',
|
||||
'server.port': 443,
|
||||
});
|
||||
});
|
||||
|
||||
it('should handle minimal required attributes', () => {
|
||||
initializeMetricsModule(mockConfig);
|
||||
mockHistogramRecordFn.mockClear();
|
||||
|
||||
recordGenAiClientOperationDurationModule(mockConfig, 2.1, {
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.gen_ai',
|
||||
});
|
||||
|
||||
expect(mockHistogramRecordFn).toHaveBeenCalledWith(2.1, {
|
||||
'session.id': 'test-session-id',
|
||||
'gen_ai.operation.name': 'generate_content',
|
||||
'gen_ai.provider.name': 'gcp.gen_ai',
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('Performance Monitoring Metrics', () => {
|
||||
const mockConfig = {
|
||||
getSessionId: () => 'test-session-id',
|
||||
|
||||
@@ -9,6 +9,7 @@ import { diag, metrics, ValueType } from '@opentelemetry/api';
|
||||
import { SERVICE_NAME, EVENT_CHAT_COMPRESSION } from './constants.js';
|
||||
import type { Config } from '../config/config.js';
|
||||
import type { ModelRoutingEvent, ModelSlashCommandEvent } from './types.js';
|
||||
import { AuthType } from '../core/contentGenerator.js';
|
||||
|
||||
const TOOL_CALL_COUNT = 'gemini_cli.tool.call.count';
|
||||
const TOOL_CALL_LATENCY = 'gemini_cli.tool.call.latency';
|
||||
@@ -26,6 +27,10 @@ const MODEL_ROUTING_FAILURE_COUNT = 'gemini_cli.model_routing.failure.count';
|
||||
const MODEL_SLASH_COMMAND_CALL_COUNT =
|
||||
'gemini_cli.slash_command.model.call_count';
|
||||
|
||||
// OpenTelemetry GenAI Semantic Convention Metrics
|
||||
const GEN_AI_CLIENT_TOKEN_USAGE = 'gen_ai.client.token.usage';
|
||||
const GEN_AI_CLIENT_OPERATION_DURATION = 'gen_ai.client.operation.duration';
|
||||
|
||||
// Performance Monitoring Metrics
|
||||
const STARTUP_TIME = 'gemini_cli.startup.duration';
|
||||
const MEMORY_USAGE = 'gemini_cli.memory.usage';
|
||||
@@ -170,6 +175,36 @@ const HISTOGRAM_DEFINITIONS = {
|
||||
'routing.decision_source': string;
|
||||
},
|
||||
},
|
||||
[GEN_AI_CLIENT_TOKEN_USAGE]: {
|
||||
description: 'Number of input and output tokens used.',
|
||||
unit: 'token',
|
||||
valueType: ValueType.INT,
|
||||
assign: (h: Histogram) => (genAiClientTokenUsageHistogram = h),
|
||||
attributes: {} as {
|
||||
'gen_ai.operation.name': string;
|
||||
'gen_ai.provider.name': string;
|
||||
'gen_ai.token.type': 'input' | 'output';
|
||||
'gen_ai.request.model'?: string;
|
||||
'gen_ai.response.model'?: string;
|
||||
'server.address'?: string;
|
||||
'server.port'?: number;
|
||||
},
|
||||
},
|
||||
[GEN_AI_CLIENT_OPERATION_DURATION]: {
|
||||
description: 'GenAI operation duration.',
|
||||
unit: 's',
|
||||
valueType: ValueType.DOUBLE,
|
||||
assign: (h: Histogram) => (genAiClientOperationDurationHistogram = h),
|
||||
attributes: {} as {
|
||||
'gen_ai.operation.name': string;
|
||||
'gen_ai.provider.name': string;
|
||||
'gen_ai.request.model'?: string;
|
||||
'gen_ai.response.model'?: string;
|
||||
'server.address'?: string;
|
||||
'server.port'?: number;
|
||||
'error.type'?: string;
|
||||
},
|
||||
},
|
||||
} as const;
|
||||
|
||||
const PERFORMANCE_COUNTER_DEFINITIONS = {
|
||||
@@ -341,6 +376,20 @@ export enum ApiRequestPhase {
|
||||
TOKEN_PROCESSING = 'token_processing',
|
||||
}
|
||||
|
||||
/**
 * Operation names used for the `gen_ai.operation.name` metric attribute.
 * `generate_content` is the only operation defined here.
 */
export enum GenAiOperationName {
|
||||
GENERATE_CONTENT = 'generate_content',
|
||||
}
|
||||
|
||||
/**
 * Provider names used for the `gen_ai.provider.name` metric attribute.
 */
export enum GenAiProviderName {
|
||||
GCP_GEN_AI = 'gcp.gen_ai',
|
||||
GCP_VERTEX_AI = 'gcp.vertex_ai',
|
||||
}
|
||||
|
||||
/**
 * Token type names. The values match the `'input' | 'output'` literals
 * accepted for the `gen_ai.token.type` attribute of the
 * `gen_ai.client.token.usage` histogram.
 */
export enum GenAiTokenType {
|
||||
INPUT = 'input',
|
||||
OUTPUT = 'output',
|
||||
}
|
||||
|
||||
let cliMeter: Meter | undefined;
|
||||
let toolCallCounter: Counter | undefined;
|
||||
let toolCallLatencyHistogram: Histogram | undefined;
|
||||
@@ -357,6 +406,10 @@ let modelRoutingLatencyHistogram: Histogram | undefined;
|
||||
let modelRoutingFailureCounter: Counter | undefined;
|
||||
let modelSlashCommandCallCounter: Counter | undefined;
|
||||
|
||||
// OpenTelemetry GenAI Semantic Convention Metrics
|
||||
let genAiClientTokenUsageHistogram: Histogram | undefined;
|
||||
let genAiClientOperationDurationHistogram: Histogram | undefined;
|
||||
|
||||
// Performance Monitoring Metrics
|
||||
let startupTimeHistogram: Histogram | undefined;
|
||||
let memoryUsageGauge: Histogram | undefined; // Using Histogram until ObservableGauge is available
|
||||
@@ -437,7 +490,7 @@ export function recordToolCallMetrics(
|
||||
});
|
||||
}
|
||||
|
||||
export function recordTokenUsageMetrics(
|
||||
export function recordCustomTokenUsageMetrics(
|
||||
config: Config,
|
||||
tokenCount: number,
|
||||
attributes: MetricDefinitions[typeof TOKEN_USAGE]['attributes'],
|
||||
@@ -449,7 +502,7 @@ export function recordTokenUsageMetrics(
|
||||
});
|
||||
}
|
||||
|
||||
export function recordApiResponseMetrics(
|
||||
export function recordCustomApiResponseMetrics(
|
||||
config: Config,
|
||||
durationMs: number,
|
||||
attributes: MetricDefinitions[typeof API_REQUEST_COUNT]['attributes'],
|
||||
@@ -572,6 +625,81 @@ export function recordModelRoutingMetrics(
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// OpenTelemetry GenAI Semantic Convention Recording Functions
|
||||
|
||||
/**
 * Records a token count on the `gen_ai.client.token.usage` histogram.
 *
 * Returns without recording when the metrics module is not initialized or
 * the histogram has not been assigned.
 *
 * @param config Passed to `getCommonAttributes` to obtain the attributes
 *   attached to every data point.
 * @param tokenCount Number of tokens to record.
 * @param attributes GenAI semantic-convention attributes. They are spread
 *   after the common attributes, so they win on key collisions.
 */
export function recordGenAiClientTokenUsage(
  config: Config,
  tokenCount: number,
  attributes: MetricDefinitions[typeof GEN_AI_CLIENT_TOKEN_USAGE]['attributes'],
): void {
  if (!isMetricsInitialized || !genAiClientTokenUsageHistogram) {
    return;
  }

  const commonAttributes = baseMetricDefinition.getCommonAttributes(config);
  const dataPointAttributes: Attributes = {
    ...commonAttributes,
    ...attributes,
  };

  genAiClientTokenUsageHistogram.record(tokenCount, dataPointAttributes);
}
|
||||
|
||||
/**
 * Records an operation duration on the `gen_ai.client.operation.duration`
 * histogram.
 *
 * Returns without recording when the metrics module is not initialized or
 * the histogram has not been assigned.
 *
 * @param config Passed to `getCommonAttributes` to obtain the attributes
 *   attached to every data point.
 * @param durationSeconds Duration value to record (the histogram unit is `s`).
 * @param attributes GenAI semantic-convention attributes. They are spread
 *   after the common attributes, so they win on key collisions.
 */
export function recordGenAiClientOperationDuration(
  config: Config,
  durationSeconds: number,
  attributes: MetricDefinitions[typeof GEN_AI_CLIENT_OPERATION_DURATION]['attributes'],
): void {
  if (!isMetricsInitialized || !genAiClientOperationDurationHistogram) {
    return;
  }

  const commonAttributes = baseMetricDefinition.getCommonAttributes(config);
  const dataPointAttributes: Attributes = {
    ...commonAttributes,
    ...attributes,
  };

  genAiClientOperationDurationHistogram.record(
    durationSeconds,
    dataPointAttributes,
  );
}
|
||||
|
||||
/**
 * Builds the GenAI semantic-convention attributes for an API event.
 *
 * The event's `model` is reported as both the request model and the response
 * model. The provider name is derived from the event's `auth_type`.
 *
 * @param event Object carrying the model name and, optionally, the auth type.
 * @returns Operation name, provider name, and request/response model
 *   attributes keyed by their semantic-convention names.
 */
export function getConventionAttributes(event: {
  model: string;
  auth_type?: string;
}): {
  'gen_ai.operation.name': GenAiOperationName;
  'gen_ai.provider.name': GenAiProviderName;
  'gen_ai.request.model': string;
  'gen_ai.response.model': string;
} {
  const { model, auth_type: authType } = event;

  return {
    'gen_ai.operation.name': getGenAiOperationName(),
    'gen_ai.provider.name': getGenAiProvider(authType),
    'gen_ai.request.model': model,
    'gen_ai.response.model': model,
  };
}
|
||||
|
||||
/**
 * Maps an authentication type to the GenAI provider name reported on
 * OpenTelemetry convention metrics.
 *
 * Vertex AI, Cloud Shell, and Login-with-Google auth types map to
 * `gcp.vertex_ai`. Every other value, including `undefined`, maps to
 * `gcp.gen_ai`.
 *
 * @param authType Authentication type string, if known.
 * @returns The provider name for the `gen_ai.provider.name` attribute.
 */
function getGenAiProvider(authType?: string): GenAiProviderName {
  const isVertexAiBacked =
    authType === AuthType.USE_VERTEX_AI ||
    authType === AuthType.CLOUD_SHELL ||
    authType === AuthType.LOGIN_WITH_GOOGLE;

  // AuthType.USE_GEMINI and any unrecognized value share the fallback provider.
  return isVertexAiBacked
    ? GenAiProviderName.GCP_VERTEX_AI
    : GenAiProviderName.GCP_GEN_AI;
}
|
||||
|
||||
/**
 * Returns the operation name reported on GenAI convention metrics.
 * Always `generate_content`.
 */
function getGenAiOperationName(): GenAiOperationName {
  const operationName = GenAiOperationName.GENERATE_CONTENT;
  return operationName;
}
|
||||
|
||||
// Performance Monitoring Functions
|
||||
|
||||
export function initializePerformanceMonitoring(config: Config): void {
|
||||
@@ -767,3 +895,71 @@ export function recordBaselineComparison(
|
||||
export function isPerformanceMonitoringActive(): boolean {
|
||||
return isPerformanceMonitoringEnabled && isMetricsInitialized;
|
||||
}
|
||||
|
||||
/**
 * Records token usage through both the custom and the GenAI convention
 * recording functions.
 *
 * The count is always forwarded to `recordCustomTokenUsageMetrics`. It is
 * also forwarded to `recordGenAiClientTokenUsage` only when
 * `genAiAttributes` is provided and the token type is `input` or `output`.
 *
 * @param config Passed through to both recording functions.
 * @param tokenCount Number of tokens to record.
 * @param attributes Model name, token type, and optional GenAI convention
 *   attributes.
 */
export function recordTokenUsageMetrics(
  config: Config,
  tokenCount: number,
  attributes: {
    model: string;
    type: 'input' | 'output' | 'thought' | 'cache' | 'tool';
    genAiAttributes?: {
      'gen_ai.operation.name': string;
      'gen_ai.provider.name': string;
      'gen_ai.request.model'?: string;
      'gen_ai.response.model'?: string;
      'server.address'?: string;
      'server.port'?: number;
    };
  },
): void {
  const { model, type, genAiAttributes } = attributes;

  recordCustomTokenUsageMetrics(config, tokenCount, { model, type });

  if (!genAiAttributes) {
    return;
  }
  // The convention metric only accepts the 'input' and 'output' token types.
  if (type !== 'input' && type !== 'output') {
    return;
  }

  recordGenAiClientTokenUsage(config, tokenCount, {
    ...genAiAttributes,
    'gen_ai.token.type': type,
  });
}
|
||||
|
||||
/**
 * Records API response latency through both the custom and the GenAI
 * convention recording functions.
 *
 * The duration is always forwarded, in milliseconds, to
 * `recordCustomApiResponseMetrics`. When `genAiAttributes` is provided, the
 * duration is also converted to seconds and forwarded to
 * `recordGenAiClientOperationDuration`.
 *
 * @param config Passed through to both recording functions.
 * @param durationMs Request duration in milliseconds.
 * @param attributes Model name, optional status code, and optional GenAI
 *   convention attributes (which may include `error.type`).
 */
export function recordApiResponseMetrics(
  config: Config,
  durationMs: number,
  attributes: {
    model: string;
    status_code?: number | string;
    genAiAttributes?: {
      'gen_ai.operation.name': string;
      'gen_ai.provider.name': string;
      'gen_ai.request.model'?: string;
      'gen_ai.response.model'?: string;
      'server.address'?: string;
      'server.port'?: number;
      'error.type'?: string;
    };
  },
): void {
  const { model, status_code, genAiAttributes } = attributes;

  recordCustomApiResponseMetrics(config, durationMs, { model, status_code });

  if (!genAiAttributes) {
    return;
  }

  // The convention histogram is recorded in seconds, not milliseconds.
  const durationSeconds = durationMs / 1000;
  recordGenAiClientOperationDuration(config, durationSeconds, {
    ...genAiAttributes,
  });
}
|
||||
|
||||
Reference in New Issue
Block a user