mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-12 12:54:07 -07:00
feat(telemetry): add OpenTelemetry GenAI semantic convention metrics (#10343)
This commit is contained in:
+37
-1
@@ -16,6 +16,8 @@ Learn how to enable and setup OpenTelemetry for Gemini CLI.
|
|||||||
- [Logs and Metrics](#logs-and-metrics)
|
- [Logs and Metrics](#logs-and-metrics)
|
||||||
- [Logs](#logs)
|
- [Logs](#logs)
|
||||||
- [Metrics](#metrics)
|
- [Metrics](#metrics)
|
||||||
|
- [Custom](#custom)
|
||||||
|
- [GenAI Semantic Convention](#genai-semantic-convention)
|
||||||
|
|
||||||
## Key Benefits
|
## Key Benefits
|
||||||
|
|
||||||
@@ -322,7 +324,9 @@ for Gemini CLI:
|
|||||||
|
|
||||||
### Metrics
|
### Metrics
|
||||||
|
|
||||||
Metrics are numerical measurements of behavior over time. The following metrics are collected for Gemini CLI:
|
Metrics are numerical measurements of behavior over time.
|
||||||
|
|
||||||
|
#### Custom
|
||||||
|
|
||||||
- `gemini_cli.session.count` (Counter, Int): Incremented once per CLI startup.
|
- `gemini_cli.session.count` (Counter, Int): Incremented once per CLI startup.
|
||||||
|
|
||||||
@@ -347,11 +351,16 @@ Metrics are numerical measurements of behavior over time. The following metrics
|
|||||||
- `gemini_cli.api.request.latency` (Histogram, ms): Measures API request latency.
|
- `gemini_cli.api.request.latency` (Histogram, ms): Measures API request latency.
|
||||||
- **Attributes**:
|
- **Attributes**:
|
||||||
- `model`
|
- `model`
|
||||||
|
- **Note**: This metric overlaps with `gen_ai.client.operation.duration` below,
|
||||||
|
which follows the GenAI Semantic Conventions and reports seconds instead of milliseconds.
|
||||||
|
|
||||||
- `gemini_cli.token.usage` (Counter, Int): Counts the number of tokens used.
|
- `gemini_cli.token.usage` (Counter, Int): Counts the number of tokens used.
|
||||||
- **Attributes**:
|
- **Attributes**:
|
||||||
- `model`
|
- `model`
|
||||||
- `type` (string: "input", "output", "thought", "cache", or "tool")
|
- `type` (string: "input", "output", "thought", "cache", or "tool")
|
||||||
|
- **Note**: For the `input` and `output` token types, this metric overlaps
|
||||||
|
with `gen_ai.client.token.usage` below, which follows the GenAI Semantic
|
||||||
|
Conventions.
|
||||||
|
|
||||||
- `gemini_cli.file.operation.count` (Counter, Int): Counts file operations.
|
- `gemini_cli.file.operation.count` (Counter, Int): Counts file operations.
|
||||||
- **Attributes**:
|
- **Attributes**:
|
||||||
@@ -369,3 +378,30 @@ Metrics are numerical measurements of behavior over time. The following metrics
|
|||||||
- **Attributes**:
|
- **Attributes**:
|
||||||
- `tokens_before`: (Int): Number of tokens in context prior to compression
|
- `tokens_before`: (Int): Number of tokens in context prior to compression
|
||||||
- `tokens_after`: (Int): Number of tokens in context after compression
|
- `tokens_after`: (Int): Number of tokens in context after compression
|
||||||
|
|
||||||
|
#### GenAI Semantic Convention
|
||||||
|
|
||||||
|
The following metrics comply with [OpenTelemetry GenAI semantic conventions]
|
||||||
|
for standardized observability across GenAI applications:
|
||||||
|
|
||||||
|
- `gen_ai.client.token.usage` (Histogram, token): Number of input and output tokens used per operation.
|
||||||
|
- **Attributes**:
|
||||||
|
- `gen_ai.operation.name` (string): The operation type (currently always "generate_content")
|
||||||
|
- `gen_ai.provider.name` (string): The GenAI provider ("gcp.gen_ai" or "gcp.vertex_ai")
|
||||||
|
- `gen_ai.token.type` (string): The token type ("input" or "output")
|
||||||
|
- `gen_ai.request.model` (string, optional): The model name used for the request
|
||||||
|
- `gen_ai.response.model` (string, optional): The model name that generated the response
|
||||||
|
- `server.address` (string, optional): GenAI server address
|
||||||
|
- `server.port` (int, optional): GenAI server port
|
||||||
|
|
||||||
|
- `gen_ai.client.operation.duration` (Histogram, s): GenAI operation duration in seconds.
|
||||||
|
- **Attributes**:
|
||||||
|
- `gen_ai.operation.name` (string): The operation type (currently always "generate_content")
|
||||||
|
- `gen_ai.provider.name` (string): The GenAI provider ("gcp.gen_ai" or "gcp.vertex_ai")
|
||||||
|
- `gen_ai.request.model` (string, optional): The model name used for the request
|
||||||
|
- `gen_ai.response.model` (string, optional): The model name that generated the response
|
||||||
|
- `server.address` (string, optional): GenAI server address
|
||||||
|
- `server.port` (int, optional): GenAI server port
|
||||||
|
- `error.type` (string, optional): Error type if the operation failed
|
||||||
|
|
||||||
|
[OpenTelemetry GenAI semantic conventions]: https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/gen-ai-metrics.md
|
||||||
|
|||||||
@@ -85,6 +85,13 @@ export {
|
|||||||
recordContentRetry,
|
recordContentRetry,
|
||||||
recordContentRetryFailure,
|
recordContentRetryFailure,
|
||||||
recordModelRoutingMetrics,
|
recordModelRoutingMetrics,
|
||||||
|
// Custom metrics for token usage and API responses
|
||||||
|
recordCustomTokenUsageMetrics,
|
||||||
|
recordCustomApiResponseMetrics,
|
||||||
|
// OpenTelemetry GenAI semantic convention for token usage and operation duration
|
||||||
|
recordGenAiClientTokenUsage,
|
||||||
|
recordGenAiClientOperationDuration,
|
||||||
|
getConventionAttributes,
|
||||||
// Performance monitoring functions
|
// Performance monitoring functions
|
||||||
recordStartupPerformance,
|
recordStartupPerformance,
|
||||||
recordMemoryUsage,
|
recordMemoryUsage,
|
||||||
@@ -103,4 +110,8 @@ export {
|
|||||||
ToolExecutionPhase,
|
ToolExecutionPhase,
|
||||||
ApiRequestPhase,
|
ApiRequestPhase,
|
||||||
FileOperation,
|
FileOperation,
|
||||||
|
// OpenTelemetry Semantic Convention types
|
||||||
|
GenAiOperationName,
|
||||||
|
GenAiProviderName,
|
||||||
|
GenAiTokenType,
|
||||||
} from './metrics.js';
|
} from './metrics.js';
|
||||||
|
|||||||
@@ -76,7 +76,11 @@ import {
|
|||||||
ExtensionUninstallEvent,
|
ExtensionUninstallEvent,
|
||||||
} from './types.js';
|
} from './types.js';
|
||||||
import * as metrics from './metrics.js';
|
import * as metrics from './metrics.js';
|
||||||
import { FileOperation } from './metrics.js';
|
import {
|
||||||
|
FileOperation,
|
||||||
|
GenAiOperationName,
|
||||||
|
GenAiProviderName,
|
||||||
|
} from './metrics.js';
|
||||||
import * as sdk from './sdk.js';
|
import * as sdk from './sdk.js';
|
||||||
import { vi, describe, beforeEach, it, expect, afterEach } from 'vitest';
|
import { vi, describe, beforeEach, it, expect, afterEach } from 'vitest';
|
||||||
import type {
|
import type {
|
||||||
@@ -289,6 +293,12 @@ describe('loggers', () => {
|
|||||||
const mockMetrics = {
|
const mockMetrics = {
|
||||||
recordApiResponseMetrics: vi.fn(),
|
recordApiResponseMetrics: vi.fn(),
|
||||||
recordTokenUsageMetrics: vi.fn(),
|
recordTokenUsageMetrics: vi.fn(),
|
||||||
|
getConventionAttributes: vi.fn(() => ({
|
||||||
|
'gen_ai.operation.name': GenAiOperationName.GENERATE_CONTENT,
|
||||||
|
'gen_ai.provider.name': GenAiProviderName.GCP_VERTEX_AI,
|
||||||
|
'gen_ai.request.model': 'test-model',
|
||||||
|
'gen_ai.response.model': 'test-model',
|
||||||
|
})),
|
||||||
};
|
};
|
||||||
|
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
@@ -298,6 +308,9 @@ describe('loggers', () => {
|
|||||||
vi.spyOn(metrics, 'recordTokenUsageMetrics').mockImplementation(
|
vi.spyOn(metrics, 'recordTokenUsageMetrics').mockImplementation(
|
||||||
mockMetrics.recordTokenUsageMetrics,
|
mockMetrics.recordTokenUsageMetrics,
|
||||||
);
|
);
|
||||||
|
vi.spyOn(metrics, 'getConventionAttributes').mockImplementation(
|
||||||
|
mockMetrics.getConventionAttributes,
|
||||||
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should log an API response with all fields', () => {
|
it('should log an API response with all fields', () => {
|
||||||
@@ -345,13 +358,47 @@ describe('loggers', () => {
|
|||||||
expect(mockMetrics.recordApiResponseMetrics).toHaveBeenCalledWith(
|
expect(mockMetrics.recordApiResponseMetrics).toHaveBeenCalledWith(
|
||||||
mockConfig,
|
mockConfig,
|
||||||
100,
|
100,
|
||||||
{ model: 'test-model', status_code: 200 },
|
{
|
||||||
|
model: 'test-model',
|
||||||
|
status_code: 200,
|
||||||
|
genAiAttributes: {
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||||
|
'gen_ai.request.model': 'test-model',
|
||||||
|
'gen_ai.response.model': 'test-model',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
// Verify token usage calls for the input and output token types
|
||||||
|
expect(mockMetrics.recordTokenUsageMetrics).toHaveBeenCalledWith(
|
||||||
|
mockConfig,
|
||||||
|
17,
|
||||||
|
{
|
||||||
|
model: 'test-model',
|
||||||
|
type: 'input',
|
||||||
|
genAiAttributes: {
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||||
|
'gen_ai.request.model': 'test-model',
|
||||||
|
'gen_ai.response.model': 'test-model',
|
||||||
|
},
|
||||||
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
expect(mockMetrics.recordTokenUsageMetrics).toHaveBeenCalledWith(
|
expect(mockMetrics.recordTokenUsageMetrics).toHaveBeenCalledWith(
|
||||||
mockConfig,
|
mockConfig,
|
||||||
50,
|
50,
|
||||||
{ model: 'test-model', type: 'output' },
|
{
|
||||||
|
model: 'test-model',
|
||||||
|
type: 'output',
|
||||||
|
genAiAttributes: {
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||||
|
'gen_ai.request.model': 'test-model',
|
||||||
|
'gen_ai.response.model': 'test-model',
|
||||||
|
},
|
||||||
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
expect(mockUiEvent.addEvent).toHaveBeenCalledWith({
|
expect(mockUiEvent.addEvent).toHaveBeenCalledWith({
|
||||||
|
|||||||
@@ -67,8 +67,6 @@ import type {
|
|||||||
} from './types.js';
|
} from './types.js';
|
||||||
import {
|
import {
|
||||||
recordApiErrorMetrics,
|
recordApiErrorMetrics,
|
||||||
recordTokenUsageMetrics,
|
|
||||||
recordApiResponseMetrics,
|
|
||||||
recordToolCallMetrics,
|
recordToolCallMetrics,
|
||||||
recordChatCompressionMetrics,
|
recordChatCompressionMetrics,
|
||||||
recordFileOperationMetric,
|
recordFileOperationMetric,
|
||||||
@@ -77,6 +75,9 @@ import {
|
|||||||
recordContentRetryFailure,
|
recordContentRetryFailure,
|
||||||
recordModelRoutingMetrics,
|
recordModelRoutingMetrics,
|
||||||
recordModelSlashCommand,
|
recordModelSlashCommand,
|
||||||
|
getConventionAttributes,
|
||||||
|
recordTokenUsageMetrics,
|
||||||
|
recordApiResponseMetrics,
|
||||||
} from './metrics.js';
|
} from './metrics.js';
|
||||||
import { isTelemetrySdkInitialized } from './sdk.js';
|
import { isTelemetrySdkInitialized } from './sdk.js';
|
||||||
import type { UiEvent } from './uiTelemetry.js';
|
import type { UiEvent } from './uiTelemetry.js';
|
||||||
@@ -366,6 +367,17 @@ export function logApiError(config: Config, event: ApiErrorEvent): void {
|
|||||||
status_code: event.status_code,
|
status_code: event.status_code,
|
||||||
error_type: event.error_type,
|
error_type: event.error_type,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Record GenAI operation duration for errors
|
||||||
|
const conventionAttributes = getConventionAttributes(event);
|
||||||
|
recordApiResponseMetrics(config, event.duration_ms, {
|
||||||
|
model: event.model,
|
||||||
|
status_code: event.status_code,
|
||||||
|
genAiAttributes: {
|
||||||
|
...conventionAttributes,
|
||||||
|
'error.type': event.error_type || 'unknown',
|
||||||
|
},
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
export function logApiResponse(config: Config, event: ApiResponseEvent): void {
|
export function logApiResponse(config: Config, event: ApiResponseEvent): void {
|
||||||
@@ -398,30 +410,30 @@ export function logApiResponse(config: Config, event: ApiResponseEvent): void {
|
|||||||
attributes,
|
attributes,
|
||||||
};
|
};
|
||||||
logger.emit(logRecord);
|
logger.emit(logRecord);
|
||||||
|
|
||||||
|
const conventionAttributes = getConventionAttributes(event);
|
||||||
|
|
||||||
recordApiResponseMetrics(config, event.duration_ms, {
|
recordApiResponseMetrics(config, event.duration_ms, {
|
||||||
model: event.model,
|
model: event.model,
|
||||||
status_code: event.status_code,
|
status_code: event.status_code,
|
||||||
|
genAiAttributes: conventionAttributes,
|
||||||
});
|
});
|
||||||
recordTokenUsageMetrics(config, event.input_token_count, {
|
|
||||||
model: event.model,
|
const tokenUsageData = [
|
||||||
type: 'input',
|
{ count: event.input_token_count, type: 'input' as const },
|
||||||
});
|
{ count: event.output_token_count, type: 'output' as const },
|
||||||
recordTokenUsageMetrics(config, event.output_token_count, {
|
{ count: event.cached_content_token_count, type: 'cache' as const },
|
||||||
model: event.model,
|
{ count: event.thoughts_token_count, type: 'thought' as const },
|
||||||
type: 'output',
|
{ count: event.tool_token_count, type: 'tool' as const },
|
||||||
});
|
];
|
||||||
recordTokenUsageMetrics(config, event.cached_content_token_count, {
|
|
||||||
model: event.model,
|
for (const { count, type } of tokenUsageData) {
|
||||||
type: 'cache',
|
recordTokenUsageMetrics(config, count, {
|
||||||
});
|
model: event.model,
|
||||||
recordTokenUsageMetrics(config, event.thoughts_token_count, {
|
type,
|
||||||
model: event.model,
|
genAiAttributes: conventionAttributes,
|
||||||
type: 'thought',
|
});
|
||||||
});
|
}
|
||||||
recordTokenUsageMetrics(config, event.tool_token_count, {
|
|
||||||
model: event.model,
|
|
||||||
type: 'tool',
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export function logLoopDetected(
|
export function logLoopDetected(
|
||||||
|
|||||||
@@ -61,6 +61,11 @@ function originalOtelMockFactory() {
|
|||||||
setLogger: vi.fn(),
|
setLogger: vi.fn(),
|
||||||
warn: vi.fn(),
|
warn: vi.fn(),
|
||||||
},
|
},
|
||||||
|
DiagConsoleLogger: vi.fn(),
|
||||||
|
DiagLogLevel: {
|
||||||
|
NONE: 0,
|
||||||
|
INFO: 1,
|
||||||
|
},
|
||||||
} as const;
|
} as const;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -82,6 +87,8 @@ describe('Telemetry Metrics', () => {
|
|||||||
let recordPerformanceScoreModule: typeof import('./metrics.js').recordPerformanceScore;
|
let recordPerformanceScoreModule: typeof import('./metrics.js').recordPerformanceScore;
|
||||||
let recordPerformanceRegressionModule: typeof import('./metrics.js').recordPerformanceRegression;
|
let recordPerformanceRegressionModule: typeof import('./metrics.js').recordPerformanceRegression;
|
||||||
let recordBaselineComparisonModule: typeof import('./metrics.js').recordBaselineComparison;
|
let recordBaselineComparisonModule: typeof import('./metrics.js').recordBaselineComparison;
|
||||||
|
let recordGenAiClientTokenUsageModule: typeof import('./metrics.js').recordGenAiClientTokenUsage;
|
||||||
|
let recordGenAiClientOperationDurationModule: typeof import('./metrics.js').recordGenAiClientOperationDuration;
|
||||||
|
|
||||||
beforeEach(async () => {
|
beforeEach(async () => {
|
||||||
vi.resetModules();
|
vi.resetModules();
|
||||||
@@ -110,6 +117,10 @@ describe('Telemetry Metrics', () => {
|
|||||||
recordPerformanceRegressionModule =
|
recordPerformanceRegressionModule =
|
||||||
metricsJsModule.recordPerformanceRegression;
|
metricsJsModule.recordPerformanceRegression;
|
||||||
recordBaselineComparisonModule = metricsJsModule.recordBaselineComparison;
|
recordBaselineComparisonModule = metricsJsModule.recordBaselineComparison;
|
||||||
|
recordGenAiClientTokenUsageModule =
|
||||||
|
metricsJsModule.recordGenAiClientTokenUsage;
|
||||||
|
recordGenAiClientOperationDurationModule =
|
||||||
|
metricsJsModule.recordGenAiClientOperationDuration;
|
||||||
|
|
||||||
const otelApiModule = await import('@opentelemetry/api');
|
const otelApiModule = await import('@opentelemetry/api');
|
||||||
|
|
||||||
@@ -428,6 +439,182 @@ describe('Telemetry Metrics', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('OpenTelemetry GenAI Semantic Convention Metrics', () => {
|
||||||
|
const mockConfig = {
|
||||||
|
getSessionId: () => 'test-session-id',
|
||||||
|
getTelemetryEnabled: () => true,
|
||||||
|
} as unknown as Config;
|
||||||
|
|
||||||
|
describe('recordGenAiClientTokenUsage', () => {
|
||||||
|
it('should not record metrics when not initialized', () => {
|
||||||
|
recordGenAiClientTokenUsageModule(mockConfig, 100, {
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.gen_ai',
|
||||||
|
'gen_ai.token.type': 'input',
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(mockHistogramRecordFn).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should record input token usage with correct attributes', () => {
|
||||||
|
initializeMetricsModule(mockConfig);
|
||||||
|
mockHistogramRecordFn.mockClear();
|
||||||
|
|
||||||
|
recordGenAiClientTokenUsageModule(mockConfig, 150, {
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.gen_ai',
|
||||||
|
'gen_ai.token.type': 'input',
|
||||||
|
'gen_ai.request.model': 'gemini-2.0-flash',
|
||||||
|
'gen_ai.response.model': 'gemini-2.0-flash',
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(mockHistogramRecordFn).toHaveBeenCalledWith(150, {
|
||||||
|
'session.id': 'test-session-id',
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.gen_ai',
|
||||||
|
'gen_ai.token.type': 'input',
|
||||||
|
'gen_ai.request.model': 'gemini-2.0-flash',
|
||||||
|
'gen_ai.response.model': 'gemini-2.0-flash',
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should record output token usage with correct attributes', () => {
|
||||||
|
initializeMetricsModule(mockConfig);
|
||||||
|
mockHistogramRecordFn.mockClear();
|
||||||
|
|
||||||
|
recordGenAiClientTokenUsageModule(mockConfig, 75, {
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||||
|
'gen_ai.token.type': 'output',
|
||||||
|
'gen_ai.request.model': 'gemini-pro',
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(mockHistogramRecordFn).toHaveBeenCalledWith(75, {
|
||||||
|
'session.id': 'test-session-id',
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||||
|
'gen_ai.token.type': 'output',
|
||||||
|
'gen_ai.request.model': 'gemini-pro',
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should record token usage with optional attributes', () => {
|
||||||
|
initializeMetricsModule(mockConfig);
|
||||||
|
mockHistogramRecordFn.mockClear();
|
||||||
|
|
||||||
|
recordGenAiClientTokenUsageModule(mockConfig, 200, {
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||||
|
'gen_ai.token.type': 'input',
|
||||||
|
'gen_ai.request.model': 'text-embedding-004',
|
||||||
|
'server.address': 'aiplatform.googleapis.com',
|
||||||
|
'server.port': 443,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(mockHistogramRecordFn).toHaveBeenCalledWith(200, {
|
||||||
|
'session.id': 'test-session-id',
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||||
|
'gen_ai.token.type': 'input',
|
||||||
|
'gen_ai.request.model': 'text-embedding-004',
|
||||||
|
'server.address': 'aiplatform.googleapis.com',
|
||||||
|
'server.port': 443,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('recordGenAiClientOperationDuration', () => {
|
||||||
|
it('should not record metrics when not initialized', () => {
|
||||||
|
recordGenAiClientOperationDurationModule(mockConfig, 2.5, {
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.gen_ai',
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(mockHistogramRecordFn).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should record successful operation duration with correct attributes', () => {
|
||||||
|
initializeMetricsModule(mockConfig);
|
||||||
|
mockHistogramRecordFn.mockClear();
|
||||||
|
|
||||||
|
recordGenAiClientOperationDurationModule(mockConfig, 1.25, {
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.gen_ai',
|
||||||
|
'gen_ai.request.model': 'gemini-2.0-flash',
|
||||||
|
'gen_ai.response.model': 'gemini-2.0-flash',
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(mockHistogramRecordFn).toHaveBeenCalledWith(1.25, {
|
||||||
|
'session.id': 'test-session-id',
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.gen_ai',
|
||||||
|
'gen_ai.request.model': 'gemini-2.0-flash',
|
||||||
|
'gen_ai.response.model': 'gemini-2.0-flash',
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should record failed operation duration with error type', () => {
|
||||||
|
initializeMetricsModule(mockConfig);
|
||||||
|
mockHistogramRecordFn.mockClear();
|
||||||
|
|
||||||
|
recordGenAiClientOperationDurationModule(mockConfig, 3.75, {
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||||
|
'gen_ai.request.model': 'gemini-pro',
|
||||||
|
'error.type': 'quota_exceeded',
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(mockHistogramRecordFn).toHaveBeenCalledWith(3.75, {
|
||||||
|
'session.id': 'test-session-id',
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||||
|
'gen_ai.request.model': 'gemini-pro',
|
||||||
|
'error.type': 'quota_exceeded',
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should record operation duration with server details', () => {
|
||||||
|
initializeMetricsModule(mockConfig);
|
||||||
|
mockHistogramRecordFn.mockClear();
|
||||||
|
|
||||||
|
recordGenAiClientOperationDurationModule(mockConfig, 0.95, {
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||||
|
'gen_ai.request.model': 'gemini-1.5-pro',
|
||||||
|
'gen_ai.response.model': 'gemini-1.5-pro-001',
|
||||||
|
'server.address': 'us-central1-aiplatform.googleapis.com',
|
||||||
|
'server.port': 443,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(mockHistogramRecordFn).toHaveBeenCalledWith(0.95, {
|
||||||
|
'session.id': 'test-session-id',
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.vertex_ai',
|
||||||
|
'gen_ai.request.model': 'gemini-1.5-pro',
|
||||||
|
'gen_ai.response.model': 'gemini-1.5-pro-001',
|
||||||
|
'server.address': 'us-central1-aiplatform.googleapis.com',
|
||||||
|
'server.port': 443,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle minimal required attributes', () => {
|
||||||
|
initializeMetricsModule(mockConfig);
|
||||||
|
mockHistogramRecordFn.mockClear();
|
||||||
|
|
||||||
|
recordGenAiClientOperationDurationModule(mockConfig, 2.1, {
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.gen_ai',
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(mockHistogramRecordFn).toHaveBeenCalledWith(2.1, {
|
||||||
|
'session.id': 'test-session-id',
|
||||||
|
'gen_ai.operation.name': 'generate_content',
|
||||||
|
'gen_ai.provider.name': 'gcp.gen_ai',
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('Performance Monitoring Metrics', () => {
|
describe('Performance Monitoring Metrics', () => {
|
||||||
const mockConfig = {
|
const mockConfig = {
|
||||||
getSessionId: () => 'test-session-id',
|
getSessionId: () => 'test-session-id',
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import { diag, metrics, ValueType } from '@opentelemetry/api';
|
|||||||
import { SERVICE_NAME, EVENT_CHAT_COMPRESSION } from './constants.js';
|
import { SERVICE_NAME, EVENT_CHAT_COMPRESSION } from './constants.js';
|
||||||
import type { Config } from '../config/config.js';
|
import type { Config } from '../config/config.js';
|
||||||
import type { ModelRoutingEvent, ModelSlashCommandEvent } from './types.js';
|
import type { ModelRoutingEvent, ModelSlashCommandEvent } from './types.js';
|
||||||
|
import { AuthType } from '../core/contentGenerator.js';
|
||||||
|
|
||||||
const TOOL_CALL_COUNT = 'gemini_cli.tool.call.count';
|
const TOOL_CALL_COUNT = 'gemini_cli.tool.call.count';
|
||||||
const TOOL_CALL_LATENCY = 'gemini_cli.tool.call.latency';
|
const TOOL_CALL_LATENCY = 'gemini_cli.tool.call.latency';
|
||||||
@@ -26,6 +27,10 @@ const MODEL_ROUTING_FAILURE_COUNT = 'gemini_cli.model_routing.failure.count';
|
|||||||
const MODEL_SLASH_COMMAND_CALL_COUNT =
|
const MODEL_SLASH_COMMAND_CALL_COUNT =
|
||||||
'gemini_cli.slash_command.model.call_count';
|
'gemini_cli.slash_command.model.call_count';
|
||||||
|
|
||||||
|
// OpenTelemetry GenAI Semantic Convention Metrics
|
||||||
|
const GEN_AI_CLIENT_TOKEN_USAGE = 'gen_ai.client.token.usage';
|
||||||
|
const GEN_AI_CLIENT_OPERATION_DURATION = 'gen_ai.client.operation.duration';
|
||||||
|
|
||||||
// Performance Monitoring Metrics
|
// Performance Monitoring Metrics
|
||||||
const STARTUP_TIME = 'gemini_cli.startup.duration';
|
const STARTUP_TIME = 'gemini_cli.startup.duration';
|
||||||
const MEMORY_USAGE = 'gemini_cli.memory.usage';
|
const MEMORY_USAGE = 'gemini_cli.memory.usage';
|
||||||
@@ -170,6 +175,36 @@ const HISTOGRAM_DEFINITIONS = {
|
|||||||
'routing.decision_source': string;
|
'routing.decision_source': string;
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
[GEN_AI_CLIENT_TOKEN_USAGE]: {
|
||||||
|
description: 'Number of input and output tokens used.',
|
||||||
|
unit: 'token',
|
||||||
|
valueType: ValueType.INT,
|
||||||
|
assign: (h: Histogram) => (genAiClientTokenUsageHistogram = h),
|
||||||
|
attributes: {} as {
|
||||||
|
'gen_ai.operation.name': string;
|
||||||
|
'gen_ai.provider.name': string;
|
||||||
|
'gen_ai.token.type': 'input' | 'output';
|
||||||
|
'gen_ai.request.model'?: string;
|
||||||
|
'gen_ai.response.model'?: string;
|
||||||
|
'server.address'?: string;
|
||||||
|
'server.port'?: number;
|
||||||
|
},
|
||||||
|
},
|
||||||
|
[GEN_AI_CLIENT_OPERATION_DURATION]: {
|
||||||
|
description: 'GenAI operation duration.',
|
||||||
|
unit: 's',
|
||||||
|
valueType: ValueType.DOUBLE,
|
||||||
|
assign: (h: Histogram) => (genAiClientOperationDurationHistogram = h),
|
||||||
|
attributes: {} as {
|
||||||
|
'gen_ai.operation.name': string;
|
||||||
|
'gen_ai.provider.name': string;
|
||||||
|
'gen_ai.request.model'?: string;
|
||||||
|
'gen_ai.response.model'?: string;
|
||||||
|
'server.address'?: string;
|
||||||
|
'server.port'?: number;
|
||||||
|
'error.type'?: string;
|
||||||
|
},
|
||||||
|
},
|
||||||
} as const;
|
} as const;
|
||||||
|
|
||||||
const PERFORMANCE_COUNTER_DEFINITIONS = {
|
const PERFORMANCE_COUNTER_DEFINITIONS = {
|
||||||
@@ -341,6 +376,20 @@ export enum ApiRequestPhase {
|
|||||||
TOKEN_PROCESSING = 'token_processing',
|
TOKEN_PROCESSING = 'token_processing',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export enum GenAiOperationName {
|
||||||
|
GENERATE_CONTENT = 'generate_content',
|
||||||
|
}
|
||||||
|
|
||||||
|
export enum GenAiProviderName {
|
||||||
|
GCP_GEN_AI = 'gcp.gen_ai',
|
||||||
|
GCP_VERTEX_AI = 'gcp.vertex_ai',
|
||||||
|
}
|
||||||
|
|
||||||
|
export enum GenAiTokenType {
|
||||||
|
INPUT = 'input',
|
||||||
|
OUTPUT = 'output',
|
||||||
|
}
|
||||||
|
|
||||||
let cliMeter: Meter | undefined;
|
let cliMeter: Meter | undefined;
|
||||||
let toolCallCounter: Counter | undefined;
|
let toolCallCounter: Counter | undefined;
|
||||||
let toolCallLatencyHistogram: Histogram | undefined;
|
let toolCallLatencyHistogram: Histogram | undefined;
|
||||||
@@ -357,6 +406,10 @@ let modelRoutingLatencyHistogram: Histogram | undefined;
|
|||||||
let modelRoutingFailureCounter: Counter | undefined;
|
let modelRoutingFailureCounter: Counter | undefined;
|
||||||
let modelSlashCommandCallCounter: Counter | undefined;
|
let modelSlashCommandCallCounter: Counter | undefined;
|
||||||
|
|
||||||
|
// OpenTelemetry GenAI Semantic Convention Metrics
|
||||||
|
let genAiClientTokenUsageHistogram: Histogram | undefined;
|
||||||
|
let genAiClientOperationDurationHistogram: Histogram | undefined;
|
||||||
|
|
||||||
// Performance Monitoring Metrics
|
// Performance Monitoring Metrics
|
||||||
let startupTimeHistogram: Histogram | undefined;
|
let startupTimeHistogram: Histogram | undefined;
|
||||||
let memoryUsageGauge: Histogram | undefined; // Using Histogram until ObservableGauge is available
|
let memoryUsageGauge: Histogram | undefined; // Using Histogram until ObservableGauge is available
|
||||||
@@ -437,7 +490,7 @@ export function recordToolCallMetrics(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
export function recordTokenUsageMetrics(
|
export function recordCustomTokenUsageMetrics(
|
||||||
config: Config,
|
config: Config,
|
||||||
tokenCount: number,
|
tokenCount: number,
|
||||||
attributes: MetricDefinitions[typeof TOKEN_USAGE]['attributes'],
|
attributes: MetricDefinitions[typeof TOKEN_USAGE]['attributes'],
|
||||||
@@ -449,7 +502,7 @@ export function recordTokenUsageMetrics(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
export function recordApiResponseMetrics(
|
export function recordCustomApiResponseMetrics(
|
||||||
config: Config,
|
config: Config,
|
||||||
durationMs: number,
|
durationMs: number,
|
||||||
attributes: MetricDefinitions[typeof API_REQUEST_COUNT]['attributes'],
|
attributes: MetricDefinitions[typeof API_REQUEST_COUNT]['attributes'],
|
||||||
@@ -572,6 +625,81 @@ export function recordModelRoutingMetrics(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// OpenTelemetry GenAI Semantic Convention Recording Functions
|
||||||
|
|
||||||
|
export function recordGenAiClientTokenUsage(
|
||||||
|
config: Config,
|
||||||
|
tokenCount: number,
|
||||||
|
attributes: MetricDefinitions[typeof GEN_AI_CLIENT_TOKEN_USAGE]['attributes'],
|
||||||
|
): void {
|
||||||
|
if (!genAiClientTokenUsageHistogram || !isMetricsInitialized) return;
|
||||||
|
|
||||||
|
const metricAttributes: Attributes = {
|
||||||
|
...baseMetricDefinition.getCommonAttributes(config),
|
||||||
|
...attributes,
|
||||||
|
};
|
||||||
|
|
||||||
|
genAiClientTokenUsageHistogram.record(tokenCount, metricAttributes);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function recordGenAiClientOperationDuration(
|
||||||
|
config: Config,
|
||||||
|
durationSeconds: number,
|
||||||
|
attributes: MetricDefinitions[typeof GEN_AI_CLIENT_OPERATION_DURATION]['attributes'],
|
||||||
|
): void {
|
||||||
|
if (!genAiClientOperationDurationHistogram || !isMetricsInitialized) return;
|
||||||
|
|
||||||
|
const metricAttributes: Attributes = {
|
||||||
|
...baseMetricDefinition.getCommonAttributes(config),
|
||||||
|
...attributes,
|
||||||
|
};
|
||||||
|
|
||||||
|
genAiClientOperationDurationHistogram.record(
|
||||||
|
durationSeconds,
|
||||||
|
metricAttributes,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function getConventionAttributes(event: {
|
||||||
|
model: string;
|
||||||
|
auth_type?: string;
|
||||||
|
}): {
|
||||||
|
'gen_ai.operation.name': GenAiOperationName;
|
||||||
|
'gen_ai.provider.name': GenAiProviderName;
|
||||||
|
'gen_ai.request.model': string;
|
||||||
|
'gen_ai.response.model': string;
|
||||||
|
} {
|
||||||
|
const operationName = getGenAiOperationName();
|
||||||
|
const provider = getGenAiProvider(event.auth_type);
|
||||||
|
|
||||||
|
return {
|
||||||
|
'gen_ai.operation.name': operationName,
|
||||||
|
'gen_ai.provider.name': provider,
|
||||||
|
'gen_ai.request.model': event.model,
|
||||||
|
'gen_ai.response.model': event.model,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maps authentication type to GenAI provider name following OpenTelemetry conventions
|
||||||
|
*/
|
||||||
|
function getGenAiProvider(authType?: string): GenAiProviderName {
|
||||||
|
switch (authType) {
|
||||||
|
case AuthType.USE_VERTEX_AI:
|
||||||
|
case AuthType.CLOUD_SHELL:
|
||||||
|
case AuthType.LOGIN_WITH_GOOGLE:
|
||||||
|
return GenAiProviderName.GCP_VERTEX_AI;
|
||||||
|
case AuthType.USE_GEMINI:
|
||||||
|
default:
|
||||||
|
return GenAiProviderName.GCP_GEN_AI;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function getGenAiOperationName(): GenAiOperationName {
|
||||||
|
return GenAiOperationName.GENERATE_CONTENT;
|
||||||
|
}
|
||||||
|
|
||||||
// Performance Monitoring Functions
|
// Performance Monitoring Functions
|
||||||
|
|
||||||
export function initializePerformanceMonitoring(config: Config): void {
|
export function initializePerformanceMonitoring(config: Config): void {
|
||||||
@@ -767,3 +895,71 @@ export function recordBaselineComparison(
|
|||||||
export function isPerformanceMonitoringActive(): boolean {
|
export function isPerformanceMonitoringActive(): boolean {
|
||||||
return isPerformanceMonitoringEnabled && isMetricsInitialized;
|
return isPerformanceMonitoringEnabled && isMetricsInitialized;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Token usage recording that emits both custom and convention metrics.
|
||||||
|
*/
|
||||||
|
export function recordTokenUsageMetrics(
|
||||||
|
config: Config,
|
||||||
|
tokenCount: number,
|
||||||
|
attributes: {
|
||||||
|
model: string;
|
||||||
|
type: 'input' | 'output' | 'thought' | 'cache' | 'tool';
|
||||||
|
genAiAttributes?: {
|
||||||
|
'gen_ai.operation.name': string;
|
||||||
|
'gen_ai.provider.name': string;
|
||||||
|
'gen_ai.request.model'?: string;
|
||||||
|
'gen_ai.response.model'?: string;
|
||||||
|
'server.address'?: string;
|
||||||
|
'server.port'?: number;
|
||||||
|
};
|
||||||
|
},
|
||||||
|
): void {
|
||||||
|
recordCustomTokenUsageMetrics(config, tokenCount, {
|
||||||
|
model: attributes.model,
|
||||||
|
type: attributes.type,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (
|
||||||
|
(attributes.type === 'input' || attributes.type === 'output') &&
|
||||||
|
attributes.genAiAttributes
|
||||||
|
) {
|
||||||
|
recordGenAiClientTokenUsage(config, tokenCount, {
|
||||||
|
...attributes.genAiAttributes,
|
||||||
|
'gen_ai.token.type': attributes.type,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Operation latency recording that emits both custom and convention metrics.
|
||||||
|
*/
|
||||||
|
export function recordApiResponseMetrics(
|
||||||
|
config: Config,
|
||||||
|
durationMs: number,
|
||||||
|
attributes: {
|
||||||
|
model: string;
|
||||||
|
status_code?: number | string;
|
||||||
|
genAiAttributes?: {
|
||||||
|
'gen_ai.operation.name': string;
|
||||||
|
'gen_ai.provider.name': string;
|
||||||
|
'gen_ai.request.model'?: string;
|
||||||
|
'gen_ai.response.model'?: string;
|
||||||
|
'server.address'?: string;
|
||||||
|
'server.port'?: number;
|
||||||
|
'error.type'?: string;
|
||||||
|
};
|
||||||
|
},
|
||||||
|
): void {
|
||||||
|
recordCustomApiResponseMetrics(config, durationMs, {
|
||||||
|
model: attributes.model,
|
||||||
|
status_code: attributes.status_code,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (attributes.genAiAttributes) {
|
||||||
|
const durationSeconds = durationMs / 1000;
|
||||||
|
recordGenAiClientOperationDuration(config, durationSeconds, {
|
||||||
|
...attributes.genAiAttributes,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user