feat(telemetry): Add telemetry and metrics for model routing (#8518)

2026-04-26 04:54:25 -07:00 · 2025-09-16 16:53:58 -04:00
parent 459de383b2
commit a0079785af
16 changed files with 507 additions and 20 deletions
@@ -289,6 +289,7 @@ describe('Gemini Client (client.ts)', () => {
      getChatCompression: vi.fn().mockReturnValue(undefined),
      getSkipNextSpeakerCheck: vi.fn().mockReturnValue(false),
      getUseSmartEdit: vi.fn().mockReturnValue(false),
+      getUseModelRouter: vi.fn().mockReturnValue(false),
      getProjectRoot: vi.fn().mockReturnValue('/test/project/root'),
      storage: {
        getProjectTempDir: vi.fn().mockReturnValue('/test/temp'),
@@ -188,6 +188,7 @@ describe('CoreToolScheduler', () => {
      getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
      getToolRegistry: () => mockToolRegistry,
      getUseSmartEdit: () => false,
+      getUseModelRouter: () => false,
      getGeminiClient: () => null, // No client needed for these tests
    } as unknown as Config;

@@ -223,6 +224,7 @@ describe('CoreToolScheduler', () => {
      const mockConfig = {
        getToolRegistry: () => mockToolRegistry,
        getUseSmartEdit: () => false,
+        getUseModelRouter: () => false,
        getGeminiClient: () => null, // No client needed for these tests
      } as unknown as Config;
      const mockToolRegistry = {
@@ -299,6 +301,7 @@ describe('CoreToolScheduler with payload', () => {
      getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
      getToolRegistry: () => mockToolRegistry,
      getUseSmartEdit: () => false,
+      getUseModelRouter: () => false,
      getGeminiClient: () => null, // No client needed for these tests
    } as unknown as Config;

@@ -618,6 +621,7 @@ describe('CoreToolScheduler edit cancellation', () => {
      },
      getToolRegistry: () => mockToolRegistry,
      getUseSmartEdit: () => false,
+      getUseModelRouter: () => false,
      getGeminiClient: () => null, // No client needed for these tests
    } as unknown as Config;

@@ -721,6 +725,7 @@ describe('CoreToolScheduler YOLO mode', () => {
        DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
      getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
      getUseSmartEdit: () => false,
+      getUseModelRouter: () => false,
      getGeminiClient: () => null, // No client needed for these tests
    } as unknown as Config;

@@ -827,6 +832,7 @@ describe('CoreToolScheduler request queueing', () => {
      getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
      getToolRegistry: () => mockToolRegistry,
      getUseSmartEdit: () => false,
+      getUseModelRouter: () => false,
      getGeminiClient: () => null, // No client needed for these tests
    } as unknown as Config;

@@ -957,6 +963,7 @@ describe('CoreToolScheduler request queueing', () => {
        DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
      getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
      getUseSmartEdit: () => false,
+      getUseModelRouter: () => false,
      getGeminiClient: () => null, // No client needed for these tests
    } as unknown as Config;

@@ -1054,6 +1061,7 @@ describe('CoreToolScheduler request queueing', () => {
      getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
      getToolRegistry: () => mockToolRegistry,
      getUseSmartEdit: () => false,
+      getUseModelRouter: () => false,
      getGeminiClient: () => null, // No client needed for these tests
    } as unknown as Config;

@@ -1125,6 +1133,7 @@ describe('CoreToolScheduler request queueing', () => {
        DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
      getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
      getUseSmartEdit: () => false,
+      getUseModelRouter: () => false,
      getGeminiClient: () => null, // No client needed for these tests
    } as unknown as Config;

@@ -57,6 +57,7 @@ describe('executeToolCall', () => {
        DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
      getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
      getUseSmartEdit: () => false,
+      getUseModelRouter: () => false,
      getGeminiClient: () => null, // No client needed for these tests
    } as unknown as Config;

@@ -14,6 +14,8 @@ import { CompositeStrategy } from './strategies/compositeStrategy.js';
 import { FallbackStrategy } from './strategies/fallbackStrategy.js';
 import { OverrideStrategy } from './strategies/overrideStrategy.js';
 import { ClassifierStrategy } from './strategies/classifierStrategy.js';
+import { logModelRouting } from '../telemetry/loggers.js';
+import { ModelRoutingEvent } from '../telemetry/types.js';

 vi.mock('../config/config.js');
 vi.mock('../core/baseLlmClient.js');
@@ -22,6 +24,8 @@ vi.mock('./strategies/compositeStrategy.js');
 vi.mock('./strategies/fallbackStrategy.js');
 vi.mock('./strategies/overrideStrategy.js');
 vi.mock('./strategies/classifierStrategy.js');
+vi.mock('../telemetry/loggers.js');
+vi.mock('../telemetry/types.js');

 describe('ModelRouterService', () => {
  let service: ModelRouterService;
@@ -78,15 +82,16 @@ describe('ModelRouterService', () => {
  });

  describe('route()', () => {
+    const strategyDecision: RoutingDecision = {
+      model: 'strategy-chosen-model',
+      metadata: {
+        source: 'test-router/fallback',
+        latencyMs: 10,
+        reasoning: 'Strategy reasoning',
+      },
+    };
+
    it('should delegate routing to the composite strategy', async () => {
-      const strategyDecision: RoutingDecision = {
-        model: 'strategy-chosen-model',
-        metadata: {
-          source: 'test-router/fallback',
-          latencyMs: 10,
-          reasoning: 'Strategy reasoning',
-        },
-      };
      const strategySpy = vi
        .spyOn(mockCompositeStrategy, 'route')
        .mockResolvedValue(strategyDecision);
@@ -100,5 +105,47 @@ describe('ModelRouterService', () => {
      );
      expect(decision).toEqual(strategyDecision);
    });
+
+    it('should log a telemetry event on a successful decision', async () => {
+      vi.spyOn(mockCompositeStrategy, 'route').mockResolvedValue(
+        strategyDecision,
+      );
+
+      await service.route(mockContext);
+
+      expect(ModelRoutingEvent).toHaveBeenCalledWith(
+        'strategy-chosen-model',
+        'test-router/fallback',
+        10,
+        'Strategy reasoning',
+        false,
+        undefined,
+      );
+      expect(logModelRouting).toHaveBeenCalledWith(
+        mockConfig,
+        expect.any(ModelRoutingEvent),
+      );
+    });
+
+    it('should log a telemetry event and re-throw on a failed decision', async () => {
+      const testError = new Error('Strategy failed');
+      vi.spyOn(mockCompositeStrategy, 'route').mockRejectedValue(testError);
+      vi.spyOn(mockConfig, 'getModel').mockReturnValue('default-model');
+
+      await expect(service.route(mockContext)).rejects.toThrow(testError);
+
+      expect(ModelRoutingEvent).toHaveBeenCalledWith(
+        'default-model',
+        'router-exception',
+        expect.any(Number),
+        'An exception occurred during routing.',
+        true,
+        'Strategy failed',
+      );
+      expect(logModelRouting).toHaveBeenCalledWith(
+        mockConfig,
+        expect.any(ModelRoutingEvent),
+      );
+    });
  });
 });
@@ -16,6 +16,9 @@ import { CompositeStrategy } from './strategies/compositeStrategy.js';
 import { FallbackStrategy } from './strategies/fallbackStrategy.js';
 import { OverrideStrategy } from './strategies/overrideStrategy.js';

+import { logModelRouting } from '../telemetry/loggers.js';
+import { ModelRoutingEvent } from '../telemetry/types.js';
+
 /**
 * A centralized service for making model routing decisions.
 */
@@ -49,12 +52,55 @@ export class ModelRouterService {
   * @returns A promise that resolves to a RoutingDecision.
   */
  async route(context: RoutingContext): Promise<RoutingDecision> {
-    const decision = await this.strategy.route(
-      context,
-      this.config,
-      this.config.getBaseLlmClient(),
-    );
+    const startTime = Date.now();
+    let decision: RoutingDecision;

-    return decision;
+    try {
+      decision = await this.strategy.route(
+        context,
+        this.config,
+        this.config.getBaseLlmClient(),
+      );
+
+      const event = new ModelRoutingEvent(
+        decision.model,
+        decision.metadata.source,
+        decision.metadata.latencyMs,
+        decision.metadata.reasoning,
+        false, // failed
+        undefined, // error_message
+      );
+      logModelRouting(this.config, event);
+
+      return decision;
+    } catch (e) {
+      const failed = true;
+      const error_message = e instanceof Error ? e.message : String(e);
+      // Create a fallback decision for logging purposes
+      // We do not actually route here. This should never happen so we should
+      // fail loudly to catch any issues where this happens.
+      decision = {
+        model: this.config.getModel(),
+        metadata: {
+          source: 'router-exception',
+          latencyMs: Date.now() - startTime,
+          reasoning: 'An exception occurred during routing.',
+          error: error_message,
+        },
+      };
+
+      const event = new ModelRoutingEvent(
+        decision.model,
+        decision.metadata.source,
+        decision.metadata.latencyMs,
+        decision.metadata.reasoning,
+        failed,
+        error_message,
+      );
+
+      logModelRouting(this.config, event);
+
+      throw e;
+    }
  }
 }
@@ -23,7 +23,11 @@ import { EventMetadataKey } from './event-metadata-key.js';
 import { makeFakeConfig } from '../../test-utils/config.js';
 import { http, HttpResponse } from 'msw';
 import { server } from '../../mocks/msw.js';
-import { UserPromptEvent, makeChatCompressionEvent } from '../types.js';
+import {
+  UserPromptEvent,
+  makeChatCompressionEvent,
+  ModelRoutingEvent,
+} from '../types.js';
 import { GIT_COMMIT_INFO, CLI_VERSION } from '../../generated/git-commit.js';
 import { UserAccountManager } from '../../utils/userAccountManager.js';
 import { InstallationManager } from '../../utils/installationManager.js';
@@ -209,7 +213,9 @@ describe('ClearcutLogger', () => {
      const cli_version = CLI_VERSION;
      const git_commit_hash = GIT_COMMIT_INFO;
      const prompt_id = 'my-prompt-123';
-      const user_settings = safeJsonStringify([{ smart_edit_enabled: true }]);
+      const user_settings = safeJsonStringify([
+        { smart_edit_enabled: true, model_router_enabled: false },
+      ]);

      // Setup logger with expected values
      const { logger, loggerConfig } = setup({
@@ -293,11 +299,13 @@ describe('ClearcutLogger', () => {
      });
    });

-    it('logs the value of config.useSmartEdit', () => {
-      const user_settings = safeJsonStringify([{ smart_edit_enabled: true }]);
+    it('logs the value of config.useSmartEdit and config.useModelRouter', () => {
+      const user_settings = safeJsonStringify([
+        { smart_edit_enabled: true, model_router_enabled: true },
+      ]);

      const { logger } = setup({
-        config: { useSmartEdit: true },
+        config: { useSmartEdit: true, useModelRouter: true },
      });

      vi.stubEnv('TERM_PROGRAM', 'vscode');
@@ -595,4 +603,78 @@ describe('ClearcutLogger', () => {
      expect(firstRequeuedEvent.event_id).toBe('failed_5');
    });
  });
+
+  describe('logModelRoutingEvent', () => {
+    it('logs a successful routing event', () => {
+      const { logger } = setup();
+      const event = new ModelRoutingEvent(
+        'gemini-pro',
+        'default-strategy',
+        123,
+        'some reasoning',
+        false,
+        undefined,
+      );
+
+      logger?.logModelRoutingEvent(event);
+
+      const events = getEvents(logger!);
+      expect(events.length).toBe(1);
+      expect(events[0]).toHaveEventName(EventNames.MODEL_ROUTING);
+      expect(events[0]).toHaveMetadataValue([
+        EventMetadataKey.GEMINI_CLI_ROUTING_DECISION,
+        'gemini-pro',
+      ]);
+      expect(events[0]).toHaveMetadataValue([
+        EventMetadataKey.GEMINI_CLI_ROUTING_DECISION_SOURCE,
+        'default-strategy',
+      ]);
+      expect(events[0]).toHaveMetadataValue([
+        EventMetadataKey.GEMINI_CLI_ROUTING_LATENCY_MS,
+        '123',
+      ]);
+      expect(events[0]).toHaveMetadataValue([
+        EventMetadataKey.GEMINI_CLI_ROUTING_FAILURE,
+        'false',
+      ]);
+    });
+
+    it('logs a failed routing event with a reason', () => {
+      const { logger } = setup();
+      const event = new ModelRoutingEvent(
+        'gemini-pro',
+        'router-exception',
+        234,
+        'some reasoning',
+        true,
+        'Something went wrong',
+      );
+
+      logger?.logModelRoutingEvent(event);
+
+      const events = getEvents(logger!);
+      expect(events.length).toBe(1);
+      expect(events[0]).toHaveEventName(EventNames.MODEL_ROUTING);
+      expect(events[0]).toHaveMetadataValue([
+        EventMetadataKey.GEMINI_CLI_ROUTING_DECISION,
+        'gemini-pro',
+      ]);
+      expect(events[0]).toHaveMetadataValue([
+        EventMetadataKey.GEMINI_CLI_ROUTING_DECISION_SOURCE,
+        'router-exception',
+      ]);
+      expect(events[0]).toHaveMetadataValue([
+        EventMetadataKey.GEMINI_CLI_ROUTING_LATENCY_MS,
+        '234',
+      ]);
+      expect(events[0]).toHaveMetadataValue([
+        EventMetadataKey.GEMINI_CLI_ROUTING_FAILURE,
+        'true',
+      ]);
+      expect(events[0]).toHaveMetadataValue([
+        EventMetadataKey.GEMINI_CLI_ROUTING_FAILURE_REASON,
+        'Something went wrong',
+      ]);
+    });
+  });
 });
@@ -27,6 +27,7 @@ import type {
  ExtensionInstallEvent,
  ToolOutputTruncatedEvent,
  ExtensionUninstallEvent,
+  ModelRoutingEvent,
 } from '../types.js';
 import { EventMetadataKey } from './event-metadata-key.js';
 import type { Config } from '../../config/config.js';
@@ -63,6 +64,7 @@ export enum EventNames {
  EXTENSION_INSTALL = 'extension_install',
  EXTENSION_UNINSTALL = 'extension_uninstall',
  TOOL_OUTPUT_TRUNCATED = 'tool_output_truncated',
+  MODEL_ROUTING = 'model_routing',
 }

 export interface LogResponse {
@@ -926,6 +928,37 @@ export class ClearcutLogger {
    this.flushIfNeeded();
  }

+  logModelRoutingEvent(event: ModelRoutingEvent): void {
+    const data: EventValue[] = [
+      {
+        gemini_cli_key: EventMetadataKey.GEMINI_CLI_ROUTING_DECISION,
+        value: event.decision_model,
+      },
+      {
+        gemini_cli_key: EventMetadataKey.GEMINI_CLI_ROUTING_DECISION_SOURCE,
+        value: event.decision_source,
+      },
+      {
+        gemini_cli_key: EventMetadataKey.GEMINI_CLI_ROUTING_LATENCY_MS,
+        value: event.routing_latency_ms.toString(),
+      },
+      {
+        gemini_cli_key: EventMetadataKey.GEMINI_CLI_ROUTING_FAILURE,
+        value: event.failed.toString(),
+      },
+    ];
+
+    if (event.error_message) {
+      data.push({
+        gemini_cli_key: EventMetadataKey.GEMINI_CLI_ROUTING_FAILURE_REASON,
+        value: event.error_message,
+      });
+    }
+
+    this.enqueueLogEvent(this.createLogEvent(EventNames.MODEL_ROUTING, data));
+    this.flushIfNeeded();
+  }
+
  /**
   * Adds default fields to data, and returns a new data array.  This fields
   * should exist on all log events.
@@ -975,7 +1008,10 @@ export class ClearcutLogger {
      {
        gemini_cli_key: EventMetadataKey.GEMINI_CLI_USER_SETTINGS,
        value: safeJsonStringify([
-          { smart_edit_enabled: this.config?.getUseSmartEdit() ?? false },
+          {
+            smart_edit_enabled: this.config?.getUseSmartEdit() ?? false,
+            model_router_enabled: this.config?.getUseModelRouter() ?? false,
+          },
        ]),
      },
    ];
@@ -368,4 +368,25 @@ export enum EventMetadataKey {

  // Logs the number of lines the tool output was truncated to.
  GEMINI_CLI_TOOL_OUTPUT_TRUNCATED_LINES = 92,
+
+  // ==========================================================================
+  // Model Router Event Keys
+  // ==========================================================================
+
+  // Logs the outcome of a model routing decision (e.g., which route/model was
+  // selected).
+  GEMINI_CLI_ROUTING_DECISION = 97,
+
+  // Logs an event when the model router fails to make a decision or the chosen
+  // route fails.
+  GEMINI_CLI_ROUTING_FAILURE = 98,
+
+  // Logs the latency in milliseconds for the router to make a decision.
+  GEMINI_CLI_ROUTING_LATENCY_MS = 99,
+
+  // Logs a specific reason for a routing failure.
+  GEMINI_CLI_ROUTING_FAILURE_REASON = 100,
+
+  // Logs the source of the decision.
+  GEMINI_CLI_ROUTING_DECISION_SOURCE = 101,
 }
@@ -37,3 +37,7 @@ export const METRIC_INVALID_CHUNK_COUNT = 'gemini_cli.chat.invalid_chunk.count';
 export const METRIC_CONTENT_RETRY_COUNT = 'gemini_cli.chat.content_retry.count';
 export const METRIC_CONTENT_RETRY_FAILURE_COUNT =
  'gemini_cli.chat.content_retry_failure.count';
+export const EVENT_MODEL_ROUTING = 'gemini_cli.model_routing';
+export const METRIC_MODEL_ROUTING_LATENCY = 'gemini_cli.model_routing.latency';
+export const METRIC_MODEL_ROUTING_FAILURE_COUNT =
+  'gemini_cli.model_routing.failure.count';
@@ -32,6 +32,7 @@ import {
  EVENT_MALFORMED_JSON_RESPONSE,
  EVENT_FILE_OPERATION,
  EVENT_RIPGREP_FALLBACK,
+  EVENT_MODEL_ROUTING,
 } from './constants.js';
 import {
  logApiRequest,
@@ -45,6 +46,7 @@ import {
  logFileOperation,
  logRipgrepFallback,
  logToolOutputTruncated,
+  logModelRouting,
 } from './loggers.js';
 import { ToolCallDecision } from './tool-call-decision.js';
 import {
@@ -59,6 +61,7 @@ import {
  makeChatCompressionEvent,
  FileOperationEvent,
  ToolOutputTruncatedEvent,
+  ModelRoutingEvent,
 } from './types.js';
 import * as metrics from './metrics.js';
 import { FileOperation } from './metrics.js';
@@ -1041,4 +1044,68 @@ describe('loggers', () => {
      });
    });
  });
+
+  describe('logModelRouting', () => {
+    const mockConfig = {
+      getSessionId: () => 'test-session-id',
+      getUsageStatisticsEnabled: () => true,
+    } as unknown as Config;
+
+    beforeEach(() => {
+      vi.spyOn(ClearcutLogger.prototype, 'logModelRoutingEvent');
+      vi.spyOn(metrics, 'recordModelRoutingMetrics');
+    });
+
+    it('should log the event to Clearcut and OTEL, and record metrics', () => {
+      const event = new ModelRoutingEvent(
+        'gemini-pro',
+        'default',
+        100,
+        'test-reason',
+        false,
+        undefined,
+      );
+
+      logModelRouting(mockConfig, event);
+
+      expect(
+        ClearcutLogger.prototype.logModelRoutingEvent,
+      ).toHaveBeenCalledWith(event);
+
+      expect(mockLogger.emit).toHaveBeenCalledWith({
+        body: 'Model routing decision. Model: gemini-pro, Source: default',
+        attributes: {
+          'session.id': 'test-session-id',
+          'user.email': 'test-user@example.com',
+          ...event,
+          'event.name': EVENT_MODEL_ROUTING,
+        },
+      });
+
+      expect(metrics.recordModelRoutingMetrics).toHaveBeenCalledWith(
+        mockConfig,
+        event,
+      );
+    });
+
+    it('should only log to Clearcut if OTEL SDK is not initialized', () => {
+      vi.spyOn(sdk, 'isTelemetrySdkInitialized').mockReturnValue(false);
+      const event = new ModelRoutingEvent(
+        'gemini-pro',
+        'default',
+        100,
+        'test-reason',
+        false,
+        undefined,
+      );
+
+      logModelRouting(mockConfig, event);
+
+      expect(
+        ClearcutLogger.prototype.logModelRoutingEvent,
+      ).toHaveBeenCalledWith(event);
+      expect(mockLogger.emit).not.toHaveBeenCalled();
+      expect(metrics.recordModelRoutingMetrics).not.toHaveBeenCalled();
+    });
+  });
 });
@@ -28,6 +28,7 @@ import {
  EVENT_CONTENT_RETRY_FAILURE,
  EVENT_FILE_OPERATION,
  EVENT_RIPGREP_FALLBACK,
+  EVENT_MODEL_ROUTING,
 } from './constants.js';
 import type {
  ApiErrorEvent,
@@ -52,6 +53,7 @@ import type {
  ContentRetryFailureEvent,
  RipgrepFallbackEvent,
  ToolOutputTruncatedEvent,
+  ModelRoutingEvent,
 } from './types.js';
 import {
  recordApiErrorMetrics,
@@ -63,6 +65,7 @@ import {
  recordInvalidChunk,
  recordContentRetry,
  recordContentRetryFailure,
+  recordModelRoutingMetrics,
 } from './metrics.js';
 import { isTelemetrySdkInitialized } from './sdk.js';
 import type { UiEvent } from './uiTelemetry.js';
@@ -666,3 +669,25 @@ export function logContentRetryFailure(
  logger.emit(logRecord);
  recordContentRetryFailure(config);
 }
+
+export function logModelRouting(
+  config: Config,
+  event: ModelRoutingEvent,
+): void {
+  ClearcutLogger.getInstance(config)?.logModelRoutingEvent(event);
+  if (!isTelemetrySdkInitialized()) return;
+
+  const attributes: LogAttributes = {
+    ...getCommonAttributes(config),
+    ...event,
+    'event.name': EVENT_MODEL_ROUTING,
+  };
+
+  const logger = logs.getLogger(SERVICE_NAME);
+  const logRecord: LogRecord = {
+    body: `Model routing decision. Model: ${event.decision_model}, Source: ${event.decision_source}`,
+    attributes,
+  };
+  logger.emit(logRecord);
+  recordModelRoutingMetrics(config, event);
+}
@@ -15,6 +15,7 @@ import type {
 import type { Config } from '../config/config.js';
 import { FileOperation } from './metrics.js';
 import { makeFakeConfig } from '../test-utils/config.js';
+import { ModelRoutingEvent } from './types.js';

 const mockCounterAddFn: Mock<
  (value: number, attributes?: Attributes, context?: Context) => void
@@ -63,6 +64,7 @@ describe('Telemetry Metrics', () => {
  let recordTokenUsageMetricsModule: typeof import('./metrics.js').recordTokenUsageMetrics;
  let recordFileOperationMetricModule: typeof import('./metrics.js').recordFileOperationMetric;
  let recordChatCompressionMetricsModule: typeof import('./metrics.js').recordChatCompressionMetrics;
+  let recordModelRoutingMetricsModule: typeof import('./metrics.js').recordModelRoutingMetrics;

  beforeEach(async () => {
    vi.resetModules();
@@ -78,6 +80,7 @@ describe('Telemetry Metrics', () => {
    recordFileOperationMetricModule = metricsJsModule.recordFileOperationMetric;
    recordChatCompressionMetricsModule =
      metricsJsModule.recordChatCompressionMetrics;
+    recordModelRoutingMetricsModule = metricsJsModule.recordModelRoutingMetrics;

    const otelApiModule = await import('@opentelemetry/api');

@@ -316,4 +319,71 @@ describe('Telemetry Metrics', () => {
      });
    });
  });
+
+  describe('recordModelRoutingMetrics', () => {
+    const mockConfig = {
+      getSessionId: () => 'test-session-id',
+    } as unknown as Config;
+
+    it('should not record metrics if not initialized', () => {
+      const event = new ModelRoutingEvent(
+        'gemini-pro',
+        'default',
+        100,
+        'test-reason',
+        false,
+        undefined,
+      );
+      recordModelRoutingMetricsModule(mockConfig, event);
+      expect(mockHistogramRecordFn).not.toHaveBeenCalled();
+      expect(mockCounterAddFn).not.toHaveBeenCalled();
+    });
+
+    it('should record latency for a successful routing decision', () => {
+      initializeMetricsModule(mockConfig);
+      const event = new ModelRoutingEvent(
+        'gemini-pro',
+        'default',
+        150,
+        'test-reason',
+        false,
+        undefined,
+      );
+      recordModelRoutingMetricsModule(mockConfig, event);
+
+      expect(mockHistogramRecordFn).toHaveBeenCalledWith(150, {
+        'session.id': 'test-session-id',
+        'routing.decision_model': 'gemini-pro',
+        'routing.decision_source': 'default',
+      });
+      // The session counter is called once on init
+      expect(mockCounterAddFn).toHaveBeenCalledTimes(1);
+    });
+
+    it('should record latency and failure for a failed routing decision', () => {
+      initializeMetricsModule(mockConfig);
+      const event = new ModelRoutingEvent(
+        'gemini-pro',
+        'classifier',
+        200,
+        'test-reason',
+        true,
+        'test-error',
+      );
+      recordModelRoutingMetricsModule(mockConfig, event);
+
+      expect(mockHistogramRecordFn).toHaveBeenCalledWith(200, {
+        'session.id': 'test-session-id',
+        'routing.decision_model': 'gemini-pro',
+        'routing.decision_source': 'classifier',
+      });
+
+      expect(mockCounterAddFn).toHaveBeenCalledTimes(2);
+      expect(mockCounterAddFn).toHaveBeenNthCalledWith(2, 1, {
+        'session.id': 'test-session-id',
+        'routing.decision_source': 'classifier',
+        'routing.error_message': 'test-error',
+      });
+    });
+  });
 });
@@ -19,8 +19,11 @@ import {
  METRIC_INVALID_CHUNK_COUNT,
  METRIC_CONTENT_RETRY_COUNT,
  METRIC_CONTENT_RETRY_FAILURE_COUNT,
+  METRIC_MODEL_ROUTING_LATENCY,
+  METRIC_MODEL_ROUTING_FAILURE_COUNT,
 } from './constants.js';
 import type { Config } from '../config/config.js';
+import type { ModelRoutingEvent } from './types.js';

 export enum FileOperation {
  CREATE = 'create',
@@ -39,6 +42,8 @@ let chatCompressionCounter: Counter | undefined;
 let invalidChunkCounter: Counter | undefined;
 let contentRetryCounter: Counter | undefined;
 let contentRetryFailureCounter: Counter | undefined;
+let modelRoutingLatencyHistogram: Histogram | undefined;
+let modelRoutingFailureCounter: Counter | undefined;
 let isMetricsInitialized = false;

 function getCommonAttributes(config: Config): Attributes {
@@ -110,6 +115,21 @@ export function initializeMetrics(config: Config): void {
      valueType: ValueType.INT,
    },
  );
+  modelRoutingLatencyHistogram = meter.createHistogram(
+    METRIC_MODEL_ROUTING_LATENCY,
+    {
+      description: 'Latency of model routing decisions in milliseconds.',
+      unit: 'ms',
+      valueType: ValueType.INT,
+    },
+  );
+  modelRoutingFailureCounter = meter.createCounter(
+    METRIC_MODEL_ROUTING_FAILURE_COUNT,
+    {
+      description: 'Counts model routing failures.',
+      valueType: ValueType.INT,
+    },
+  );

  const sessionCounter = meter.createCounter(METRIC_SESSION_COUNT, {
    description: 'Count of CLI sessions started.',
@@ -266,3 +286,29 @@ export function recordContentRetryFailure(config: Config): void {
  if (!contentRetryFailureCounter || !isMetricsInitialized) return;
  contentRetryFailureCounter.add(1, getCommonAttributes(config));
 }
+
+export function recordModelRoutingMetrics(
+  config: Config,
+  event: ModelRoutingEvent,
+): void {
+  if (
+    !modelRoutingLatencyHistogram ||
+    !modelRoutingFailureCounter ||
+    !isMetricsInitialized
+  )
+    return;
+
+  modelRoutingLatencyHistogram.record(event.routing_latency_ms, {
+    ...getCommonAttributes(config),
+    'routing.decision_model': event.decision_model,
+    'routing.decision_source': event.decision_source,
+  });
+
+  if (event.failed) {
+    modelRoutingFailureCounter.add(1, {
+      ...getCommonAttributes(config),
+      'routing.decision_source': event.decision_source,
+      'routing.error_message': event.error_message,
+    });
+  }
+}
@@ -526,6 +526,35 @@ export class ContentRetryFailureEvent implements BaseTelemetryEvent {
  }
 }

+export class ModelRoutingEvent implements BaseTelemetryEvent {
+  'event.name': 'model_routing';
+  'event.timestamp': string;
+  decision_model: string;
+  decision_source: string;
+  routing_latency_ms: number;
+  reasoning?: string;
+  failed: boolean;
+  error_message?: string;
+
+  constructor(
+    decision_model: string,
+    decision_source: string,
+    routing_latency_ms: number,
+    reasoning: string | undefined,
+    failed: boolean,
+    error_message: string | undefined,
+  ) {
+    this['event.name'] = 'model_routing';
+    this['event.timestamp'] = new Date().toISOString();
+    this.decision_model = decision_model;
+    this.decision_source = decision_source;
+    this.routing_latency_ms = routing_latency_ms;
+    this.reasoning = reasoning;
+    this.failed = failed;
+    this.error_message = error_message;
+  }
+}
+
 export type TelemetryEvent =
  | StartSessionEvent
  | EndSessionEvent
@@ -549,6 +578,7 @@ export type TelemetryEvent =
  | ContentRetryFailureEvent
  | ExtensionInstallEvent
  | ExtensionUninstallEvent
+  | ModelRoutingEvent
  | ToolOutputTruncatedEvent;

 export class ExtensionInstallEvent implements BaseTelemetryEvent {