feat(routing): Initialize model routing architecture (#8153)

2026-04-29 14:34:55 -07:00 · 2025-09-11 13:38:50 -04:00
parent 5504f933e1
commit 538e6cd19a
28 changed files with 1263 additions and 67 deletions
@@ -206,9 +206,7 @@ describe('Server Config (config.ts)', () => {
    it('should refresh auth and update config', async () => {
      const config = new Config(baseParams);
      const authType = AuthType.USE_GEMINI;
-      const newModel = 'gemini-flash';
      const mockContentConfig = {
-        model: newModel,
        apiKey: 'test-key',
      };

@@ -226,10 +224,8 @@ describe('Server Config (config.ts)', () => {
        config,
        authType,
      );
-      // Verify that contentGeneratorConfig is updated with the new model
+      // Verify that contentGeneratorConfig is updated
      expect(config.getContentGeneratorConfig()).toEqual(mockContentConfig);
-      expect(config.getContentGeneratorConfig().model).toBe(newModel);
-      expect(config.getModel()).toBe(newModel); // getModel() should return the updated model
      expect(GeminiClient).toHaveBeenCalledWith(config);
      // Verify that fallback mode is reset
      expect(config.isInFallbackMode()).toBe(false);
@@ -44,6 +44,7 @@ import { StartSessionEvent } from '../telemetry/index.js';
 import {
  DEFAULT_GEMINI_EMBEDDING_MODEL,
  DEFAULT_GEMINI_FLASH_MODEL,
+  DEFAULT_GEMINI_MODEL,
 } from './models.js';
 import { shouldAttemptBrowserLaunch } from '../utils/browser.js';
 import type { MCPOAuthConfig } from '../mcp/oauth-provider.js';
@@ -62,6 +63,7 @@ import {
  RipgrepFallbackEvent,
 } from '../telemetry/types.js';
 import type { FallbackModelHandler } from '../fallback/types.js';
+import { ModelRouterService } from '../routing/modelRouterService.js';
 import { OutputFormat } from '../output/types.js';

 // Re-export OAuth config type
@@ -270,6 +272,7 @@ export class Config {
  private readonly usageStatisticsEnabled: boolean;
  private geminiClient!: GeminiClient;
  private baseLlmClient!: BaseLlmClient;
+  private modelRouterService: ModelRouterService;
  private readonly fileFiltering: {
    respectGitIgnore: boolean;
    respectGeminiIgnore: boolean;
@@ -282,7 +285,7 @@ export class Config {
  private readonly proxy: string | undefined;
  private readonly cwd: string;
  private readonly bugCommand: BugCommandSettings | undefined;
-  private readonly model: string;
+  private model: string;
  private readonly extensionContextFilePaths: string[];
  private readonly noBrowser: boolean;
  private readonly folderTrustFeature: boolean;
@@ -372,7 +375,7 @@ export class Config {
    this.cwd = params.cwd ?? process.cwd();
    this.fileDiscoveryService = params.fileDiscoveryService ?? null;
    this.bugCommand = params.bugCommand;
-    this.model = params.model;
+    this.model = params.model || DEFAULT_GEMINI_MODEL;
    this.extensionContextFilePaths = params.extensionContextFilePaths ?? [];
    this.maxSessionTurns = params.maxSessionTurns ?? -1;
    this.experimentalZedIntegration =
@@ -424,6 +427,7 @@ export class Config {
      setGlobalDispatcher(new ProxyAgent(this.getProxy() as string));
    }
    this.geminiClient = new GeminiClient(this);
+    this.modelRouterService = new ModelRouterService(this);
  }

  /**
@@ -523,13 +527,16 @@ export class Config {
  }

  getModel(): string {
-    return this.contentGeneratorConfig?.model || this.model;
+    return this.model; // no longer consults contentGeneratorConfig.model first
  }

  setModel(newModel: string): void {
-    if (this.contentGeneratorConfig) {
-      this.contentGeneratorConfig.model = newModel;
+    // In fallback mode, silently ignore any request whose name contains 'pro' (case-sensitive substring); the current model is kept.
+    if (newModel.includes('pro') && this.isInFallbackMode()) {
+      return;
    }
+
+    this.model = newModel;
  }

  isInFallbackMode(): boolean {
@@ -699,6 +706,10 @@ export class Config {
    return this.geminiClient;
  }

+  getModelRouterService(): ModelRouterService {
+    return this.modelRouterService; // the instance assigned via `new ModelRouterService(this)` in the earlier hunk
+  }
+
  getEnableRecursiveFileSearch(): boolean {
    return this.fileFiltering.enableRecursiveFileSearch;
  }
@@ -0,0 +1,83 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import {
+  getEffectiveModel,
+  DEFAULT_GEMINI_MODEL,
+  DEFAULT_GEMINI_FLASH_MODEL,
+  DEFAULT_GEMINI_FLASH_LITE_MODEL,
+} from './models.js';
+
+describe('getEffectiveModel', () => {
+  describe('When NOT in fallback mode', () => { // every case expects the requested model back unchanged
+    const isInFallbackMode = false;
+
+    it('should return the Pro model when Pro is requested', () => {
+      const model = getEffectiveModel(isInFallbackMode, DEFAULT_GEMINI_MODEL);
+      expect(model).toBe(DEFAULT_GEMINI_MODEL);
+    });
+
+    it('should return the Flash model when Flash is requested', () => {
+      const model = getEffectiveModel(
+        isInFallbackMode,
+        DEFAULT_GEMINI_FLASH_MODEL,
+      );
+      expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
+    });
+
+    it('should return the Lite model when Lite is requested', () => {
+      const model = getEffectiveModel(
+        isInFallbackMode,
+        DEFAULT_GEMINI_FLASH_LITE_MODEL,
+      );
+      expect(model).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL);
+    });
+
+    it('should return a custom model name when requested', () => {
+      const customModel = 'custom-model-v1';
+      const model = getEffectiveModel(isInFallbackMode, customModel);
+      expect(model).toBe(customModel);
+    });
+  });
+
+  describe('When IN fallback mode', () => { // names containing "lite" are expected back as-is; every other request is expected to resolve to DEFAULT_GEMINI_FLASH_MODEL
+    const isInFallbackMode = true;
+
+    it('should downgrade the Pro model to the Flash model', () => {
+      const model = getEffectiveModel(isInFallbackMode, DEFAULT_GEMINI_MODEL);
+      expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
+    });
+
+    it('should return the Flash model when Flash is requested', () => {
+      const model = getEffectiveModel(
+        isInFallbackMode,
+        DEFAULT_GEMINI_FLASH_MODEL,
+      );
+      expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
+    });
+
+    it('should HONOR the Lite model when Lite is requested', () => {
+      const model = getEffectiveModel(
+        isInFallbackMode,
+        DEFAULT_GEMINI_FLASH_LITE_MODEL,
+      );
+      expect(model).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL);
+    });
+
+    it('should HONOR any model with "lite" in its name', () => {
+      const customLiteModel = 'gemini-2.5-custom-lite-vNext';
+      const model = getEffectiveModel(isInFallbackMode, customLiteModel);
+      expect(model).toBe(customLiteModel);
+    });
+
+    it('should downgrade any other custom model to the Flash model', () => {
+      const customModel = 'custom-model-v1-unlisted';
+      const model = getEffectiveModel(isInFallbackMode, customModel);
+      expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
+    });
+  });
+});
@@ -12,3 +12,35 @@ export const DEFAULT_GEMINI_EMBEDDING_MODEL = 'gemini-embedding-001';

 // Some thinking models do not default to dynamic thinking, which is selected by a value of -1.
 export const DEFAULT_THINKING_MODE = -1;
+
+/**
+ * Determines the effective model to use, applying fallback logic if necessary.
+ *
+ * Outside fallback mode the requested model is returned unchanged. In fallback
+ * mode, a request whose name contains the substring "lite" (case-sensitive) is
+ * still honored, which preserves the cost savings of lite models. Every other
+ * request, including "pro" models and unlisted custom names, resolves to
+ * DEFAULT_GEMINI_FLASH_MODEL.
+ *
+ * @param isInFallbackMode Whether the application is in fallback mode.
+ * @param requestedModel The model that was originally requested.
+ * @returns `requestedModel`, or DEFAULT_GEMINI_FLASH_MODEL when downgraded.
+ */
+export function getEffectiveModel(
+  isInFallbackMode: boolean,
+  requestedModel: string,
+): string {
+  // If we are not in fallback mode, simply use the requested model.
+  if (!isInFallbackMode) {
+    return requestedModel;
+  }
+
+  // If a "lite" model is requested, honor it. The substring match covers
+  // lite variants without needing to list them all as constants.
+  if (requestedModel.includes('lite')) {
+    return requestedModel;
+  }
+
+  // Everything else is downgraded to the default Flash fallback model.
+  return DEFAULT_GEMINI_FLASH_MODEL;
+}