feat(routing): Initialize model routing architecture (#8153)

This commit is contained in:
Abhi
2025-09-11 13:38:50 -04:00
committed by GitHub
parent 5504f933e1
commit 538e6cd19a
28 changed files with 1263 additions and 67 deletions
+1 -5
View File
@@ -206,9 +206,7 @@ describe('Server Config (config.ts)', () => {
it('should refresh auth and update config', async () => {
const config = new Config(baseParams);
const authType = AuthType.USE_GEMINI;
const newModel = 'gemini-flash';
const mockContentConfig = {
model: newModel,
apiKey: 'test-key',
};
@@ -226,10 +224,8 @@ describe('Server Config (config.ts)', () => {
config,
authType,
);
// Verify that contentGeneratorConfig is updated with the new model
// Verify that contentGeneratorConfig is updated
expect(config.getContentGeneratorConfig()).toEqual(mockContentConfig);
expect(config.getContentGeneratorConfig().model).toBe(newModel);
expect(config.getModel()).toBe(newModel); // getModel() should return the updated model
expect(GeminiClient).toHaveBeenCalledWith(config);
// Verify that fallback mode is reset
expect(config.isInFallbackMode()).toBe(false);
+16 -5
View File
@@ -44,6 +44,7 @@ import { StartSessionEvent } from '../telemetry/index.js';
import {
DEFAULT_GEMINI_EMBEDDING_MODEL,
DEFAULT_GEMINI_FLASH_MODEL,
DEFAULT_GEMINI_MODEL,
} from './models.js';
import { shouldAttemptBrowserLaunch } from '../utils/browser.js';
import type { MCPOAuthConfig } from '../mcp/oauth-provider.js';
@@ -62,6 +63,7 @@ import {
RipgrepFallbackEvent,
} from '../telemetry/types.js';
import type { FallbackModelHandler } from '../fallback/types.js';
import { ModelRouterService } from '../routing/modelRouterService.js';
import { OutputFormat } from '../output/types.js';
// Re-export OAuth config type
@@ -270,6 +272,7 @@ export class Config {
private readonly usageStatisticsEnabled: boolean;
private geminiClient!: GeminiClient;
private baseLlmClient!: BaseLlmClient;
private modelRouterService: ModelRouterService;
private readonly fileFiltering: {
respectGitIgnore: boolean;
respectGeminiIgnore: boolean;
@@ -282,7 +285,7 @@ export class Config {
private readonly proxy: string | undefined;
private readonly cwd: string;
private readonly bugCommand: BugCommandSettings | undefined;
private readonly model: string;
private model: string;
private readonly extensionContextFilePaths: string[];
private readonly noBrowser: boolean;
private readonly folderTrustFeature: boolean;
@@ -372,7 +375,7 @@ export class Config {
this.cwd = params.cwd ?? process.cwd();
this.fileDiscoveryService = params.fileDiscoveryService ?? null;
this.bugCommand = params.bugCommand;
this.model = params.model;
this.model = params.model || DEFAULT_GEMINI_MODEL;
this.extensionContextFilePaths = params.extensionContextFilePaths ?? [];
this.maxSessionTurns = params.maxSessionTurns ?? -1;
this.experimentalZedIntegration =
@@ -424,6 +427,7 @@ export class Config {
setGlobalDispatcher(new ProxyAgent(this.getProxy() as string));
}
this.geminiClient = new GeminiClient(this);
this.modelRouterService = new ModelRouterService(this);
}
/**
@@ -523,13 +527,16 @@ export class Config {
}
getModel(): string {
return this.contentGeneratorConfig?.model || this.model;
return this.model;
}
setModel(newModel: string): void {
if (this.contentGeneratorConfig) {
this.contentGeneratorConfig.model = newModel;
// Do not allow Pro usage if the user is in fallback mode.
if (newModel.includes('pro') && this.isInFallbackMode()) {
return;
}
this.model = newModel;
}
isInFallbackMode(): boolean {
@@ -699,6 +706,10 @@ export class Config {
return this.geminiClient;
}
getModelRouterService(): ModelRouterService {
return this.modelRouterService;
}
getEnableRecursiveFileSearch(): boolean {
return this.fileFiltering.enableRecursiveFileSearch;
}
+83
View File
@@ -0,0 +1,83 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect } from 'vitest';
import {
getEffectiveModel,
DEFAULT_GEMINI_MODEL,
DEFAULT_GEMINI_FLASH_MODEL,
DEFAULT_GEMINI_FLASH_LITE_MODEL,
} from './models.js';
describe('getEffectiveModel', () => {
describe('When NOT in fallback mode', () => {
const isInFallbackMode = false;
it('should return the Pro model when Pro is requested', () => {
const model = getEffectiveModel(isInFallbackMode, DEFAULT_GEMINI_MODEL);
expect(model).toBe(DEFAULT_GEMINI_MODEL);
});
it('should return the Flash model when Flash is requested', () => {
const model = getEffectiveModel(
isInFallbackMode,
DEFAULT_GEMINI_FLASH_MODEL,
);
expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
});
it('should return the Lite model when Lite is requested', () => {
const model = getEffectiveModel(
isInFallbackMode,
DEFAULT_GEMINI_FLASH_LITE_MODEL,
);
expect(model).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL);
});
it('should return a custom model name when requested', () => {
const customModel = 'custom-model-v1';
const model = getEffectiveModel(isInFallbackMode, customModel);
expect(model).toBe(customModel);
});
});
describe('When IN fallback mode', () => {
const isInFallbackMode = true;
it('should downgrade the Pro model to the Flash model', () => {
const model = getEffectiveModel(isInFallbackMode, DEFAULT_GEMINI_MODEL);
expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
});
it('should return the Flash model when Flash is requested', () => {
const model = getEffectiveModel(
isInFallbackMode,
DEFAULT_GEMINI_FLASH_MODEL,
);
expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
});
it('should HONOR the Lite model when Lite is requested', () => {
const model = getEffectiveModel(
isInFallbackMode,
DEFAULT_GEMINI_FLASH_LITE_MODEL,
);
expect(model).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL);
});
it('should HONOR any model with "lite" in its name', () => {
const customLiteModel = 'gemini-2.5-custom-lite-vNext';
const model = getEffectiveModel(isInFallbackMode, customLiteModel);
expect(model).toBe(customLiteModel);
});
it('should downgrade any other custom model to the Flash model', () => {
const customModel = 'custom-model-v1-unlisted';
const model = getEffectiveModel(isInFallbackMode, customModel);
expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
});
});
});
+32
View File
@@ -12,3 +12,35 @@ export const DEFAULT_GEMINI_EMBEDDING_MODEL = 'gemini-embedding-001';
// Some thinking models do not default to dynamic thinking which is done by a value of -1
export const DEFAULT_THINKING_MODE = -1;
/**
* Determines the effective model to use, applying fallback logic if necessary.
*
* When fallback mode is active, this function enforces the use of the standard
* fallback model. However, it makes an exception for "lite" models (any model
* with "lite" in its name), allowing them to be used to preserve cost savings.
* This ensures that "pro" models are always downgraded, while "lite" model
* requests are honored.
*
* @param isInFallbackMode Whether the application is in fallback mode.
* @param requestedModel The model that was originally requested.
* @returns The effective model name.
*/
export function getEffectiveModel(
isInFallbackMode: boolean,
requestedModel: string,
): string {
// If we are not in fallback mode, simply use the requested model.
if (!isInFallbackMode) {
return requestedModel;
}
// If a "lite" model is requested, honor it. This allows for variations of
// lite models without needing to list them all as constants.
if (requestedModel.includes('lite')) {
return requestedModel;
}
// Default fallback for Gemini CLI.
return DEFAULT_GEMINI_FLASH_MODEL;
}