mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-29 06:25:16 -07:00
feat(routing): Initialize model routing architecture (#8153)
This commit is contained in:
@@ -206,9 +206,7 @@ describe('Server Config (config.ts)', () => {
|
||||
it('should refresh auth and update config', async () => {
|
||||
const config = new Config(baseParams);
|
||||
const authType = AuthType.USE_GEMINI;
|
||||
const newModel = 'gemini-flash';
|
||||
const mockContentConfig = {
|
||||
model: newModel,
|
||||
apiKey: 'test-key',
|
||||
};
|
||||
|
||||
@@ -226,10 +224,8 @@ describe('Server Config (config.ts)', () => {
|
||||
config,
|
||||
authType,
|
||||
);
|
||||
// Verify that contentGeneratorConfig is updated with the new model
|
||||
// Verify that contentGeneratorConfig is updated
|
||||
expect(config.getContentGeneratorConfig()).toEqual(mockContentConfig);
|
||||
expect(config.getContentGeneratorConfig().model).toBe(newModel);
|
||||
expect(config.getModel()).toBe(newModel); // getModel() should return the updated model
|
||||
expect(GeminiClient).toHaveBeenCalledWith(config);
|
||||
// Verify that fallback mode is reset
|
||||
expect(config.isInFallbackMode()).toBe(false);
|
||||
|
||||
@@ -44,6 +44,7 @@ import { StartSessionEvent } from '../telemetry/index.js';
|
||||
import {
|
||||
DEFAULT_GEMINI_EMBEDDING_MODEL,
|
||||
DEFAULT_GEMINI_FLASH_MODEL,
|
||||
DEFAULT_GEMINI_MODEL,
|
||||
} from './models.js';
|
||||
import { shouldAttemptBrowserLaunch } from '../utils/browser.js';
|
||||
import type { MCPOAuthConfig } from '../mcp/oauth-provider.js';
|
||||
@@ -62,6 +63,7 @@ import {
|
||||
RipgrepFallbackEvent,
|
||||
} from '../telemetry/types.js';
|
||||
import type { FallbackModelHandler } from '../fallback/types.js';
|
||||
import { ModelRouterService } from '../routing/modelRouterService.js';
|
||||
import { OutputFormat } from '../output/types.js';
|
||||
|
||||
// Re-export OAuth config type
|
||||
@@ -270,6 +272,7 @@ export class Config {
|
||||
private readonly usageStatisticsEnabled: boolean;
|
||||
private geminiClient!: GeminiClient;
|
||||
private baseLlmClient!: BaseLlmClient;
|
||||
private modelRouterService: ModelRouterService;
|
||||
private readonly fileFiltering: {
|
||||
respectGitIgnore: boolean;
|
||||
respectGeminiIgnore: boolean;
|
||||
@@ -282,7 +285,7 @@ export class Config {
|
||||
private readonly proxy: string | undefined;
|
||||
private readonly cwd: string;
|
||||
private readonly bugCommand: BugCommandSettings | undefined;
|
||||
private readonly model: string;
|
||||
private model: string;
|
||||
private readonly extensionContextFilePaths: string[];
|
||||
private readonly noBrowser: boolean;
|
||||
private readonly folderTrustFeature: boolean;
|
||||
@@ -372,7 +375,7 @@ export class Config {
|
||||
this.cwd = params.cwd ?? process.cwd();
|
||||
this.fileDiscoveryService = params.fileDiscoveryService ?? null;
|
||||
this.bugCommand = params.bugCommand;
|
||||
this.model = params.model;
|
||||
this.model = params.model || DEFAULT_GEMINI_MODEL;
|
||||
this.extensionContextFilePaths = params.extensionContextFilePaths ?? [];
|
||||
this.maxSessionTurns = params.maxSessionTurns ?? -1;
|
||||
this.experimentalZedIntegration =
|
||||
@@ -424,6 +427,7 @@ export class Config {
|
||||
setGlobalDispatcher(new ProxyAgent(this.getProxy() as string));
|
||||
}
|
||||
this.geminiClient = new GeminiClient(this);
|
||||
this.modelRouterService = new ModelRouterService(this);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -523,13 +527,16 @@ export class Config {
|
||||
}
|
||||
|
||||
getModel(): string {
|
||||
return this.contentGeneratorConfig?.model || this.model;
|
||||
return this.model;
|
||||
}
|
||||
|
||||
setModel(newModel: string): void {
|
||||
if (this.contentGeneratorConfig) {
|
||||
this.contentGeneratorConfig.model = newModel;
|
||||
// Do not allow Pro usage if the user is in fallback mode.
|
||||
if (newModel.includes('pro') && this.isInFallbackMode()) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.model = newModel;
|
||||
}
|
||||
|
||||
isInFallbackMode(): boolean {
|
||||
@@ -699,6 +706,10 @@ export class Config {
|
||||
return this.geminiClient;
|
||||
}
|
||||
|
||||
/**
 * Returns the ModelRouterService instance stored on this config.
 *
 * @returns The value of the private `modelRouterService` field.
 */
getModelRouterService(): ModelRouterService {
  return this.modelRouterService;
}
|
||||
|
||||
/**
 * Returns the `enableRecursiveFileSearch` flag from the file-filtering
 * settings held by this config.
 *
 * @returns The value of `fileFiltering.enableRecursiveFileSearch`.
 */
getEnableRecursiveFileSearch(): boolean {
  return this.fileFiltering.enableRecursiveFileSearch;
}
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import {
|
||||
getEffectiveModel,
|
||||
DEFAULT_GEMINI_MODEL,
|
||||
DEFAULT_GEMINI_FLASH_MODEL,
|
||||
DEFAULT_GEMINI_FLASH_LITE_MODEL,
|
||||
} from './models.js';
|
||||
|
||||
// Unit tests for getEffectiveModel: the requested model must pass through
// untouched outside fallback mode, while in fallback mode only "lite" models
// are honored and everything else is downgraded to the Flash model.
describe('getEffectiveModel', () => {
  describe('When NOT in fallback mode', () => {
    const isInFallbackMode = false;

    it('should return the Pro model when Pro is requested', () => {
      const model = getEffectiveModel(isInFallbackMode, DEFAULT_GEMINI_MODEL);
      expect(model).toBe(DEFAULT_GEMINI_MODEL);
    });

    it('should return the Flash model when Flash is requested', () => {
      const model = getEffectiveModel(
        isInFallbackMode,
        DEFAULT_GEMINI_FLASH_MODEL,
      );
      expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
    });

    it('should return the Lite model when Lite is requested', () => {
      const model = getEffectiveModel(
        isInFallbackMode,
        DEFAULT_GEMINI_FLASH_LITE_MODEL,
      );
      expect(model).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL);
    });

    it('should return a custom model name when requested', () => {
      const customModel = 'custom-model-v1';
      const model = getEffectiveModel(isInFallbackMode, customModel);
      expect(model).toBe(customModel);
    });
  });

  describe('When IN fallback mode', () => {
    const isInFallbackMode = true;

    it('should downgrade the Pro model to the Flash model', () => {
      const model = getEffectiveModel(isInFallbackMode, DEFAULT_GEMINI_MODEL);
      expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
    });

    it('should return the Flash model when Flash is requested', () => {
      const model = getEffectiveModel(
        isInFallbackMode,
        DEFAULT_GEMINI_FLASH_MODEL,
      );
      expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
    });

    it('should HONOR the Lite model when Lite is requested', () => {
      const model = getEffectiveModel(
        isInFallbackMode,
        DEFAULT_GEMINI_FLASH_LITE_MODEL,
      );
      expect(model).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL);
    });

    it('should HONOR any model with "lite" in its name', () => {
      const customLiteModel = 'gemini-2.5-custom-lite-vNext';
      const model = getEffectiveModel(isInFallbackMode, customLiteModel);
      expect(model).toBe(customLiteModel);
    });

    it('should downgrade any other custom model to the Flash model', () => {
      const customModel = 'custom-model-v1-unlisted';
      const model = getEffectiveModel(isInFallbackMode, customModel);
      expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
    });
  });
});
|
||||
@@ -12,3 +12,35 @@ export const DEFAULT_GEMINI_EMBEDDING_MODEL = 'gemini-embedding-001';
|
||||
|
||||
// Not all thinking models default to dynamic thinking, so it is requested explicitly with a value of -1.
|
||||
export const DEFAULT_THINKING_MODE = -1;
|
||||
|
||||
/**
 * Determines the effective model to use, applying fallback logic if necessary.
 *
 * Outside fallback mode the requested model is returned unchanged. When
 * fallback mode is active, this function enforces the use of the standard
 * fallback model (DEFAULT_GEMINI_FLASH_MODEL). However, it makes an exception
 * for "lite" models (any model whose name contains the substring "lite"),
 * allowing them to be used to preserve cost savings. This ensures that "pro"
 * models are always downgraded, while "lite" model requests are honored.
 *
 * Note that the "lite" check is a case-sensitive substring match.
 *
 * @param isInFallbackMode Whether the application is in fallback mode.
 * @param requestedModel The model that was originally requested.
 * @returns The effective model name.
 */
export function getEffectiveModel(
  isInFallbackMode: boolean,
  requestedModel: string,
): string {
  // If we are not in fallback mode, simply use the requested model.
  if (!isInFallbackMode) {
    return requestedModel;
  }

  // If a "lite" model is requested, honor it. This allows for variations of
  // lite models without needing to list them all as constants.
  if (requestedModel.includes('lite')) {
    return requestedModel;
  }

  // Default fallback for Gemini CLI.
  return DEFAULT_GEMINI_FLASH_MODEL;
}
|
||||
|
||||
Reference in New Issue
Block a user