mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-13 05:12:55 -07:00
This commit is contained in:
@@ -168,4 +168,46 @@ describe('ModelAvailabilityService', () => {
|
||||
reason: 'quota',
|
||||
});
|
||||
});
|
||||
|
||||
// These cases check that a model ID written as 'models/<id>' and the same ID
// written without the prefix are handled as one model by the service.
describe('prefix normalization', () => {
  it('treats prefixed and non-prefixed models as identical when marking terminal', () => {
    const bareId = 'gemini-3.1-pro-preview';
    const prefixedId = 'models/gemini-3.1-pro-preview';
    const exhausted = { available: false, reason: 'quota' };

    service.markTerminal(prefixedId, 'quota');

    // The state was recorded under the prefixed spelling; the bare spelling
    // must report it as unavailable.
    expect(service.snapshot(bareId)).toEqual(exhausted);

    // The prefixed spelling must report the same thing.
    expect(service.snapshot(prefixedId)).toEqual(exhausted);
  });

  it('treats prefixed and non-prefixed models as identical when selecting', () => {
    const exhaustedFlash = 'gemini-3-flash-preview';
    const fallbackPro = 'gemini-3.1-pro-preview';

    service.markTerminal(exhaustedFlash, 'quota');

    // The first candidate is the prefixed spelling of the exhausted model, so
    // it is expected to be skipped in favour of the second candidate.
    const candidates = [`models/${exhaustedFlash}`, fallbackPro];
    const { selectedModel, skipped } = service.selectFirstAvailable(candidates);

    expect(selectedModel).toBe('gemini-3.1-pro-preview');
    expect(skipped).toEqual([
      { model: 'gemini-3-flash-preview', reason: 'quota' },
    ]);
  });

  it('treats prefixed and non-prefixed models as identical when marking healthy', () => {
    const bareId = 'gemini-3-flash-preview';

    // Exhaust under the bare spelling, then recover under the prefixed one.
    service.markTerminal(bareId, 'quota');
    service.markHealthy(`models/${bareId}`);

    expect(service.snapshot(bareId)).toEqual({
      available: true,
    });
  });
});
|
||||
});
|
||||
|
||||
@@ -39,21 +39,26 @@ export interface ModelSelectionResult {
|
||||
}>;
|
||||
}
|
||||
|
||||
import { normalizeModelId } from '../utils/modelUtils.js';
|
||||
|
||||
export class ModelAvailabilityService {
|
||||
private readonly health = new Map<ModelId, HealthState>();
|
||||
|
||||
/**
 * Stores a `terminal` health state, with the given reason, for a model.
 *
 * The ID is passed through `normalizeModelId` first, so the state is keyed
 * by the normalized ID rather than by the spelling the caller used.
 *
 * @param modelId Model identifier as supplied by the caller.
 * @param reason Why the model is being marked terminally unavailable.
 */
markTerminal(modelId: ModelId, reason: TerminalUnavailabilityReason) {
  const model = normalizeModelId(modelId);
  this.setState(model, {
    status: 'terminal',
    reason,
  });
}
|
||||
|
||||
/**
 * Hands the normalized form of the model ID to `clearState`.
 *
 * Normalizing first means a state recorded under one spelling of the ID is
 * addressed by the same key when the caller uses the other spelling.
 *
 * @param modelId Model identifier as supplied by the caller.
 */
markHealthy(modelId: ModelId) {
  const model = normalizeModelId(modelId);
  this.clearState(model);
}
|
||||
|
||||
markRetryOncePerTurn(model: ModelId, attempts: number = 1) {
|
||||
markRetryOncePerTurn(modelId: ModelId, attempts: number = 1) {
|
||||
const model = normalizeModelId(modelId);
|
||||
const currentState = this.health.get(model);
|
||||
// Do not override a terminal failure with a transient one.
|
||||
if (currentState?.status === 'terminal') {
|
||||
@@ -75,14 +80,16 @@ export class ModelAvailabilityService {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Flags the model's current `sticky_retry` state as consumed.
 *
 * Looks up the state under the normalized model ID. If that state has status
 * `sticky_retry`, it is re-stored with `consumed: true`; any other state, or
 * no state at all, is left untouched.
 *
 * @param modelId Model identifier as supplied by the caller.
 */
consumeStickyAttempt(modelId: ModelId) {
  const model = normalizeModelId(modelId);
  const state = this.health.get(model);
  if (state?.status === 'sticky_retry') {
    this.setState(model, { ...state, consumed: true });
  }
}
|
||||
|
||||
snapshot(model: ModelId): ModelAvailabilitySnapshot {
|
||||
snapshot(modelId: ModelId): ModelAvailabilitySnapshot {
|
||||
const model = normalizeModelId(modelId);
|
||||
const state = this.health.get(model);
|
||||
|
||||
if (!state) {
|
||||
@@ -100,10 +107,11 @@ export class ModelAvailabilityService {
|
||||
return { available: true };
|
||||
}
|
||||
|
||||
selectFirstAvailable(models: ModelId[]): ModelSelectionResult {
|
||||
selectFirstAvailable(modelIds: ModelId[]): ModelSelectionResult {
|
||||
const skipped: ModelSelectionResult['skipped'] = [];
|
||||
|
||||
for (const model of models) {
|
||||
for (const modelId of modelIds) {
|
||||
const model = normalizeModelId(modelId);
|
||||
const snapshot = this.snapshot(model);
|
||||
if (snapshot.available) {
|
||||
const state = this.health.get(model);
|
||||
|
||||
@@ -96,10 +96,11 @@ describe('policyHelpers', () => {
|
||||
|
||||
it('starts chain from preferredModel when model is "auto"', () => {
  // The configured model is the literal 'auto'; the preferred model passed to
  // resolvePolicyChain is what the chain is expected to start from.
  const config = createMockConfig({
    getModel: () => 'auto',
  });
  const chain = resolvePolicyChain(config, 'gemini-2.5-flash');
  // Due to Gemini 2.x wrapsAround, the chain will contain both flash and pro,
  // so only a lower bound on the length is asserted here.
  expect(chain.length).toBeGreaterThanOrEqual(1);
  expect(chain[0]?.model).toBe('gemini-2.5-flash');
});
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@ import {
|
||||
isGemini3Model,
|
||||
resolveModel,
|
||||
} from '../config/models.js';
|
||||
import { normalizeModelId } from '../utils/modelUtils.js';
|
||||
import type { ModelSelectionResult } from './modelAvailabilityService.js';
|
||||
import type { ModelConfigKey } from '../services/modelConfigService.js';
|
||||
import { ApprovalMode } from '../policy/types.js';
|
||||
@@ -41,9 +42,13 @@ export function resolvePolicyChain(
|
||||
preferredModel?: string,
|
||||
wrapsAround: boolean = false,
|
||||
): ModelPolicyChain {
|
||||
const modelFromConfig =
|
||||
preferredModel ?? config.getActiveModel?.() ?? config.getModel();
|
||||
const configuredModel = config.getModel();
|
||||
const normalizedPreferredModel = preferredModel
|
||||
? normalizeModelId(preferredModel)
|
||||
: undefined;
|
||||
const modelFromConfig = normalizeModelId(
|
||||
normalizedPreferredModel ?? config.getActiveModel?.() ?? config.getModel(),
|
||||
);
|
||||
const configuredModel = normalizeModelId(config.getModel());
|
||||
|
||||
let chain: ModelPolicyChain | undefined;
|
||||
const useGemini31 = config.getGemini31LaunchedSync?.() ?? false;
|
||||
@@ -52,19 +57,29 @@ export function resolvePolicyChain(
|
||||
const useCustomToolModel = config.getUseCustomToolModelSync?.() ?? false;
|
||||
const hasAccessToPreview = config.getHasAccessToPreviewModel?.() ?? true;
|
||||
|
||||
const resolvedModel = resolveModel(
|
||||
modelFromConfig,
|
||||
useGemini31,
|
||||
useGemini31FlashLite,
|
||||
useCustomToolModel,
|
||||
hasAccessToPreview,
|
||||
config,
|
||||
const resolvedModel = normalizeModelId(
|
||||
resolveModel(
|
||||
modelFromConfig,
|
||||
useGemini31,
|
||||
useGemini31FlashLite,
|
||||
useCustomToolModel,
|
||||
hasAccessToPreview,
|
||||
config,
|
||||
),
|
||||
);
|
||||
const isAutoPreferred = preferredModel
|
||||
? isAutoModel(preferredModel, config)
|
||||
const isAutoPreferred = normalizedPreferredModel
|
||||
? isAutoModel(normalizedPreferredModel, config)
|
||||
: false;
|
||||
const isAutoConfigured = isAutoModel(configuredModel, config);
|
||||
|
||||
// We always wrap around for Gemini 3 chains to ensure maximum availability
|
||||
// between models in the same family (e.g. fallback to Pro if Flash is exhausted).
|
||||
const effectiveWrapsAround =
|
||||
wrapsAround ||
|
||||
isAutoPreferred ||
|
||||
isAutoConfigured ||
|
||||
isGemini3Model(resolvedModel, config);
|
||||
|
||||
// --- DYNAMIC PATH ---
|
||||
if (config.getExperimentalDynamicModelConfiguration?.() === true) {
|
||||
const context = {
|
||||
@@ -76,7 +91,7 @@ export function resolvePolicyChain(
|
||||
if (resolvedModel === DEFAULT_GEMINI_FLASH_LITE_MODEL) {
|
||||
chain = config.modelConfigService.resolveChain('lite', context);
|
||||
} else if (
|
||||
isGemini3Model(resolvedModel, config) ||
|
||||
isGemini3Model(normalizeModelId(resolvedModel), config) ||
|
||||
isAutoPreferred ||
|
||||
isAutoConfigured
|
||||
) {
|
||||
@@ -96,7 +111,7 @@ export function resolvePolicyChain(
|
||||
const previewEnabled =
|
||||
hasAccessToPreview &&
|
||||
(isGemini3Model(resolvedModel, config) ||
|
||||
preferredModel === PREVIEW_GEMINI_MODEL_AUTO ||
|
||||
normalizedPreferredModel === PREVIEW_GEMINI_MODEL_AUTO ||
|
||||
configuredModel === PREVIEW_GEMINI_MODEL_AUTO);
|
||||
const autoPrefix = isAutoSelection ? 'auto-' : '';
|
||||
const chainKey = previewEnabled ? 'preview' : 'default';
|
||||
@@ -110,7 +125,7 @@ export function resolvePolicyChain(
|
||||
// No matching modelChains found, default to single model chain
|
||||
chain = createSingleModelChain(modelFromConfig);
|
||||
}
|
||||
chain = applyDynamicSlicing(chain, resolvedModel, wrapsAround);
|
||||
chain = applyDynamicSlicing(chain, resolvedModel, effectiveWrapsAround);
|
||||
} else {
|
||||
// --- LEGACY PATH ---
|
||||
|
||||
@@ -125,7 +140,7 @@ export function resolvePolicyChain(
|
||||
if (hasAccessToPreview) {
|
||||
const previewEnabled =
|
||||
isGemini3Model(resolvedModel, config) ||
|
||||
preferredModel === PREVIEW_GEMINI_MODEL_AUTO ||
|
||||
normalizedPreferredModel === PREVIEW_GEMINI_MODEL_AUTO ||
|
||||
configuredModel === PREVIEW_GEMINI_MODEL_AUTO;
|
||||
chain = getModelPolicyChain({
|
||||
previewEnabled,
|
||||
@@ -150,7 +165,7 @@ export function resolvePolicyChain(
|
||||
} else {
|
||||
chain = createSingleModelChain(modelFromConfig);
|
||||
}
|
||||
chain = applyDynamicSlicing(chain, resolvedModel, wrapsAround);
|
||||
chain = applyDynamicSlicing(chain, resolvedModel, effectiveWrapsAround);
|
||||
}
|
||||
// Apply Unified Silent Injection for Plan Mode with defensive checks
|
||||
if (config?.getApprovalMode?.() === ApprovalMode.PLAN) {
|
||||
@@ -171,8 +186,9 @@ function applyDynamicSlicing(
|
||||
resolvedModel: string,
|
||||
wrapsAround: boolean,
|
||||
): ModelPolicyChain {
|
||||
const normalizedResolved = normalizeModelId(resolvedModel);
|
||||
const activeIndex = chain.findIndex(
|
||||
(policy) => policy.model === resolvedModel,
|
||||
(policy) => normalizeModelId(policy.model) === normalizedResolved,
|
||||
);
|
||||
if (activeIndex !== -1) {
|
||||
return wrapsAround
|
||||
@@ -200,7 +216,10 @@ export function buildFallbackPolicyContext(
|
||||
failedPolicy?: ModelPolicy;
|
||||
candidates: ModelPolicy[];
|
||||
} {
|
||||
const index = chain.findIndex((policy) => policy.model === failedModel);
|
||||
const normalizedFailed = normalizeModelId(failedModel);
|
||||
const index = chain.findIndex(
|
||||
(policy) => normalizeModelId(policy.model) === normalizedFailed,
|
||||
);
|
||||
if (index === -1) {
|
||||
return { failedPolicy: undefined, candidates: chain };
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@ import type { Content } from '@google/genai';
|
||||
import type { ResolvedModelConfig } from '../../services/modelConfigService.js';
|
||||
import { debugLogger } from '../../utils/debugLogger.js';
|
||||
import { AuthType } from '../../core/contentGenerator.js';
|
||||
import { ModelAvailabilityService } from '../../availability/modelAvailabilityService.js';
|
||||
|
||||
vi.mock('../../core/baseLlmClient.js');
|
||||
|
||||
@@ -68,6 +69,9 @@ describe('ClassifierStrategy', () => {
|
||||
getContentGeneratorConfig: vi.fn().mockReturnValue({
|
||||
authType: AuthType.LOGIN_WITH_GOOGLE,
|
||||
}),
|
||||
getModelAvailabilityService: vi
|
||||
.fn()
|
||||
.mockReturnValue(new ModelAvailabilityService()),
|
||||
} as unknown as Config;
|
||||
mockBaseLlmClient = {
|
||||
generateJson: vi.fn(),
|
||||
|
||||
@@ -20,6 +20,7 @@ import {
|
||||
isFunctionResponse,
|
||||
} from '../../utils/messageInspectors.js';
|
||||
import { debugLogger } from '../../utils/debugLogger.js';
|
||||
import { normalizeModelId } from '../../utils/modelUtils.js';
|
||||
import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js';
|
||||
import { LlmRole } from '../../telemetry/types.js';
|
||||
|
||||
@@ -177,16 +178,28 @@ export class ClassifierStrategy implements RoutingStrategy {
|
||||
config.getGemini31FlashLiteLaunched(),
|
||||
config.getUseCustomToolModel(),
|
||||
]);
|
||||
const selectedModel = resolveClassifierModel(
|
||||
model,
|
||||
routerResponse.model_choice,
|
||||
useGemini3_1,
|
||||
useGemini3_1FlashLite,
|
||||
useCustomToolModel,
|
||||
config.getHasAccessToPreviewModel?.() ?? true,
|
||||
config,
|
||||
const selectedModel = normalizeModelId(
|
||||
resolveClassifierModel(
|
||||
normalizeModelId(model),
|
||||
routerResponse.model_choice,
|
||||
useGemini3_1,
|
||||
useGemini3_1FlashLite,
|
||||
useCustomToolModel,
|
||||
config.getHasAccessToPreviewModel?.() ?? true,
|
||||
config,
|
||||
),
|
||||
);
|
||||
|
||||
const service = config.getModelAvailabilityService();
|
||||
const snapshot = service.snapshot(selectedModel);
|
||||
|
||||
if (!snapshot.available) {
|
||||
debugLogger.warn(
|
||||
`[Routing] Classifier selected unavailable model ${selectedModel} (${snapshot.reason}). Bypassing.`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
||||
return {
|
||||
model: selectedModel,
|
||||
metadata: {
|
||||
|
||||
@@ -27,6 +27,7 @@ import type { ResolvedModelConfig } from '../../services/modelConfigService.js';
|
||||
import { debugLogger } from '../../utils/debugLogger.js';
|
||||
import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js';
|
||||
import { AuthType } from '../../core/contentGenerator.js';
|
||||
import { ModelAvailabilityService } from '../../availability/modelAvailabilityService.js';
|
||||
|
||||
vi.mock('../../core/baseLlmClient.js');
|
||||
|
||||
@@ -71,6 +72,9 @@ describe('NumericalClassifierStrategy', () => {
|
||||
getContentGeneratorConfig: vi.fn().mockReturnValue({
|
||||
authType: AuthType.LOGIN_WITH_GOOGLE,
|
||||
}),
|
||||
getModelAvailabilityService: vi
|
||||
.fn()
|
||||
.mockReturnValue(new ModelAvailabilityService()),
|
||||
} as unknown as Config;
|
||||
mockBaseLlmClient = {
|
||||
generateJson: vi.fn(),
|
||||
|
||||
@@ -20,6 +20,7 @@ import {
|
||||
isFunctionResponse,
|
||||
} from '../../utils/messageInspectors.js';
|
||||
import { debugLogger } from '../../utils/debugLogger.js';
|
||||
import { normalizeModelId } from '../../utils/modelUtils.js';
|
||||
import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js';
|
||||
import { LlmRole } from '../../telemetry/types.js';
|
||||
|
||||
@@ -172,16 +173,28 @@ export class NumericalClassifierStrategy implements RoutingStrategy {
|
||||
config.getGemini31FlashLiteLaunched(),
|
||||
config.getUseCustomToolModel(),
|
||||
]);
|
||||
const selectedModel = resolveClassifierModel(
|
||||
model,
|
||||
modelAlias,
|
||||
useGemini3_1,
|
||||
useGemini3_1FlashLite,
|
||||
useCustomToolModel,
|
||||
config.getHasAccessToPreviewModel?.() ?? true,
|
||||
config,
|
||||
const selectedModel = normalizeModelId(
|
||||
resolveClassifierModel(
|
||||
normalizeModelId(model),
|
||||
modelAlias,
|
||||
useGemini3_1,
|
||||
useGemini3_1FlashLite,
|
||||
useCustomToolModel,
|
||||
config.getHasAccessToPreviewModel?.() ?? true,
|
||||
config,
|
||||
),
|
||||
);
|
||||
|
||||
const service = config.getModelAvailabilityService();
|
||||
const snapshot = service.snapshot(selectedModel);
|
||||
|
||||
if (!snapshot.available) {
|
||||
debugLogger.warn(
|
||||
`[Routing] Numerical classifier selected unavailable model ${selectedModel} (${snapshot.reason}). Bypassing.`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
||||
const latencyMs = Date.now() - startTime;
|
||||
|
||||
return {
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { normalizeModelId } from './modelUtils.js';
|
||||
|
||||
describe('modelUtils', () => {
  describe('normalizeModelId', () => {
    it('should strip "models/" prefix if present', () => {
      // [input, expected] pairs: each input carries the prefix.
      const prefixedCases = [
        ['models/gemini-3.1-pro-preview', 'gemini-3.1-pro-preview'],
        ['models/gemini-1.5-flash', 'gemini-1.5-flash'],
      ] as const;

      for (const [input, expected] of prefixedCases) {
        expect(normalizeModelId(input)).toBe(expected);
      }
    });

    it('should leave model ID untouched if prefix is not present', () => {
      // Inputs without the prefix must come back unchanged.
      const unprefixedIds = ['gemini-3.1-pro-preview', 'auto'];

      for (const id of unprefixedIds) {
        expect(normalizeModelId(id)).toBe(id);
      }
    });

    it('should handle empty string', () => {
      const empty = '';
      expect(normalizeModelId(empty)).toBe('');
    });
  });
});
|
||||
@@ -0,0 +1,17 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/**
 * Strips the 'models/' prefix from a model ID if present.
 * This ensures internal logic (like family matching) works correctly
 * even when receiving formal resource names from the API.
 *
 * Only a single leading prefix is removed and the match is case-sensitive;
 * any other input, including the empty string, is returned unchanged.
 *
 * @param modelId The model identifier to normalize.
 * @returns The model ID without the 'models/' prefix.
 */
export function normalizeModelId(modelId: string): string {
  // Derive the slice offset from the prefix itself so the two cannot drift
  // apart if the prefix literal is ever edited.
  const prefix = 'models/';
  return modelId.startsWith(prefix) ? modelId.slice(prefix.length) : modelId;
}
|
||||
Reference in New Issue
Block a user