fix: respect explicit model selection after Flash quota exhaustion (#26759) (#26872)

2026-05-13 13:22:35 -07:00 · 2026-05-12 10:26:50 -04:00
parent 11a9edc808
commit 7a9ed4c20a
10 changed files with 197 additions and 44 deletions
@@ -168,4 +168,46 @@ describe('ModelAvailabilityService', () => {
      reason: 'quota',
    });
  });
+
+  describe('prefix normalization', () => {
+    it('treats prefixed and non-prefixed models as identical when marking terminal', () => {
+      service.markTerminal('models/gemini-3.1-pro-preview', 'quota');
+
+      // Checking the non-prefixed version should show it as unavailable
+      expect(service.snapshot('gemini-3.1-pro-preview')).toEqual({
+        available: false,
+        reason: 'quota',
+      });
+
+      // Checking the prefixed version should also show it as unavailable
+      expect(service.snapshot('models/gemini-3.1-pro-preview')).toEqual({
+        available: false,
+        reason: 'quota',
+      });
+    });
+
+    it('treats prefixed and non-prefixed models as identical when selecting', () => {
+      service.markTerminal('gemini-3-flash-preview', 'quota');
+
+      // Attempting to select the prefixed version should skip it because the base is exhausted
+      const result = service.selectFirstAvailable([
+        'models/gemini-3-flash-preview',
+        'gemini-3.1-pro-preview',
+      ]);
+
+      expect(result.selectedModel).toBe('gemini-3.1-pro-preview');
+      expect(result.skipped).toEqual([
+        { model: 'gemini-3-flash-preview', reason: 'quota' },
+      ]);
+    });
+
+    it('treats prefixed and non-prefixed models as identical when marking healthy', () => {
+      service.markTerminal('gemini-3-flash-preview', 'quota');
+      service.markHealthy('models/gemini-3-flash-preview');
+
+      expect(service.snapshot('gemini-3-flash-preview')).toEqual({
+        available: true,
+      });
+    });
+  });
 });
@@ -39,21 +39,26 @@ export interface ModelSelectionResult {
  }>;
 }

+import { normalizeModelId } from '../utils/modelUtils.js';
+
 export class ModelAvailabilityService {
  private readonly health = new Map<ModelId, HealthState>();

-  markTerminal(model: ModelId, reason: TerminalUnavailabilityReason) {
+  markTerminal(modelId: ModelId, reason: TerminalUnavailabilityReason) {
+    const model = normalizeModelId(modelId);
    this.setState(model, {
      status: 'terminal',
      reason,
    });
  }

-  markHealthy(model: ModelId) {
+  markHealthy(modelId: ModelId) {
+    const model = normalizeModelId(modelId);
    this.clearState(model);
  }

-  markRetryOncePerTurn(model: ModelId, attempts: number = 1) {
+  markRetryOncePerTurn(modelId: ModelId, attempts: number = 1) {
+    const model = normalizeModelId(modelId);
    const currentState = this.health.get(model);
    // Do not override a terminal failure with a transient one.
    if (currentState?.status === 'terminal') {
@@ -75,14 +80,16 @@ export class ModelAvailabilityService {
    });
  }

-  consumeStickyAttempt(model: ModelId) {
+  consumeStickyAttempt(modelId: ModelId) {
+    const model = normalizeModelId(modelId);
    const state = this.health.get(model);
    if (state?.status === 'sticky_retry') {
      this.setState(model, { ...state, consumed: true });
    }
  }

-  snapshot(model: ModelId): ModelAvailabilitySnapshot {
+  snapshot(modelId: ModelId): ModelAvailabilitySnapshot {
+    const model = normalizeModelId(modelId);
    const state = this.health.get(model);

    if (!state) {
@@ -100,10 +107,11 @@ export class ModelAvailabilityService {
    return { available: true };
  }

-  selectFirstAvailable(models: ModelId[]): ModelSelectionResult {
+  selectFirstAvailable(modelIds: ModelId[]): ModelSelectionResult {
    const skipped: ModelSelectionResult['skipped'] = [];

-    for (const model of models) {
+    for (const modelId of modelIds) {
+      const model = normalizeModelId(modelId);
      const snapshot = this.snapshot(model);
      if (snapshot.available) {
        const state = this.health.get(model);
@@ -96,10 +96,11 @@ describe('policyHelpers', () => {

    it('starts chain from preferredModel when model is "auto"', () => {
      const config = createMockConfig({
-        getModel: () => DEFAULT_GEMINI_MODEL_AUTO,
+        getModel: () => 'auto',
      });
      const chain = resolvePolicyChain(config, 'gemini-2.5-flash');
-      expect(chain).toHaveLength(1);
+      // Due to Gemini 2.x wrapsAround, the chain will contain both flash and pro
+      expect(chain.length).toBeGreaterThanOrEqual(1);
      expect(chain[0]?.model).toBe('gemini-2.5-flash');
    });

@@ -28,6 +28,7 @@ import {
  isGemini3Model,
  resolveModel,
 } from '../config/models.js';
+import { normalizeModelId } from '../utils/modelUtils.js';
 import type { ModelSelectionResult } from './modelAvailabilityService.js';
 import type { ModelConfigKey } from '../services/modelConfigService.js';
 import { ApprovalMode } from '../policy/types.js';
@@ -41,9 +42,13 @@ export function resolvePolicyChain(
  preferredModel?: string,
  wrapsAround: boolean = false,
 ): ModelPolicyChain {
-  const modelFromConfig =
-    preferredModel ?? config.getActiveModel?.() ?? config.getModel();
-  const configuredModel = config.getModel();
+  const normalizedPreferredModel = preferredModel
+    ? normalizeModelId(preferredModel)
+    : undefined;
+  const modelFromConfig = normalizeModelId(
+    normalizedPreferredModel ?? config.getActiveModel?.() ?? config.getModel(),
+  );
+  const configuredModel = normalizeModelId(config.getModel());

  let chain: ModelPolicyChain | undefined;
  const useGemini31 = config.getGemini31LaunchedSync?.() ?? false;
@@ -52,19 +57,29 @@ export function resolvePolicyChain(
  const useCustomToolModel = config.getUseCustomToolModelSync?.() ?? false;
  const hasAccessToPreview = config.getHasAccessToPreviewModel?.() ?? true;

-  const resolvedModel = resolveModel(
-    modelFromConfig,
-    useGemini31,
-    useGemini31FlashLite,
-    useCustomToolModel,
-    hasAccessToPreview,
-    config,
+  const resolvedModel = normalizeModelId(
+    resolveModel(
+      modelFromConfig,
+      useGemini31,
+      useGemini31FlashLite,
+      useCustomToolModel,
+      hasAccessToPreview,
+      config,
+    ),
  );
-  const isAutoPreferred = preferredModel
-    ? isAutoModel(preferredModel, config)
+  const isAutoPreferred = normalizedPreferredModel
+    ? isAutoModel(normalizedPreferredModel, config)
    : false;
  const isAutoConfigured = isAutoModel(configuredModel, config);

+  // We always wrap around for Gemini 3 chains to ensure maximum availability
+  // between models in the same family (e.g. fallback to Pro if Flash is exhausted).
+  const effectiveWrapsAround =
+    wrapsAround ||
+    isAutoPreferred ||
+    isAutoConfigured ||
+    isGemini3Model(resolvedModel, config);
+
  // --- DYNAMIC PATH ---
  if (config.getExperimentalDynamicModelConfiguration?.() === true) {
    const context = {
@@ -76,7 +91,7 @@ export function resolvePolicyChain(
    if (resolvedModel === DEFAULT_GEMINI_FLASH_LITE_MODEL) {
      chain = config.modelConfigService.resolveChain('lite', context);
    } else if (
-      isGemini3Model(resolvedModel, config) ||
+      isGemini3Model(normalizeModelId(resolvedModel), config) ||
      isAutoPreferred ||
      isAutoConfigured
    ) {
@@ -96,7 +111,7 @@ export function resolvePolicyChain(
        const previewEnabled =
          hasAccessToPreview &&
          (isGemini3Model(resolvedModel, config) ||
-            preferredModel === PREVIEW_GEMINI_MODEL_AUTO ||
+            normalizedPreferredModel === PREVIEW_GEMINI_MODEL_AUTO ||
            configuredModel === PREVIEW_GEMINI_MODEL_AUTO);
        const autoPrefix = isAutoSelection ? 'auto-' : '';
        const chainKey = previewEnabled ? 'preview' : 'default';
@@ -110,7 +125,7 @@ export function resolvePolicyChain(
      // No matching modelChains found, default to single model chain
      chain = createSingleModelChain(modelFromConfig);
    }
-    chain = applyDynamicSlicing(chain, resolvedModel, wrapsAround);
+    chain = applyDynamicSlicing(chain, resolvedModel, effectiveWrapsAround);
  } else {
    // --- LEGACY PATH ---

@@ -125,7 +140,7 @@ export function resolvePolicyChain(
      if (hasAccessToPreview) {
        const previewEnabled =
          isGemini3Model(resolvedModel, config) ||
-          preferredModel === PREVIEW_GEMINI_MODEL_AUTO ||
+          normalizedPreferredModel === PREVIEW_GEMINI_MODEL_AUTO ||
          configuredModel === PREVIEW_GEMINI_MODEL_AUTO;
        chain = getModelPolicyChain({
          previewEnabled,
@@ -150,7 +165,7 @@ export function resolvePolicyChain(
    } else {
      chain = createSingleModelChain(modelFromConfig);
    }
-    chain = applyDynamicSlicing(chain, resolvedModel, wrapsAround);
+    chain = applyDynamicSlicing(chain, resolvedModel, effectiveWrapsAround);
  }
  // Apply Unified Silent Injection for Plan Mode with defensive checks
  if (config?.getApprovalMode?.() === ApprovalMode.PLAN) {
@@ -171,8 +186,9 @@ function applyDynamicSlicing(
  resolvedModel: string,
  wrapsAround: boolean,
 ): ModelPolicyChain {
+  const normalizedResolved = normalizeModelId(resolvedModel);
  const activeIndex = chain.findIndex(
-    (policy) => policy.model === resolvedModel,
+    (policy) => normalizeModelId(policy.model) === normalizedResolved,
  );
  if (activeIndex !== -1) {
    return wrapsAround
@@ -200,7 +216,10 @@ export function buildFallbackPolicyContext(
  failedPolicy?: ModelPolicy;
  candidates: ModelPolicy[];
 } {
-  const index = chain.findIndex((policy) => policy.model === failedModel);
+  const normalizedFailed = normalizeModelId(failedModel);
+  const index = chain.findIndex(
+    (policy) => normalizeModelId(policy.model) === normalizedFailed,
+  );
  if (index === -1) {
    return { failedPolicy: undefined, candidates: chain };
  }
@@ -27,6 +27,7 @@ import type { Content } from '@google/genai';
 import type { ResolvedModelConfig } from '../../services/modelConfigService.js';
 import { debugLogger } from '../../utils/debugLogger.js';
 import { AuthType } from '../../core/contentGenerator.js';
+import { ModelAvailabilityService } from '../../availability/modelAvailabilityService.js';

 vi.mock('../../core/baseLlmClient.js');

@@ -68,6 +69,9 @@ describe('ClassifierStrategy', () => {
      getContentGeneratorConfig: vi.fn().mockReturnValue({
        authType: AuthType.LOGIN_WITH_GOOGLE,
      }),
+      getModelAvailabilityService: vi
+        .fn()
+        .mockReturnValue(new ModelAvailabilityService()),
    } as unknown as Config;
    mockBaseLlmClient = {
      generateJson: vi.fn(),
@@ -20,6 +20,7 @@ import {
  isFunctionResponse,
 } from '../../utils/messageInspectors.js';
 import { debugLogger } from '../../utils/debugLogger.js';
+import { normalizeModelId } from '../../utils/modelUtils.js';
 import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js';
 import { LlmRole } from '../../telemetry/types.js';

@@ -177,16 +178,28 @@ export class ClassifierStrategy implements RoutingStrategy {
          config.getGemini31FlashLiteLaunched(),
          config.getUseCustomToolModel(),
        ]);
-      const selectedModel = resolveClassifierModel(
-        model,
-        routerResponse.model_choice,
-        useGemini3_1,
-        useGemini3_1FlashLite,
-        useCustomToolModel,
-        config.getHasAccessToPreviewModel?.() ?? true,
-        config,
+      const selectedModel = normalizeModelId(
+        resolveClassifierModel(
+          normalizeModelId(model),
+          routerResponse.model_choice,
+          useGemini3_1,
+          useGemini3_1FlashLite,
+          useCustomToolModel,
+          config.getHasAccessToPreviewModel?.() ?? true,
+          config,
+        ),
      );

+      const service = config.getModelAvailabilityService();
+      const snapshot = service.snapshot(selectedModel);
+
+      if (!snapshot.available) {
+        debugLogger.warn(
+          `[Routing] Classifier selected unavailable model ${selectedModel} (${snapshot.reason}). Bypassing.`,
+        );
+        return null;
+      }
+
      return {
        model: selectedModel,
        metadata: {
@@ -27,6 +27,7 @@ import type { ResolvedModelConfig } from '../../services/modelConfigService.js';
 import { debugLogger } from '../../utils/debugLogger.js';
 import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js';
 import { AuthType } from '../../core/contentGenerator.js';
+import { ModelAvailabilityService } from '../../availability/modelAvailabilityService.js';

 vi.mock('../../core/baseLlmClient.js');

@@ -71,6 +72,9 @@ describe('NumericalClassifierStrategy', () => {
      getContentGeneratorConfig: vi.fn().mockReturnValue({
        authType: AuthType.LOGIN_WITH_GOOGLE,
      }),
+      getModelAvailabilityService: vi
+        .fn()
+        .mockReturnValue(new ModelAvailabilityService()),
    } as unknown as Config;
    mockBaseLlmClient = {
      generateJson: vi.fn(),
@@ -20,6 +20,7 @@ import {
  isFunctionResponse,
 } from '../../utils/messageInspectors.js';
 import { debugLogger } from '../../utils/debugLogger.js';
+import { normalizeModelId } from '../../utils/modelUtils.js';
 import type { LocalLiteRtLmClient } from '../../core/localLiteRtLmClient.js';
 import { LlmRole } from '../../telemetry/types.js';

@@ -172,16 +173,28 @@ export class NumericalClassifierStrategy implements RoutingStrategy {
          config.getGemini31FlashLiteLaunched(),
          config.getUseCustomToolModel(),
        ]);
-      const selectedModel = resolveClassifierModel(
-        model,
-        modelAlias,
-        useGemini3_1,
-        useGemini3_1FlashLite,
-        useCustomToolModel,
-        config.getHasAccessToPreviewModel?.() ?? true,
-        config,
+      const selectedModel = normalizeModelId(
+        resolveClassifierModel(
+          normalizeModelId(model),
+          modelAlias,
+          useGemini3_1,
+          useGemini3_1FlashLite,
+          useCustomToolModel,
+          config.getHasAccessToPreviewModel?.() ?? true,
+          config,
+        ),
      );

+      const service = config.getModelAvailabilityService();
+      const snapshot = service.snapshot(selectedModel);
+
+      if (!snapshot.available) {
+        debugLogger.warn(
+          `[Routing] Numerical classifier selected unavailable model ${selectedModel} (${snapshot.reason}). Bypassing.`,
+        );
+        return null;
+      }
+
      const latencyMs = Date.now() - startTime;

      return {
@@ -0,0 +1,32 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { normalizeModelId } from './modelUtils.js';
+
+describe('modelUtils', () => {
+  describe('normalizeModelId', () => {
+    it('should strip "models/" prefix if present', () => {
+      expect(normalizeModelId('models/gemini-3.1-pro-preview')).toBe(
+        'gemini-3.1-pro-preview',
+      );
+      expect(normalizeModelId('models/gemini-1.5-flash')).toBe(
+        'gemini-1.5-flash',
+      );
+    });
+
+    it('should leave model ID untouched if prefix is not present', () => {
+      expect(normalizeModelId('gemini-3.1-pro-preview')).toBe(
+        'gemini-3.1-pro-preview',
+      );
+      expect(normalizeModelId('auto')).toBe('auto');
+    });
+
+    it('should handle empty string', () => {
+      expect(normalizeModelId('')).toBe('');
+    });
+  });
+});
@@ -0,0 +1,17 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * Strips the 'models/' prefix from a model ID if present.
+ * This ensures internal logic (like family matching) works correctly
+ * even when receiving formal resource names from the API.
+ *
+ * @param modelId The model identifier to normalize.
+ * @returns The model ID without the 'models/' prefix.
+ */
+export function normalizeModelId(modelId: string): string {
+  return modelId.startsWith('models/') ? modelId.slice(7) : modelId;
+}