fallback to flash for TerminalQuota errors (#13791)

2026-04-27 13:34:15 -07:00 · 2025-11-25 16:17:22 -05:00
parent 6f9118dca6
commit d8a3d08f8e
5 changed files with 229 additions and 40 deletions
@@ -12,7 +12,7 @@ import { theme } from '../semantic-colors.js';
 import {
  DEFAULT_GEMINI_FLASH_LITE_MODEL,
  DEFAULT_GEMINI_FLASH_MODEL,
-  PREVIEW_GEMINI_MODEL,
+  DEFAULT_GEMINI_MODEL,
  UserTierId,
 } from '@google/gemini-cli-core';
@@ -127,7 +127,7 @@ export function ProQuotaDialog({
        <RadioButtonSelect items={items} onSelect={handleSelect} />
      </Box>
      <Text color={theme.text.primary}>
-        {failedModel === PREVIEW_GEMINI_MODEL && !isModelNotFoundError
+        {fallbackModel === DEFAULT_GEMINI_MODEL && !isModelNotFoundError
          ? 'Note: We will periodically retry Preview Model to see if congestion has cleared.'
          : 'Note: You can always use /model to select a different option.'}
      </Text>
@@ -27,6 +27,8 @@ import {
  RetryableQuotaError,
  PREVIEW_GEMINI_MODEL,
  ModelNotFoundError,
  DEFAULT_GEMINI_MODEL,
  DEFAULT_GEMINI_FLASH_MODEL,
 } from '@google/gemini-cli-core';
 import { useQuotaAndFallback } from './useQuotaAndFallback.js';
 import type { UseHistoryManagerReturn } from './useHistoryManager.js';
@@ -432,7 +434,7 @@ To disable Gemini 3, disable "Preview features" in /settings.`,
      await act(() => {
        promise = handler(
          PREVIEW_GEMINI_MODEL,
-          'gemini-flash',
+          DEFAULT_GEMINI_MODEL,
          new Error('preview model failed'),
        );
      });
@@ -447,7 +449,42 @@ To disable Gemini 3, disable "Preview features" in /settings.`,
      const lastCall = (mockHistoryManager.addItem as Mock).mock.calls[0][0];
      expect(lastCall.type).toBe(MessageType.INFO);
      expect(lastCall.text).toContain(
-        `Switched to fallback model gemini-flash. We will periodically check if ${PREVIEW_GEMINI_MODEL} is available again.`,
+        `Switched to fallback model gemini-2.5-pro. We will periodically check if ${PREVIEW_GEMINI_MODEL} is available again.`,
      );
    });
    // Preview model fails and the proposed fallback is the Flash model (not
    // DEFAULT_GEMINI_MODEL): after the user picks "retry_always" the hook must
    // announce the switch but must NOT promise periodic re-checks of the
    // preview model.
    it('should show a special message when falling back from the preview model, but do not show periodical check message for flash model fallback', async () => {
      const { result } = renderHook(() =>
        useQuotaAndFallback({
          config: mockConfig,
          historyManager: mockHistoryManager,
          userTier: UserTierId.FREE,
          setModelSwitchedFromQuotaError: mockSetModelSwitchedFromQuotaError,
        }),
      );
      // The handler under test is the first argument of the first call
      // recorded by setFallbackHandlerSpy.
      const handler = setFallbackHandlerSpy.mock
        .calls[0][0] as FallbackModelHandler;
      let promise: Promise<FallbackIntent | null>;
      // Start the fallback request; it stays pending until a choice is made.
      await act(() => {
        promise = handler(
          PREVIEW_GEMINI_MODEL,
          DEFAULT_GEMINI_FLASH_MODEL,
          new Error('preview model failed'),
        );
      });
      // Resolve the pending request the way the dialog would.
      await act(() => {
        result.current.handleProQuotaChoice('retry_always');
      });
      await promise!;
      expect(mockHistoryManager.addItem).toHaveBeenCalledTimes(1);
      const lastCall = (mockHistoryManager.addItem as Mock).mock.calls[0][0];
      expect(lastCall.type).toBe(MessageType.INFO);
      expect(lastCall.text).toContain(
        `Switched to fallback model gemini-2.5-flash.`,
      );
      // The title promises the periodic-check note is absent; assert it so a
      // regression that re-adds the note for Flash fallbacks is caught.
      expect(lastCall.text).not.toContain('We will periodically check');
    });
  });
@@ -13,6 +13,7 @@ import {
  ModelNotFoundError,
  type UserTierId,
  PREVIEW_GEMINI_MODEL,
  DEFAULT_GEMINI_MODEL,
 } from '@google/gemini-cli-core';
 import { useCallback, useEffect, useRef, useState } from 'react';
 import { type UseHistoryManagerReturn } from './useHistoryManager.js';
@@ -55,11 +56,16 @@ export function useQuotaAndFallback({
      let message: string;
      let isTerminalQuotaError = false;
      let isModelNotFoundError = false;
      const usageLimitReachedModel =
        failedModel === DEFAULT_GEMINI_MODEL ||
        failedModel === PREVIEW_GEMINI_MODEL
          ? 'all Pro models'
          : failedModel;
      if (error instanceof TerminalQuotaError) {
        isTerminalQuotaError = true;
        // Common part of the message for both tiers
        const messageLines = [
-          `Usage limit reached for ${failedModel}.`,
+          `Usage limit reached for ${usageLimitReachedModel}.`,
          error.retryDelayMs ? getResetTimeMessage(error.retryDelayMs) : null,
          `/stats for usage details`,
          `/auth to switch to API key.`,
@@ -116,10 +122,13 @@ export function useQuotaAndFallback({
      if (choice === 'retry_always') {
        // If we were recovering from a Preview Model failure, show a specific message.
        if (proQuotaRequest.failedModel === PREVIEW_GEMINI_MODEL) {
          const showPeriodicalCheckMessage =
            !proQuotaRequest.isModelNotFoundError &&
            proQuotaRequest.fallbackModel === DEFAULT_GEMINI_MODEL;
          historyManager.addItem(
            {
              type: MessageType.INFO,
-              text: `Switched to fallback model ${proQuotaRequest.fallbackModel}. ${!proQuotaRequest.isModelNotFoundError ? `We will periodically check if ${PREVIEW_GEMINI_MODEL} is available again.` : ''}`,
+              text: `Switched to fallback model ${proQuotaRequest.fallbackModel}. ${showPeriodicalCheckMessage ? `We will periodically check if ${PREVIEW_GEMINI_MODEL} is available again.` : ''}`,
            },
            Date.now(),
          );
@@ -25,6 +25,10 @@ import {
 import { logFlashFallback } from '../telemetry/index.js';
 import type { FallbackModelHandler } from './types.js';
 import { ModelNotFoundError } from '../utils/httpErrors.js';
 import {
  RetryableQuotaError,
  TerminalQuotaError,
 } from '../utils/googleQuotaErrors.js';
 // Mock the telemetry logger and event class
 vi.mock('../telemetry/index.js', () => ({
@@ -104,7 +108,7 @@ describe('handleFallback', () => {
    expect(result).toBeNull();
  });
-  describe('when handler returns "retry"', () => {
+  describe('when handler returns "retry_always"', () => {
    it('should activate fallback mode, log telemetry, and return true', async () => {
      mockHandler.mockResolvedValue('retry_always');
@@ -212,65 +216,175 @@ describe('handleFallback', () => {
  describe('Preview Model Fallback Logic', () => {
    const previewModel = PREVIEW_GEMINI_MODEL;
-    it('should always set Preview Model bypass mode on failure', async () => {
+    it('should only set Preview Model bypass mode on retryable quota failure', async () => {
-      await handleFallback(mockConfig, previewModel, AUTH_OAUTH);
+      const mockGoogleApiError = {
        code: 429,
        message: 'mock error',
        details: [],
      };
      const retryableQuotaError = new RetryableQuotaError(
        'Capacity error',
        mockGoogleApiError,
        5,
      );
      await handleFallback(
        mockConfig,
        previewModel,
        AUTH_OAUTH,
        retryableQuotaError,
      );
      expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(true);
    });
-    it('should silently retry if Preview Model fallback mode is already active', async () => {
+    it('should not set Preview Model bypass mode on non-retryable quota failure', async () => {
-      vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(true);
+      const mockGoogleApiError = {
        code: 429,
        message: 'mock error',
        details: [],
      };
      const terminalQuotaError = new TerminalQuotaError(
        'quota error',
        mockGoogleApiError,
        5,
      );
      await handleFallback(
        mockConfig,
        previewModel,
        AUTH_OAUTH,
        terminalQuotaError,
      );
-      const result = await handleFallback(mockConfig, previewModel, AUTH_OAUTH);
+      expect(mockConfig.setPreviewModelBypassMode).not.toHaveBeenCalled();
    });
    // With preview-model fallback mode already reported as active, a
    // RetryableQuotaError should be retried without consulting the UI handler.
    it('should silently retry if Preview Model fallback mode is already active and error is retryable error', async () => {
      const capacityError = new RetryableQuotaError(
        'Capacity error',
        { code: 429, message: 'mock error', details: [] },
        5,
      );
      vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(true);

      const outcome = await handleFallback(
        mockConfig,
        previewModel,
        AUTH_OAUTH,
        capacityError,
      );

      expect(outcome).toBe(true);
      expect(mockHandler).not.toHaveBeenCalled();
    });
-    it('should activate Preview Model fallback mode when handler returns "retry_always"', async () => {
+    it('should activate Preview Model fallback mode when handler returns "retry_always" and is RetryableQuotaError', async () => {
      mockHandler.mockResolvedValue('retry_always');
-
+      const mockGoogleApiError = {
-      const result = await handleFallback(mockConfig, previewModel, AUTH_OAUTH);
+        code: 429,
        message: 'mock error',
        details: [],
      };
      const retryableQuotaError = new RetryableQuotaError(
        'Capacity error',
        mockGoogleApiError,
        5,
      );
      const result = await handleFallback(
        mockConfig,
        previewModel,
        AUTH_OAUTH,
        retryableQuotaError,
      );
      expect(result).toBe(true);
      expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(true);
      expect(mockConfig.setPreviewModelFallbackMode).toHaveBeenCalledWith(true);
    });
    // A TerminalQuotaError on the preview model combined with a
    // "retry_always" choice should enable the regular fallback mode and leave
    // the preview-model fallback mode untouched.
    it('should activate regular fallback when handler returns "retry_always" and is TerminalQuotaError', async () => {
      mockHandler.mockResolvedValue('retry_always');
      const quotaExhausted = new TerminalQuotaError(
        'Quota error',
        { code: 503, message: 'mock error', details: [] },
        5,
      );

      const outcome = await handleFallback(
        mockConfig,
        previewModel,
        AUTH_OAUTH,
        quotaExhausted,
      );

      expect(outcome).toBe(true);
      expect(mockConfig.setPreviewModelFallbackMode).not.toHaveBeenCalled();
      expect(mockConfig.setFallbackMode).toHaveBeenCalledWith(true);
    });
    it('should NOT set fallback mode if user chooses "retry_once"', async () => {
      const mockGoogleApiError = {
        code: 429,
        message: 'mock error',
        details: [],
      };
      const terminalQuotaError = new TerminalQuotaError(
        'quota error',
        mockGoogleApiError,
        5,
      );
      mockHandler.mockResolvedValue('retry_once');
      const result = await handleFallback(
        mockConfig,
        PREVIEW_GEMINI_MODEL,
        AuthType.LOGIN_WITH_GOOGLE,
-        new Error('Capacity'),
+        terminalQuotaError,
      );
      expect(result).toBe(true);
-      expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(true);
+      expect(mockConfig.setPreviewModelBypassMode).not.toHaveBeenCalled();
      expect(mockConfig.setPreviewModelFallbackMode).not.toHaveBeenCalled();
      expect(mockConfig.setFallbackMode).not.toHaveBeenCalled();
    });
-    it('should set fallback mode if user chooses "retry_always"', async () => {
+    it('should pass DEFAULT_GEMINI_MODEL as fallback when Preview Model fails with Retryable Error', async () => {
      mockHandler.mockResolvedValue('retry_always');
      const result = await handleFallback(
        mockConfig,
        PREVIEW_GEMINI_MODEL,
        AuthType.LOGIN_WITH_GOOGLE,
        new Error('Capacity'),
      );
      expect(result).toBe(true);
      expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(true);
      expect(mockConfig.setPreviewModelFallbackMode).toHaveBeenCalledWith(true);
    });
    // Checks the arguments handed to the UI fallback handler when the preview
    // model fails with a RetryableQuotaError: failed model, proposed fallback
    // (DEFAULT_GEMINI_MODEL) and the original error instance.
    it('should pass DEFAULT_GEMINI_MODEL as fallback when Preview Model fails', async () => {
      const stopHandler = vi.fn().mockResolvedValue('stop');
      vi.mocked(mockConfig.fallbackModelHandler!).mockImplementation(
        stopHandler,
      );
      const capacityError = new RetryableQuotaError(
        'Capacity error',
        { code: 429, message: 'mock error', details: [] },
        5,
      );

      await handleFallback(
        mockConfig,
        PREVIEW_GEMINI_MODEL,
        AuthType.LOGIN_WITH_GOOGLE,
        capacityError,
      );

      expect(mockConfig.fallbackModelHandler).toHaveBeenCalledWith(
        PREVIEW_GEMINI_MODEL,
        DEFAULT_GEMINI_MODEL,
        capacityError,
      );
    });
    it('should pass DEFAULT_GEMINI_MODEL as fallback when Preview Model fails with other error', async () => {
      await handleFallback(
        mockConfig,
        PREVIEW_GEMINI_MODEL,
@@ -283,6 +397,31 @@ describe('handleFallback', () => {
        undefined,
      );
    });
    // Checks the arguments handed to the UI fallback handler when the preview
    // model fails with a TerminalQuotaError: the proposed fallback must be the
    // Flash model rather than DEFAULT_GEMINI_MODEL.
    it('should pass DEFAULT_GEMINI_FLASH_MODEL as fallback when Preview Model fails with TerminalQuotaError', async () => {
      const mockGoogleApiError = {
        code: 429,
        message: 'mock error',
        details: [],
      };
      const terminalQuotaError = new TerminalQuotaError(
        'quota error',
        mockGoogleApiError,
        5,
      );
      await handleFallback(
        mockConfig,
        PREVIEW_GEMINI_MODEL,
        AuthType.LOGIN_WITH_GOOGLE,
        terminalQuotaError,
      );
      expect(mockConfig.fallbackModelHandler).toHaveBeenCalledWith(
        PREVIEW_GEMINI_MODEL,
        DEFAULT_GEMINI_FLASH_MODEL,
        terminalQuotaError,
      );
    });
  });
  it('should return null if ModelNotFoundError occurs for a non-preview model', async () => {
@@ -17,6 +17,7 @@ import { openBrowserSecurely } from '../utils/secure-browser-launcher.js';
 import { debugLogger } from '../utils/debugLogger.js';
 import { getErrorMessage } from '../utils/errors.js';
 import { ModelNotFoundError } from '../utils/httpErrors.js';
 import { TerminalQuotaError } from '../utils/googleQuotaErrors.js';
 const UPGRADE_URL_PAGE = 'https://goo.gle/set-up-gemini-code-assist';
@@ -36,10 +37,12 @@ export async function handleFallback(
  ) {
    return null;
  }
-
+  const shouldActivatePreviewFallback =
    failedModel === PREVIEW_GEMINI_MODEL &&
    !(error instanceof TerminalQuotaError);
  // Preview Model Specific Logic
-  if (failedModel === PREVIEW_GEMINI_MODEL) {
+  if (shouldActivatePreviewFallback) {
-    // Always set bypass mode for the immediate retry.
+    // Always set bypass mode for the immediate retry, for non-TerminalQuotaErrors.
    // This ensures the next attempt uses 2.5 Pro.
    config.setPreviewModelBypassMode(true);
@@ -50,10 +53,9 @@ export async function handleFallback(
    }
  }
-  const fallbackModel =
+  const fallbackModel = shouldActivatePreviewFallback
-    failedModel === PREVIEW_GEMINI_MODEL
+    ? DEFAULT_GEMINI_MODEL
-      ? DEFAULT_GEMINI_MODEL
+    : DEFAULT_GEMINI_FLASH_MODEL;
      : DEFAULT_GEMINI_FLASH_MODEL;
  // Consult UI Handler for Intent
  const fallbackModelHandler = config.fallbackModelHandler;
@@ -70,7 +72,9 @@ export async function handleFallback(
    // Process Intent and Update State
    switch (intent) {
      case 'retry_always':
-        if (failedModel === PREVIEW_GEMINI_MODEL) {
+        // If the error is non-retryable, e.g. TerminalQuota Error, trigger a regular fallback to flash.
        // For all other errors, activate previewModel fallback.
        if (shouldActivatePreviewFallback) {
          activatePreviewModelFallbackMode(config);
        } else {
          activateFallbackMode(config, authType);