fix: handle request retries and model fallback correctly (#11624)

2026-05-13 05:12:55 -07:00 · 2025-10-24 11:09:06 -07:00
parent c2104a14fb
commit ee92db7533
14 changed files with 1357 additions and 804 deletions
@@ -19,25 +19,15 @@ import {
  type FallbackModelHandler,
  UserTierId,
  AuthType,
-  isGenericQuotaExceededError,
-  isProQuotaExceededError,
+  TerminalQuotaError,
  makeFakeConfig,
+  type GoogleApiError,
+  RetryableQuotaError,
 } from '@google/gemini-cli-core';
 import { useQuotaAndFallback } from './useQuotaAndFallback.js';
 import type { UseHistoryManagerReturn } from './useHistoryManager.js';
 import { AuthState, MessageType } from '../types.js';

-// Mock the error checking functions from the core package to control test scenarios
-vi.mock('@google/gemini-cli-core', async (importOriginal) => {
-  const original =
-    await importOriginal<typeof import('@google/gemini-cli-core')>();
-  return {
-    ...original,
-    isGenericQuotaExceededError: vi.fn(),
-    isProQuotaExceededError: vi.fn(),
-  };
-});
-
 // Use a type alias for SpyInstance as it's not directly exported
 type SpyInstance = ReturnType<typeof vi.spyOn>;

@@ -47,12 +37,15 @@ describe('useQuotaAndFallback', () => {
  let mockSetAuthState: Mock;
  let mockSetModelSwitchedFromQuotaError: Mock;
  let setFallbackHandlerSpy: SpyInstance;
-
-  const mockedIsGenericQuotaExceededError = isGenericQuotaExceededError as Mock;
-  const mockedIsProQuotaExceededError = isProQuotaExceededError as Mock;
+  let mockGoogleApiError: GoogleApiError;

  beforeEach(() => {
    mockConfig = makeFakeConfig();
+    mockGoogleApiError = {
+      code: 429,
+      message: 'mock error',
+      details: [],
+    };

    // Spy on the method that requires the private field and mock its return.
    // This is cleaner than modifying the config class for tests.
@@ -72,9 +65,6 @@ describe('useQuotaAndFallback', () => {

    setFallbackHandlerSpy = vi.spyOn(mockConfig, 'setFallbackModelHandler');
    vi.spyOn(mockConfig, 'setQuotaErrorOccurred');
-
-    mockedIsGenericQuotaExceededError.mockReturnValue(false);
-    mockedIsProQuotaExceededError.mockReturnValue(false);
  });

  afterEach(() => {
@@ -140,51 +130,62 @@ describe('useQuotaAndFallback', () => {
    describe('Automatic Fallback Scenarios', () => {
      const testCases = [
        {
-          errorType: 'generic',
+          description: 'other error for FREE tier',
          tier: UserTierId.FREE,
+          error: new Error('some error'),
          expectedMessageSnippets: [
-            'Automatically switching from model-A to model-B',
+            'Automatically switching from model-A to model-B for faster responses',
            'upgrade to a Gemini Code Assist Standard or Enterprise plan',
          ],
        },
        {
-          errorType: 'generic',
-          tier: UserTierId.STANDARD, // Paid tier
+          description: 'other error for LEGACY tier',
+          tier: UserTierId.LEGACY, // Paid tier
+          error: new Error('some error'),
          expectedMessageSnippets: [
-            'Automatically switching from model-A to model-B',
+            'Automatically switching from model-A to model-B for faster responses',
            'switch to using a paid API key from AI Studio',
          ],
        },
        {
-          errorType: 'other',
+          description: 'retryable quota error for FREE tier',
          tier: UserTierId.FREE,
+          error: new RetryableQuotaError(
+            'retryable quota',
+            { code: 429, message: 'mock error', details: [] }, // inline literal: mockGoogleApiError is still undefined while this array is built (beforeEach has not run yet)
+            5,
+          ),
          expectedMessageSnippets: [
-            'Automatically switching from model-A to model-B for faster responses',
-            'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+            'Your requests are being throttled right now due to server being at capacity for model-A',
+            'Automatically switching from model-A to model-B',
+            'upgrading to a Gemini Code Assist Standard or Enterprise plan',
          ],
        },
        {
-          errorType: 'other',
+          description: 'retryable quota error for LEGACY tier',
          tier: UserTierId.LEGACY, // Paid tier
+          error: new RetryableQuotaError(
+            'retryable quota',
+            { code: 429, message: 'mock error', details: [] }, // inline literal, same reason as the FREE-tier retryable case above
+            5,
+          ),
          expectedMessageSnippets: [
-            'Automatically switching from model-A to model-B for faster responses',
+            'Your requests are being throttled right now due to server being at capacity for model-A',
+            'Automatically switching from model-A to model-B',
            'switch to using a paid API key from AI Studio',
          ],
        },
      ];

-      for (const { errorType, tier, expectedMessageSnippets } of testCases) {
-        it(`should handle ${errorType} error for ${tier} tier correctly`, async () => {
-          mockedIsGenericQuotaExceededError.mockReturnValue(
-            errorType === 'generic',
-          );
-
+      for (const {
+        description,
+        tier,
+        error,
+        expectedMessageSnippets,
+      } of testCases) {
+        it(`should handle ${description} correctly`, async () => {
          const handler = getRegisteredHandler(tier);
-          const result = await handler(
-            'model-A',
-            'model-B',
-            new Error('quota exceeded'),
-          );
+          const result = await handler('model-A', 'model-B', error);

          // Automatic fallbacks should return 'stop'
          expect(result).toBe('stop');
@@ -207,10 +208,6 @@ describe('useQuotaAndFallback', () => {
    });

    describe('Interactive Fallback (Pro Quota Error)', () => {
-      beforeEach(() => {
-        mockedIsProQuotaExceededError.mockReturnValue(true);
-      });
-
      it('should set an interactive request and wait for user choice', async () => {
        const { result } = renderHook(() =>
          useQuotaAndFallback({
@@ -229,7 +226,7 @@ describe('useQuotaAndFallback', () => {
        const promise = handler(
          'gemini-pro',
          'gemini-flash',
-          new Error('pro quota'),
+          new TerminalQuotaError('pro quota', mockGoogleApiError),
        );

        await act(async () => {});
@@ -268,7 +265,7 @@ describe('useQuotaAndFallback', () => {
        const promise1 = handler(
          'gemini-pro',
          'gemini-flash',
-          new Error('pro quota 1'),
+          new TerminalQuotaError('pro quota 1', mockGoogleApiError),
        );
        await act(async () => {});

@@ -278,7 +275,7 @@ describe('useQuotaAndFallback', () => {
        const result2 = await handler(
          'gemini-pro',
          'gemini-flash',
-          new Error('pro quota 2'),
+          new TerminalQuotaError('pro quota 2', mockGoogleApiError),
        );

        // The lock should have stopped the second request
@@ -297,10 +294,6 @@ describe('useQuotaAndFallback', () => {
  });

  describe('handleProQuotaChoice', () => {
-    beforeEach(() => {
-      mockedIsProQuotaExceededError.mockReturnValue(true);
-    });
-
    it('should do nothing if there is no pending pro quota request', () => {
      const { result } = renderHook(() =>
        useQuotaAndFallback({
@@ -336,7 +329,7 @@ describe('useQuotaAndFallback', () => {
      const promise = handler(
        'gemini-pro',
        'gemini-flash',
-        new Error('pro quota'),
+        new TerminalQuotaError('pro quota', mockGoogleApiError),
      );
      await act(async () => {}); // Allow state to update

@@ -367,7 +360,7 @@ describe('useQuotaAndFallback', () => {
      const promise = handler(
        'gemini-pro',
        'gemini-flash',
-        new Error('pro quota'),
+        new TerminalQuotaError('pro quota', mockGoogleApiError),
      );
      await act(async () => {}); // Allow state to update

@@ -9,9 +9,9 @@ import {
  type Config,
  type FallbackModelHandler,
  type FallbackIntent,
-  isGenericQuotaExceededError,
-  isProQuotaExceededError,
+  TerminalQuotaError,
  UserTierId,
+  RetryableQuotaError,
 } from '@google/gemini-cli-core';
 import { useCallback, useEffect, useRef, useState } from 'react';
 import { type UseHistoryManagerReturn } from './useHistoryManager.js';
@@ -63,7 +63,7 @@ export function useQuotaAndFallback({

      let message: string;

-      if (error && isProQuotaExceededError(error)) {
+      if (error instanceof TerminalQuotaError) {
        // Pro Quota specific messages (Interactive)
        if (isPaidTier) {
          message = `⚡ You have reached your daily ${failedModel} quota limit.
@@ -76,31 +76,30 @@ export function useQuotaAndFallback({
 ⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
 ⚡ You can switch authentication methods by typing /auth`;
        }
-      } else if (error && isGenericQuotaExceededError(error)) {
-        // Generic Quota (Automatic fallback)
-        const actionMessage = `⚡ You have reached your daily quota limit.\n⚡ Automatically switching from ${failedModel} to ${fallbackModel} for the remainder of this session.`;
+      } else if (error instanceof RetryableQuotaError) {
+        // Short term quota retries exhausted (Automatic fallback)
+        const actionMessage = `⚡ Your requests are being throttled right now due to server being at capacity for ${failedModel}.\n⚡ Automatically switching from ${failedModel} to ${fallbackModel} for the remainder of this session.`;

        if (isPaidTier) {
          message = `${actionMessage}
-⚡ To continue accessing the ${failedModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+⚡ To continue accessing the ${failedModel} model, retry your request after some time or consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
        } else {
          message = `${actionMessage}
-⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
-⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
+⚡ Retry your requests after some time. Otherwise consider upgrading to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
 ⚡ You can switch authentication methods by typing /auth`;
        }
      } else {
-        // Consecutive 429s or other errors (Automatic fallback)
+        // Other errors (Automatic fallback)
        const actionMessage = `⚡ Automatically switching from ${failedModel} to ${fallbackModel} for faster responses for the remainder of this session.`;

        if (isPaidTier) {
          message = `${actionMessage}
-⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${failedModel} quota limit
-⚡ To continue accessing the ${failedModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+⚡ Your requests are being throttled temporarily due to server being at capacity for ${failedModel} or there is a service outage.
+⚡ To continue accessing the ${failedModel} model, you can retry your request after some time or consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
        } else {
          message = `${actionMessage}
-⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${failedModel} quota limit
-⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
+⚡ Your requests are being throttled temporarily due to server being at capacity for ${failedModel} or there is a service outage.
+⚡ To avoid being throttled, you can retry your request after some time or upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
 ⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
 ⚡ You can switch authentication methods by typing /auth`;
        }
@@ -119,7 +118,7 @@ export function useQuotaAndFallback({
      config.setQuotaErrorOccurred(true);

      // Interactive Fallback for Pro quota
-      if (error && isProQuotaExceededError(error)) {
+      if (error instanceof TerminalQuotaError) {
        if (isDialogPending.current) {
          return 'stop'; // A dialog is already active, so just stop this request.
        }