Revert "fix: handle request retries and model fallback correctly" (#11164)

2026-05-13 21:32:56 -07:00 · 2025-10-14 16:30:59 -07:00
parent bd5c158a62
commit 996c9f5955
13 changed files with 821 additions and 1013 deletions
@@ -19,14 +19,25 @@ import {
  type FallbackModelHandler,
  UserTierId,
  AuthType,
-  TerminalQuotaError,
+  isGenericQuotaExceededError,
+  isProQuotaExceededError,
  makeFakeConfig,
-  type GoogleApiError,
 } from '@google/gemini-cli-core';
 import { useQuotaAndFallback } from './useQuotaAndFallback.js';
 import type { UseHistoryManagerReturn } from './useHistoryManager.js';
 import { AuthState, MessageType } from '../types.js';

+// Mock the error checking functions from the core package to control test scenarios
+vi.mock('@google/gemini-cli-core', async (importOriginal) => {
+  const original =
+    await importOriginal<typeof import('@google/gemini-cli-core')>();
+  return {
+    ...original,
+    isGenericQuotaExceededError: vi.fn(),
+    isProQuotaExceededError: vi.fn(),
+  };
+});
+
 // Use a type alias for SpyInstance as it's not directly exported
 type SpyInstance = ReturnType<typeof vi.spyOn>;

@@ -36,15 +47,12 @@ describe('useQuotaAndFallback', () => {
  let mockSetAuthState: Mock;
  let mockSetModelSwitchedFromQuotaError: Mock;
  let setFallbackHandlerSpy: SpyInstance;
-  let mockGoogleApiError: GoogleApiError;
+
+  const mockedIsGenericQuotaExceededError = isGenericQuotaExceededError as Mock;
+  const mockedIsProQuotaExceededError = isProQuotaExceededError as Mock;

  beforeEach(() => {
    mockConfig = makeFakeConfig();
-    mockGoogleApiError = {
-      code: 429,
-      message: 'mock error',
-      details: [],
-    };

    // Spy on the method that requires the private field and mock its return.
    // This is cleaner than modifying the config class for tests.
@@ -64,6 +72,9 @@ describe('useQuotaAndFallback', () => {

    setFallbackHandlerSpy = vi.spyOn(mockConfig, 'setFallbackModelHandler');
    vi.spyOn(mockConfig, 'setQuotaErrorOccurred');
+
+    mockedIsGenericQuotaExceededError.mockReturnValue(false);
+    mockedIsProQuotaExceededError.mockReturnValue(false);
  });

  afterEach(() => {
@@ -128,6 +139,22 @@ describe('useQuotaAndFallback', () => {

    describe('Automatic Fallback Scenarios', () => {
      const testCases = [
+        {
+          errorType: 'generic',
+          tier: UserTierId.FREE,
+          expectedMessageSnippets: [
+            'Automatically switching from model-A to model-B',
+            'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+          ],
+        },
+        {
+          errorType: 'generic',
+          tier: UserTierId.STANDARD, // Paid tier
+          expectedMessageSnippets: [
+            'Automatically switching from model-A to model-B',
+            'switch to using a paid API key from AI Studio',
+          ],
+        },
        {
          errorType: 'other',
          tier: UserTierId.FREE,
@@ -148,11 +175,15 @@ describe('useQuotaAndFallback', () => {

      for (const { errorType, tier, expectedMessageSnippets } of testCases) {
        it(`should handle ${errorType} error for ${tier} tier correctly`, async () => {
+          mockedIsGenericQuotaExceededError.mockReturnValue(
+            errorType === 'generic',
+          );
+
          const handler = getRegisteredHandler(tier);
          const result = await handler(
            'model-A',
            'model-B',
-            new Error('some error'),
+            new Error('quota exceeded'),
          );

          // Automatic fallbacks should return 'stop'
@@ -176,6 +207,10 @@ describe('useQuotaAndFallback', () => {
    });

    describe('Interactive Fallback (Pro Quota Error)', () => {
+      beforeEach(() => {
+        mockedIsProQuotaExceededError.mockReturnValue(true);
+      });
+
      it('should set an interactive request and wait for user choice', async () => {
        const { result } = renderHook(() =>
          useQuotaAndFallback({
@@ -194,7 +229,7 @@ describe('useQuotaAndFallback', () => {
        const promise = handler(
          'gemini-pro',
          'gemini-flash',
-          new TerminalQuotaError('pro quota', mockGoogleApiError),
+          new Error('pro quota'),
        );

        await act(async () => {});
@@ -233,7 +268,7 @@ describe('useQuotaAndFallback', () => {
        const promise1 = handler(
          'gemini-pro',
          'gemini-flash',
-          new TerminalQuotaError('pro quota 1', mockGoogleApiError),
+          new Error('pro quota 1'),
        );
        await act(async () => {});

@@ -243,7 +278,7 @@ describe('useQuotaAndFallback', () => {
        const result2 = await handler(
          'gemini-pro',
          'gemini-flash',
-          new TerminalQuotaError('pro quota 2', mockGoogleApiError),
+          new Error('pro quota 2'),
        );

        // The lock should have stopped the second request
@@ -262,6 +297,10 @@ describe('useQuotaAndFallback', () => {
  });

  describe('handleProQuotaChoice', () => {
+    beforeEach(() => {
+      mockedIsProQuotaExceededError.mockReturnValue(true);
+    });
+
    it('should do nothing if there is no pending pro quota request', () => {
      const { result } = renderHook(() =>
        useQuotaAndFallback({
@@ -297,7 +336,7 @@ describe('useQuotaAndFallback', () => {
      const promise = handler(
        'gemini-pro',
        'gemini-flash',
-        new TerminalQuotaError('pro quota', mockGoogleApiError),
+        new Error('pro quota'),
      );
      await act(async () => {}); // Allow state to update

@@ -328,7 +367,7 @@ describe('useQuotaAndFallback', () => {
      const promise = handler(
        'gemini-pro',
        'gemini-flash',
-        new TerminalQuotaError('pro quota', mockGoogleApiError),
+        new Error('pro quota'),
      );
      await act(async () => {}); // Allow state to update

@@ -9,7 +9,8 @@ import {
  type Config,
  type FallbackModelHandler,
  type FallbackIntent,
-  TerminalQuotaError,
+  isGenericQuotaExceededError,
+  isProQuotaExceededError,
  UserTierId,
 } from '@google/gemini-cli-core';
 import { useCallback, useEffect, useRef, useState } from 'react';
@@ -62,7 +63,7 @@ export function useQuotaAndFallback({

      let message: string;

-      if (error instanceof TerminalQuotaError) {
+      if (error && isProQuotaExceededError(error)) {
        // Pro Quota specific messages (Interactive)
        if (isPaidTier) {
          message = `⚡ You have reached your daily ${failedModel} quota limit.
@@ -73,6 +74,19 @@ export function useQuotaAndFallback({
 ⚡ You can choose to authenticate with a paid API key or continue with the fallback model.
 ⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
 ⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
+⚡ You can switch authentication methods by typing /auth`;
+        }
+      } else if (error && isGenericQuotaExceededError(error)) {
+        // Generic Quota (Automatic fallback)
+        const actionMessage = `⚡ You have reached your daily quota limit.\n⚡ Automatically switching from ${failedModel} to ${fallbackModel} for the remainder of this session.`;
+
+        if (isPaidTier) {
+          message = `${actionMessage}
+⚡ To continue accessing the ${failedModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+        } else {
+          message = `${actionMessage}
+⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
+⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
 ⚡ You can switch authentication methods by typing /auth`;
        }
      } else {
@@ -105,7 +119,7 @@ export function useQuotaAndFallback({
      config.setQuotaErrorOccurred(true);

      // Interactive Fallback for Pro quota
-      if (error instanceof TerminalQuotaError) {
+      if (error && isProQuotaExceededError(error)) {
        if (isDialogPending.current) {
          return 'stop'; // A dialog is already active, so just stop this request.
        }
@@ -44,5 +44,3 @@ export { makeFakeConfig } from './src/test-utils/config.js';
 export * from './src/utils/pathReader.js';
 export { ClearcutLogger } from './src/telemetry/clearcut-logger/clearcut-logger.js';
 export { logModelSlashCommand } from './src/telemetry/loggers.js';
-export * from './src/utils/googleQuotaErrors.js';
-export type { GoogleApiError } from './src/utils/googleErrors.js';
@@ -6,7 +6,9 @@

 import { describe, it, expect } from 'vitest';
 import { parseAndFormatApiError } from './errorParsing.js';
+import { isProQuotaExceededError } from './quotaErrorDetection.js';
 import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
+import { UserTierId } from '../code_assist/types.js';
 import { AuthType } from '../core/contentGenerator.js';
 import type { StructuredError } from '../core/turn.js';

@@ -38,6 +40,22 @@ describe('parseAndFormatApiError', () => {
    );
  });

+  it('should format a 429 API error with the personal message', () => {
+    const errorMessage =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
+    const result = parseAndFormatApiError(
+      errorMessage,
+      AuthType.LOGIN_WITH_GOOGLE,
+      undefined,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    expect(result).toContain('[API Error: Rate limit exceeded');
+    expect(result).toContain(
+      'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
+    );
+  });
+
  it('should format a 429 API error with the vertex message', () => {
    const errorMessage =
      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
@@ -114,4 +132,230 @@ describe('parseAndFormatApiError', () => {
    const expected = '[API Error: An unknown error occurred.]';
    expect(parseAndFormatApiError(error)).toBe(expected);
  });
+
+  it('should format a 429 API error with Pro quota exceeded message for Google auth (Free tier)', () => {
+    const errorMessage =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+    const result = parseAndFormatApiError(
+      errorMessage,
+      AuthType.LOGIN_WITH_GOOGLE,
+      undefined,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    expect(result).toContain(
+      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
+    );
+    expect(result).toContain(
+      'You have reached your daily gemini-2.5-pro quota limit',
+    );
+    expect(result).toContain('upgrade to get higher limits');
+  });
+
+  it('should format a regular 429 API error with standard message for Google auth', () => {
+    const errorMessage =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
+    const result = parseAndFormatApiError(
+      errorMessage,
+      AuthType.LOGIN_WITH_GOOGLE,
+      undefined,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    expect(result).toContain('[API Error: Rate limit exceeded');
+    expect(result).toContain(
+      'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
+    );
+    expect(result).not.toContain(
+      'You have reached your daily gemini-2.5-pro quota limit',
+    );
+  });
+
+  it('should format a 429 API error with generic quota exceeded message for Google auth', () => {
+    const errorMessage =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+    const result = parseAndFormatApiError(
+      errorMessage,
+      AuthType.LOGIN_WITH_GOOGLE,
+      undefined,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    expect(result).toContain(
+      "[API Error: Quota exceeded for quota metric 'GenerationRequests'",
+    );
+    expect(result).toContain('You have reached your daily quota limit');
+    // The Pro message embeds the model id, so that exact text must be absent.
+    const proMessage = 'You have reached your daily gemini-2.5-pro quota limit';
+    expect(result).not.toContain(proMessage);
+  });
+
+  it('should prioritize Pro quota message over generic quota message for Google auth', () => {
+    const errorMessage =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+    const result = parseAndFormatApiError(
+      errorMessage,
+      AuthType.LOGIN_WITH_GOOGLE,
+      undefined,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    expect(result).toContain(
+      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
+    );
+    expect(result).toContain(
+      'You have reached your daily gemini-2.5-pro quota limit',
+    );
+    expect(result).not.toContain('You have reached your daily quota limit');
+  });
+
+  it('should format a 429 API error with Pro quota exceeded message for Google auth (Standard tier)', () => {
+    const errorMessage =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+    const result = parseAndFormatApiError(
+      errorMessage,
+      AuthType.LOGIN_WITH_GOOGLE,
+      UserTierId.STANDARD,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    expect(result).toContain(
+      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
+    );
+    expect(result).toContain(
+      'You have reached your daily gemini-2.5-pro quota limit',
+    );
+    expect(result).toContain(
+      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
+    );
+    expect(result).not.toContain('upgrade to get higher limits');
+  });
+
+  it('should format a 429 API error with Pro quota exceeded message for Google auth (Legacy tier)', () => {
+    const errorMessage =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+    const result = parseAndFormatApiError(
+      errorMessage,
+      AuthType.LOGIN_WITH_GOOGLE,
+      UserTierId.LEGACY,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    expect(result).toContain(
+      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
+    );
+    expect(result).toContain(
+      'You have reached your daily gemini-2.5-pro quota limit',
+    );
+    expect(result).toContain(
+      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
+    );
+    expect(result).not.toContain('upgrade to get higher limits');
+  });
+
+  it('should handle different Gemini 2.5 version strings in Pro quota exceeded errors', () => {
+    const errorMessage25 =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+    const errorMessagePreview =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+
+    const result25 = parseAndFormatApiError(
+      errorMessage25,
+      AuthType.LOGIN_WITH_GOOGLE,
+      undefined,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    const resultPreview = parseAndFormatApiError(
+      errorMessagePreview,
+      AuthType.LOGIN_WITH_GOOGLE,
+      undefined,
+      'gemini-2.5-preview-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+
+    expect(result25).toContain(
+      'You have reached your daily gemini-2.5-pro quota limit',
+    );
+    expect(resultPreview).toContain(
+      'You have reached your daily gemini-2.5-preview-pro quota limit',
+    );
+    expect(result25).toContain('upgrade to get higher limits');
+    expect(resultPreview).toContain('upgrade to get higher limits');
+  });
+
+  it('should not match non-Pro models with similar version strings', () => {
+    // Test that Flash models with similar version strings don't match
+    expect(
+      isProQuotaExceededError(
+        "Quota exceeded for quota metric 'Gemini 2.5 Flash Requests' and limit",
+      ),
+    ).toBe(false);
+    expect(
+      isProQuotaExceededError(
+        "Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit",
+      ),
+    ).toBe(false);
+
+    // Test other model types
+    expect(
+      isProQuotaExceededError(
+        "Quota exceeded for quota metric 'Gemini 2.5 Ultra Requests' and limit",
+      ),
+    ).toBe(false);
+    expect(
+      isProQuotaExceededError(
+        "Quota exceeded for quota metric 'Gemini 2.5 Standard Requests' and limit",
+      ),
+    ).toBe(false);
+
+    // Test generic quota messages
+    expect(
+      isProQuotaExceededError(
+        "Quota exceeded for quota metric 'GenerationRequests' and limit",
+      ),
+    ).toBe(false);
+    expect(
+      isProQuotaExceededError(
+        "Quota exceeded for quota metric 'EmbeddingRequests' and limit",
+      ),
+    ).toBe(false);
+  });
+
+  it('should format a generic quota exceeded message for Google auth (Standard tier)', () => {
+    const errorMessage =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+    const result = parseAndFormatApiError(
+      errorMessage,
+      AuthType.LOGIN_WITH_GOOGLE,
+      UserTierId.STANDARD,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    expect(result).toContain(
+      "[API Error: Quota exceeded for quota metric 'GenerationRequests'",
+    );
+    expect(result).toContain('You have reached your daily quota limit');
+    expect(result).toContain(
+      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
+    );
+    expect(result).not.toContain('upgrade to get higher limits');
+  });
+
+  it('should format a regular 429 API error with standard message for Google auth (Standard tier)', () => {
+    const errorMessage =
+      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
+    const result = parseAndFormatApiError(
+      errorMessage,
+      AuthType.LOGIN_WITH_GOOGLE,
+      UserTierId.STANDARD,
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+    expect(result).toContain('[API Error: Rate limit exceeded');
+    expect(result).toContain(
+      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
+    );
+    expect(result).not.toContain('upgrade to get higher limits');
+  });
 });
@@ -4,11 +4,50 @@
 * SPDX-License-Identifier: Apache-2.0
 */

-import { isApiError, isStructuredError } from './quotaErrorDetection.js';
-import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
-import type { UserTierId } from '../code_assist/types.js';
+import {
+  isProQuotaExceededError,
+  isGenericQuotaExceededError,
+  isApiError,
+  isStructuredError,
+} from './quotaErrorDetection.js';
+import {
+  DEFAULT_GEMINI_MODEL,
+  DEFAULT_GEMINI_FLASH_MODEL,
+} from '../config/models.js';
+import { UserTierId } from '../code_assist/types.js';
 import { AuthType } from '../core/contentGenerator.js';

+// Free Tier message functions
+const getRateLimitErrorMessageGoogleFree = (
+  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
+) =>
+  `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
+
+const getRateLimitErrorMessageGoogleProQuotaFree = (
+  currentModel: string = DEFAULT_GEMINI_MODEL,
+  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
+) =>
+  `\nYou have reached your daily ${currentModel} quota limit. You will be switched to the ${fallbackModel} model for the rest of this session. To increase your limits, upgrade to get higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+
+const getRateLimitErrorMessageGoogleGenericQuotaFree = () =>
+  `\nYou have reached your daily quota limit. To increase your limits, upgrade to get higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+
+// Legacy/Standard Tier message functions
+const getRateLimitErrorMessageGooglePaid = (
+  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
+) =>
+  `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`;
+
+const getRateLimitErrorMessageGoogleProQuotaPaid = (
+  currentModel: string = DEFAULT_GEMINI_MODEL,
+  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
+) =>
+  `\nYou have reached your daily ${currentModel} quota limit. You will be switched to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI. To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+
+const getRateLimitErrorMessageGoogleGenericQuotaPaid = (
+  currentModel: string = DEFAULT_GEMINI_MODEL,
+) =>
+  `\nYou have reached your daily quota limit. We appreciate you for choosing Gemini Code Assist and the Gemini CLI. To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
 const RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI =
  '\nPlease wait and try again later. To increase your limits, request a quota increase through AI Studio, or switch to another /auth method';
 const RATE_LIMIT_ERROR_MESSAGE_VERTEX =
@@ -20,9 +59,39 @@ const getRateLimitErrorMessageDefault = (

 function getRateLimitMessage(
  authType?: AuthType,
+  error?: unknown,
+  userTier?: UserTierId,
+  currentModel?: string,
  fallbackModel?: string,
 ): string {
  switch (authType) {
+    case AuthType.LOGIN_WITH_GOOGLE: {
+      // Determine if user is on a paid tier (Legacy or Standard) - default to FREE if not specified
+      const isPaidTier =
+        userTier === UserTierId.LEGACY || userTier === UserTierId.STANDARD;
+
+      if (isProQuotaExceededError(error)) {
+        return isPaidTier
+          ? getRateLimitErrorMessageGoogleProQuotaPaid(
+              currentModel || DEFAULT_GEMINI_MODEL,
+              fallbackModel,
+            )
+          : getRateLimitErrorMessageGoogleProQuotaFree(
+              currentModel || DEFAULT_GEMINI_MODEL,
+              fallbackModel,
+            );
+      } else if (isGenericQuotaExceededError(error)) {
+        return isPaidTier
+          ? getRateLimitErrorMessageGoogleGenericQuotaPaid(
+              currentModel || DEFAULT_GEMINI_MODEL,
+            )
+          : getRateLimitErrorMessageGoogleGenericQuotaFree();
+      } else {
+        return isPaidTier
+          ? getRateLimitErrorMessageGooglePaid(fallbackModel)
+          : getRateLimitErrorMessageGoogleFree(fallbackModel);
+      }
+    }
    case AuthType.USE_GEMINI:
      return RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI;
    case AuthType.USE_VERTEX_AI:
@@ -42,7 +111,13 @@ export function parseAndFormatApiError(
  if (isStructuredError(error)) {
    let text = `[API Error: ${error.message}]`;
    if (error.status === 429) {
-      text += getRateLimitMessage(authType, fallbackModel);
+      text += getRateLimitMessage(
+        authType,
+        error,
+        userTier,
+        currentModel,
+        fallbackModel,
+      );
    }
    return text;
  }
@@ -71,7 +146,13 @@ export function parseAndFormatApiError(
        }
        let text = `[API Error: ${finalMessage} (Status: ${parsedError.error.status})]`;
        if (parsedError.error.code === 429) {
-          text += getRateLimitMessage(authType, fallbackModel);
+          text += getRateLimitMessage(
+            authType,
+            parsedError,
+            userTier,
+            currentModel,
+            fallbackModel,
+          );
        }
        return text;
      }
@@ -11,6 +11,7 @@ import {
  setSimulate429,
  disableSimulationAfterFallback,
  shouldSimulate429,
+  createSimulated429Error,
  resetRequestCounter,
 } from './testUtils.js';
 import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
@@ -18,15 +19,12 @@ import { retryWithBackoff } from './retry.js';
 import { AuthType } from '../core/contentGenerator.js';
 // Import the new types (Assuming this test file is in packages/core/src/utils/)
 import type { FallbackModelHandler } from '../fallback/types.js';
-import type { GoogleApiError } from './googleErrors.js';
-import { TerminalQuotaError } from './googleQuotaErrors.js';

 vi.mock('node:fs');

 // Update the description to reflect that this tests the retry utility's integration
 describe('Retry Utility Fallback Integration', () => {
  let config: Config;
-  let mockGoogleApiError: GoogleApiError;

  beforeEach(() => {
    vi.mocked(fs.existsSync).mockReturnValue(true);
@@ -40,11 +38,6 @@ describe('Retry Utility Fallback Integration', () => {
      cwd: '/test',
      model: 'gemini-2.5-pro',
    });
-    mockGoogleApiError = {
-      code: 429,
-      message: 'mock error',
-      details: [],
-    };

    // Reset simulation state for each test
    setSimulate429(false);
@@ -63,7 +56,6 @@ describe('Retry Utility Fallback Integration', () => {
    const result = await config.fallbackModelHandler!(
      'gemini-2.5-pro',
      DEFAULT_GEMINI_FLASH_MODEL,
-      new Error('test'),
    );

    // Verify it returns the correct intent
@@ -71,61 +63,81 @@ describe('Retry Utility Fallback Integration', () => {
  });

  // This test validates the retry utility's logic for triggering the callback.
-  it('should trigger onPersistent429 on TerminalQuotaError for OAuth users', async () => {
+  it('should trigger onPersistent429 after 2 consecutive 429 errors for OAuth users', async () => {
    let fallbackCalled = false;
+    // Removed fallbackModel variable as it's no longer relevant here.

+    // Mock function that simulates exactly 2 429 errors, then succeeds after fallback
    const mockApiCall = vi
      .fn()
-      .mockRejectedValueOnce(
-        new TerminalQuotaError('Daily limit', mockGoogleApiError),
-      )
-      .mockRejectedValueOnce(
-        new TerminalQuotaError('Daily limit', mockGoogleApiError),
-      )
+      .mockRejectedValueOnce(createSimulated429Error())
+      .mockRejectedValueOnce(createSimulated429Error())
      .mockResolvedValueOnce('success after fallback');

+    // Mock the onPersistent429 callback (this is what client.ts/geminiChat.ts provides)
    const mockPersistent429Callback = vi.fn(async (_authType?: string) => {
      fallbackCalled = true;
+      // Return true to signal retryWithBackoff to reset attempts and continue.
      return true;
    });

+    // Test with OAuth personal auth type, with maxAttempts = 2 to ensure fallback triggers
    const result = await retryWithBackoff(mockApiCall, {
      maxAttempts: 2,
      initialDelayMs: 1,
      maxDelayMs: 10,
+      shouldRetryOnError: (error: Error) => {
+        const status = (error as Error & { status?: number }).status;
+        return status === 429;
+      },
      onPersistent429: mockPersistent429Callback,
      authType: AuthType.LOGIN_WITH_GOOGLE,
    });

+    // Verify fallback mechanism was triggered
    expect(fallbackCalled).toBe(true);
    expect(mockPersistent429Callback).toHaveBeenCalledWith(
      AuthType.LOGIN_WITH_GOOGLE,
-      expect.any(TerminalQuotaError),
+      expect.any(Error),
    );
    expect(result).toBe('success after fallback');
+    // Should have: 2 failures, then fallback triggered, then 1 success after retry reset
    expect(mockApiCall).toHaveBeenCalledTimes(3);
  });

  it('should not trigger onPersistent429 for API key users', async () => {
-    const fallbackCallback = vi.fn();
+    let fallbackCalled = false;

-    const mockApiCall = vi
-      .fn()
-      .mockRejectedValueOnce(
-        new TerminalQuotaError('Daily limit', mockGoogleApiError),
-      );
+    // Mock function that simulates 429 errors
+    const mockApiCall = vi.fn().mockRejectedValue(createSimulated429Error());

-    const promise = retryWithBackoff(mockApiCall, {
-      maxAttempts: 2,
-      initialDelayMs: 1,
-      maxDelayMs: 10,
-      onPersistent429: fallbackCallback,
-      authType: AuthType.USE_GEMINI, // API key auth type
+    // Mock the callback
+    const mockPersistent429Callback = vi.fn(async () => {
+      fallbackCalled = true;
+      return true;
    });

-    await expect(promise).rejects.toThrow('Daily limit');
-    expect(fallbackCallback).not.toHaveBeenCalled();
-    expect(mockApiCall).toHaveBeenCalledTimes(1);
+    // Test with API key auth type - should not trigger fallback.
+    // Assert on the rejection itself: an expect() placed inside a catch block
+    // is skipped entirely when the call resolves, letting the test pass
+    // without ever checking the error.
+    await expect(
+      retryWithBackoff(mockApiCall, {
+        maxAttempts: 5,
+        initialDelayMs: 10,
+        maxDelayMs: 100,
+        shouldRetryOnError: (error: Error) => {
+          const status = (error as Error & { status?: number }).status;
+          return status === 429;
+        },
+        onPersistent429: mockPersistent429Callback,
+        authType: AuthType.USE_GEMINI, // API key auth type
+      }),
+    ).rejects.toThrow('Rate limit exceeded');
+
+    // Verify fallback was NOT triggered for API key users
+    expect(fallbackCalled).toBe(false);
+    expect(mockPersistent429Callback).not.toHaveBeenCalled();
  });

  // This test validates the test utilities themselves.
@@ -1,250 +0,0 @@
-/**
- * @license
- * Copyright 2025 Google LLC
- * SPDX-License-Identifier: Apache-2.0
- */
-
-import { describe, it, expect } from 'vitest';
-import { parseGoogleApiError } from './googleErrors.js';
-import type { QuotaFailure } from './googleErrors.js';
-
-describe('parseGoogleApiError', () => {
-  it('should return null for non-gaxios errors', () => {
-    expect(parseGoogleApiError(new Error('vanilla error'))).toBeNull();
-    expect(parseGoogleApiError(null)).toBeNull();
-    expect(parseGoogleApiError({})).toBeNull();
-  });
-
-  it('should parse a standard gaxios error', () => {
-    const mockError = {
-      response: {
-        status: 429,
-        data: {
-          error: {
-            code: 429,
-            message: 'Quota exceeded',
-            details: [
-              {
-                '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
-                violations: [{ subject: 'user', description: 'daily limit' }],
-              },
-            ],
-          },
-        },
-      },
-    };
-
-    const parsed = parseGoogleApiError(mockError);
-    expect(parsed).not.toBeNull();
-    expect(parsed?.code).toBe(429);
-    expect(parsed?.message).toBe('Quota exceeded');
-    expect(parsed?.details).toHaveLength(1);
-    const detail = parsed?.details[0] as QuotaFailure;
-    expect(detail['@type']).toBe('type.googleapis.com/google.rpc.QuotaFailure');
-    expect(detail.violations[0].description).toBe('daily limit');
-  });
-
-  it('should parse an error with details stringified in the message', () => {
-    const innerError = {
-      error: {
-        code: 429,
-        message: 'Inner quota message',
-        details: [
-          {
-            '@type': 'type.googleapis.com/google.rpc.RetryInfo',
-            retryDelay: '10s',
-          },
-        ],
-      },
-    };
-
-    const mockError = {
-      response: {
-        status: 429,
-        data: {
-          error: {
-            code: 429,
-            message: JSON.stringify(innerError),
-            details: [], // Top-level details are empty
-          },
-        },
-      },
-    };
-
-    const parsed = parseGoogleApiError(mockError);
-    expect(parsed).not.toBeNull();
-    expect(parsed?.code).toBe(429);
-    expect(parsed?.message).toBe('Inner quota message');
-    expect(parsed?.details).toHaveLength(1);
-    expect(parsed?.details[0]['@type']).toBe(
-      'type.googleapis.com/google.rpc.RetryInfo',
-    );
-  });
-
-  it('should return null if details are not in the expected format', () => {
-    const mockError = {
-      response: {
-        status: 400,
-        data: {
-          error: {
-            code: 400,
-            message: 'Bad Request',
-            details: 'just a string', // Invalid details format
-          },
-        },
-      },
-    };
-    expect(parseGoogleApiError(mockError)).toBeNull();
-  });
-
-  it('should return null if there are no valid details', () => {
-    const mockError = {
-      response: {
-        status: 400,
-        data: {
-          error: {
-            code: 400,
-            message: 'Bad Request',
-            details: [
-              {
-                // missing '@type'
-                reason: 'some reason',
-              },
-            ],
-          },
-        },
-      },
-    };
-    expect(parseGoogleApiError(mockError)).toBeNull();
-  });
-
-  it('should parse a doubly nested error in the message', () => {
-    const innerError = {
-      error: {
-        code: 429,
-        message: 'Innermost quota message',
-        details: [
-          {
-            '@type': 'type.googleapis.com/google.rpc.RetryInfo',
-            retryDelay: '20s',
-          },
-        ],
-      },
-    };
-
-    const middleError = {
-      error: {
-        code: 429,
-        message: JSON.stringify(innerError),
-        details: [],
-      },
-    };
-
-    const mockError = {
-      response: {
-        status: 429,
-        data: {
-          error: {
-            code: 429,
-            message: JSON.stringify(middleError),
-            details: [],
-          },
-        },
-      },
-    };
-
-    const parsed = parseGoogleApiError(mockError);
-    expect(parsed).not.toBeNull();
-    expect(parsed?.code).toBe(429);
-    expect(parsed?.message).toBe('Innermost quota message');
-    expect(parsed?.details).toHaveLength(1);
-    expect(parsed?.details[0]['@type']).toBe(
-      'type.googleapis.com/google.rpc.RetryInfo',
-    );
-  });
-
-  it('should parse an error that is not in a response object', () => {
-    const innerError = {
-      error: {
-        code: 429,
-        message: 'Innermost quota message',
-        details: [
-          {
-            '@type': 'type.googleapis.com/google.rpc.RetryInfo',
-            retryDelay: '20s',
-          },
-        ],
-      },
-    };
-
-    const mockError = {
-      error: {
-        code: 429,
-        message: JSON.stringify(innerError),
-        details: [],
-      },
-    };
-
-    const parsed = parseGoogleApiError(mockError);
-    expect(parsed).not.toBeNull();
-    expect(parsed?.code).toBe(429);
-    expect(parsed?.message).toBe('Innermost quota message');
-    expect(parsed?.details).toHaveLength(1);
-    expect(parsed?.details[0]['@type']).toBe(
-      'type.googleapis.com/google.rpc.RetryInfo',
-    );
-  });
-
-  it('should parse an error that is a JSON string', () => {
-    const innerError = {
-      error: {
-        code: 429,
-        message: 'Innermost quota message',
-        details: [
-          {
-            '@type': 'type.googleapis.com/google.rpc.RetryInfo',
-            retryDelay: '20s',
-          },
-        ],
-      },
-    };
-
-    const mockError = {
-      error: {
-        code: 429,
-        message: JSON.stringify(innerError),
-        details: [],
-      },
-    };
-
-    const parsed = parseGoogleApiError(JSON.stringify(mockError));
-    expect(parsed).not.toBeNull();
-    expect(parsed?.code).toBe(429);
-    expect(parsed?.message).toBe('Innermost quota message');
-    expect(parsed?.details).toHaveLength(1);
-    expect(parsed?.details[0]['@type']).toBe(
-      'type.googleapis.com/google.rpc.RetryInfo',
-    );
-  });
-
-  it('should parse the user-provided nested error string', () => {
-    const userErrorString =
-      '{"error":{"message":"{\\n  \\"error\\": {\\n    \\"code\\": 429,\\n    \\"message\\": \\"You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 10000\\\\nPlease retry in 40.025771073s.\\",\\n    \\"status\\": \\"RESOURCE_EXHAUSTED\\",\\n    \\"details\\": [\\n      {\\n        \\"@type\\": \\"type.googleapis.com/google.rpc.DebugInfo\\",\\n        \\"detail\\": \\"[ORIGINAL ERROR] generic::resource_exhausted: You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 10000\\\\nPlease retry in 40.025771073s. [google.rpc.error_details_ext] { message: \\\\\\"You exceeded your current quota, please check your plan and billing details. 
For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 10000\\\\\\\\nPlease retry in 40.025771073s.\\\\\\" }\\"\\n      },\\n      {\\n        \\"@type\\": \\"type.googleapis.com/google.rpc.QuotaFailure\\",\\n        \\"violations\\": [\\n          {\\n            \\"quotaMetric\\": \\"generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count\\",\\n            \\"quotaId\\": \\"GenerateContentPaidTierInputTokensPerModelPerMinute\\",\\n            \\"quotaDimensions\\": {\\n              \\"location\\": \\"global\\",\\n              \\"model\\": \\"gemini-2.5-pro\\"\\n            },\\n            \\"quotaValue\\": \\"10000\\"\\n          }\\n        ]\\n      },\\n      {\\n        \\"@type\\": \\"type.googleapis.com/google.rpc.Help\\",\\n        \\"links\\": [\\n          {\\n            \\"description\\": \\"Learn more about Gemini API quotas\\",\\n            \\"url\\": \\"https://ai.google.dev/gemini-api/docs/rate-limits\\"\\n          }\\n        ]\\n      },\\n      {\\n        \\"@type\\": \\"type.googleapis.com/google.rpc.RetryInfo\\",\\n        \\"retryDelay\\": \\"40s\\"\\n      }\\n    ]\\n  }\\n}\\n","code":429,"status":"Too Many Requests"}}';
-
-    const parsed = parseGoogleApiError(userErrorString);
-    expect(parsed).not.toBeNull();
-    expect(parsed?.code).toBe(429);
-    expect(parsed?.message).toContain('You exceeded your current quota');
-    expect(parsed?.details).toHaveLength(4);
-    expect(
-      parsed?.details.some(
-        (d) => d['@type'] === 'type.googleapis.com/google.rpc.QuotaFailure',
-      ),
-    ).toBe(true);
-    expect(
-      parsed?.details.some(
-        (d) => d['@type'] === 'type.googleapis.com/google.rpc.RetryInfo',
-      ),
-    ).toBe(true);
-  });
-});
@@ -1,242 +0,0 @@
-/**
- * @license
- * Copyright 2025 Google LLC
- * SPDX-License-Identifier: Apache-2.0
- */
-
-/**
- * @fileoverview
- * This file contains types and functions for parsing structured Google API errors.
- */
-
-/**
- * Based on google/rpc/error_details.proto
- */
-
-export interface ErrorInfo {
-  '@type': 'type.googleapis.com/google.rpc.ErrorInfo';
-  reason: string;
-  domain: string;
-  metadata: { [key: string]: string };
-}
-
-export interface RetryInfo {
-  '@type': 'type.googleapis.com/google.rpc.RetryInfo';
-  retryDelay: string; // e.g. "51820.638305887s"
-}
-
-export interface DebugInfo {
-  '@type': 'type.googleapis.com/google.rpc.DebugInfo';
-  stackEntries: string[];
-  detail: string;
-}
-
-export interface QuotaFailure {
-  '@type': 'type.googleapis.com/google.rpc.QuotaFailure';
-  violations: Array<{
-    subject: string;
-    description: string;
-    apiService?: string;
-    quotaMetric?: string;
-    quotaId?: string;
-    quotaDimensions?: { [key: string]: string };
-    quotaValue?: number;
-    futureQuotaValue?: number;
-  }>;
-}
-
-export interface PreconditionFailure {
-  '@type': 'type.googleapis.com/google.rpc.PreconditionFailure';
-  violations: Array<{
-    type: string;
-    subject: string;
-    description: string;
-  }>;
-}
-
-export interface LocalizedMessage {
-  '@type': 'type.googleapis.com/google.rpc.LocalizedMessage';
-  locale: string;
-  message: string;
-}
-
-export interface BadRequest {
-  '@type': 'type.googleapis.com/google.rpc.BadRequest';
-  fieldViolations: Array<{
-    field: string;
-    description: string;
-    reason?: string;
-    localizedMessage?: LocalizedMessage;
-  }>;
-}
-
-export interface RequestInfo {
-  '@type': 'type.googleapis.com/google.rpc.RequestInfo';
-  requestId: string;
-  servingData: string;
-}
-
-export interface ResourceInfo {
-  '@type': 'type.googleapis.com/google.rpc.ResourceInfo';
-  resourceType: string;
-  resourceName: string;
-  owner: string;
-  description: string;
-}
-
-export interface Help {
-  '@type': 'type.googleapis.com/google.rpc.Help';
-  links: Array<{
-    description: string;
-    url: string;
-  }>;
-}
-
-export type GoogleApiErrorDetail =
-  | ErrorInfo
-  | RetryInfo
-  | DebugInfo
-  | QuotaFailure
-  | PreconditionFailure
-  | BadRequest
-  | RequestInfo
-  | ResourceInfo
-  | Help
-  | LocalizedMessage;
-
-export interface GoogleApiError {
-  code: number;
-  message: string;
-  details: GoogleApiErrorDetail[];
-}
-
-/**
- * Parses an error object to check if it's a structured Google API error
- * and extracts all details.
- *
- * This function can handle two formats:
- * 1. Standard Google API errors where `details` is a top-level field.
- * 2. Errors where the entire structured error object is stringified inside
- *    the `message` field of a wrapper error.
- *
- * @param error The error object to inspect.
- * @returns A GoogleApiError object if the error matches, otherwise null.
- */
-export function parseGoogleApiError(error: unknown): GoogleApiError | null {
-  if (!error) {
-    return null;
-  }
-
-  let errorObj: unknown = error;
-
-  // If error is a string, try to parse it.
-  if (typeof errorObj === 'string') {
-    try {
-      errorObj = JSON.parse(errorObj);
-    } catch (_) {
-      // Not a JSON string, can't parse.
-      return null;
-    }
-  }
-
-  if (typeof errorObj !== 'object' || errorObj === null) {
-    return null;
-  }
-
-  type ErrorShape = {
-    message?: string;
-    details?: unknown[];
-    code?: number;
-  };
-
-  const gaxiosError = errorObj as {
-    response?: {
-      status?: number;
-      data?:
-        | {
-            error?: ErrorShape;
-          }
-        | string;
-    };
-    error?: ErrorShape;
-    code?: number;
-  };
-
-  let outerError: ErrorShape | undefined;
-  if (gaxiosError.response?.data) {
-    if (typeof gaxiosError.response.data === 'string') {
-      try {
-        const parsedData = JSON.parse(gaxiosError.response.data);
-        if (parsedData.error) {
-          outerError = parsedData.error;
-        }
-      } catch (_) {
-        // Not a JSON string, or doesn't contain .error
-      }
-    } else if (
-      typeof gaxiosError.response.data === 'object' &&
-      gaxiosError.response.data !== null
-    ) {
-      outerError = (
-        gaxiosError.response.data as {
-          error?: ErrorShape;
-        }
-      ).error;
-    }
-  }
-  const responseStatus = gaxiosError.response?.status;
-
-  if (!outerError) {
-    // If the gaxios structure isn't there, check for a top-level `error` property.
-    if (gaxiosError.error) {
-      outerError = gaxiosError.error;
-    } else {
-      return null;
-    }
-  }
-
-  let currentError = outerError;
-  let depth = 0;
-  const maxDepth = 10;
-  // Handle cases where the actual error object is stringified inside the message
-  // by drilling down until we find an error that doesn't have a stringified message.
-  while (typeof currentError.message === 'string' && depth < maxDepth) {
-    try {
-      const parsedMessage = JSON.parse(currentError.message);
-      if (parsedMessage.error) {
-        currentError = parsedMessage.error;
-        depth++;
-      } else {
-        // The message is a JSON string, but not a nested error object.
-        break;
-      }
-    } catch (_) {
-      // It wasn't a JSON string, so we've drilled down as far as we can.
-      break;
-    }
-  }
-
-  const code = responseStatus ?? currentError.code ?? gaxiosError.code;
-  const message = currentError.message;
-  const errorDetails = currentError.details;
-
-  if (Array.isArray(errorDetails) && code && message) {
-    const details: GoogleApiErrorDetail[] = [];
-    for (const detail of errorDetails) {
-      if (detail && typeof detail === 'object' && '@type' in detail) {
-        // We can just cast it; the consumer will have to switch on @type
-        details.push(detail as GoogleApiErrorDetail);
-      }
-    }
-
-    if (details.length > 0) {
-      return {
-        code,
-        message,
-        details,
-      };
-    }
-  }
-
-  return null;
-}
@@ -1,205 +0,0 @@
-/**
- * @license
- * Copyright 2025 Google LLC
- * SPDX-License-Identifier: Apache-2.0
- */
-
-import { describe, it, expect, vi, afterEach } from 'vitest';
-import {
-  classifyGoogleError,
-  RetryableQuotaError,
-  TerminalQuotaError,
-} from './googleQuotaErrors.js';
-import * as errorParser from './googleErrors.js';
-import type { GoogleApiError } from './googleErrors.js';
-
-describe('classifyGoogleError', () => {
-  afterEach(() => {
-    vi.restoreAllMocks();
-  });
-
-  it('should return original error if not a Google API error', () => {
-    const regularError = new Error('Something went wrong');
-    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(null);
-    const result = classifyGoogleError(regularError);
-    expect(result).toBe(regularError);
-  });
-
-  it('should return original error if code is not 429', () => {
-    const apiError: GoogleApiError = {
-      code: 500,
-      message: 'Server error',
-      details: [],
-    };
-    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
-    const originalError = new Error();
-    const result = classifyGoogleError(originalError);
-    expect(result).toBe(originalError);
-    expect(result).not.toBeInstanceOf(TerminalQuotaError);
-    expect(result).not.toBeInstanceOf(RetryableQuotaError);
-  });
-
-  it('should return TerminalQuotaError for daily quota violations in QuotaFailure', () => {
-    const apiError: GoogleApiError = {
-      code: 429,
-      message: 'Quota exceeded',
-      details: [
-        {
-          '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
-          violations: [
-            {
-              subject: 'user',
-              description: 'daily limit',
-              quotaId: 'RequestsPerDay-limit',
-            },
-          ],
-        },
-      ],
-    };
-    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
-    const result = classifyGoogleError(new Error());
-    expect(result).toBeInstanceOf(TerminalQuotaError);
-    expect((result as TerminalQuotaError).cause).toBe(apiError);
-  });
-
-  it('should return TerminalQuotaError for daily quota violations in ErrorInfo', () => {
-    const apiError: GoogleApiError = {
-      code: 429,
-      message: 'Quota exceeded',
-      details: [
-        {
-          '@type': 'type.googleapis.com/google.rpc.ErrorInfo',
-          reason: 'QUOTA_EXCEEDED',
-          domain: 'googleapis.com',
-          metadata: {
-            quota_limit: 'RequestsPerDay_PerProject_PerUser',
-          },
-        },
-      ],
-    };
-    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
-    const result = classifyGoogleError(new Error());
-    expect(result).toBeInstanceOf(TerminalQuotaError);
-  });
-
-  it('should return TerminalQuotaError for long retry delays', () => {
-    const apiError: GoogleApiError = {
-      code: 429,
-      message: 'Too many requests',
-      details: [
-        {
-          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
-          retryDelay: '301s', // > 5 minutes
-        },
-      ],
-    };
-    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
-    const result = classifyGoogleError(new Error());
-    expect(result).toBeInstanceOf(TerminalQuotaError);
-  });
-
-  it('should return RetryableQuotaError for short retry delays', () => {
-    const apiError: GoogleApiError = {
-      code: 429,
-      message: 'Too many requests',
-      details: [
-        {
-          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
-          retryDelay: '45.123s',
-        },
-      ],
-    };
-    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
-    const result = classifyGoogleError(new Error());
-    expect(result).toBeInstanceOf(RetryableQuotaError);
-    expect((result as RetryableQuotaError).retryDelayMs).toBe(45123);
-  });
-
-  it('should return RetryableQuotaError for per-minute quota violations in QuotaFailure', () => {
-    const apiError: GoogleApiError = {
-      code: 429,
-      message: 'Quota exceeded',
-      details: [
-        {
-          '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
-          violations: [
-            {
-              subject: 'user',
-              description: 'per minute limit',
-              quotaId: 'RequestsPerMinute-limit',
-            },
-          ],
-        },
-      ],
-    };
-    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
-    const result = classifyGoogleError(new Error());
-    expect(result).toBeInstanceOf(RetryableQuotaError);
-    expect((result as RetryableQuotaError).retryDelayMs).toBe(60000);
-  });
-
-  it('should return RetryableQuotaError for per-minute quota violations in ErrorInfo', () => {
-    const apiError: GoogleApiError = {
-      code: 429,
-      message: 'Quota exceeded',
-      details: [
-        {
-          '@type': 'type.googleapis.com/google.rpc.ErrorInfo',
-          reason: 'QUOTA_EXCEEDED',
-          domain: 'googleapis.com',
-          metadata: {
-            quota_limit: 'RequestsPerMinute_PerProject_PerUser',
-          },
-        },
-      ],
-    };
-    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
-    const result = classifyGoogleError(new Error());
-    expect(result).toBeInstanceOf(RetryableQuotaError);
-    expect((result as RetryableQuotaError).retryDelayMs).toBe(60000);
-  });
-
-  it('should prioritize daily limit over retry info', () => {
-    const apiError: GoogleApiError = {
-      code: 429,
-      message: 'Quota exceeded',
-      details: [
-        {
-          '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
-          violations: [
-            {
-              subject: 'user',
-              description: 'daily limit',
-              quotaId: 'RequestsPerDay-limit',
-            },
-          ],
-        },
-        {
-          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
-          retryDelay: '10s',
-        },
-      ],
-    };
-    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
-    const result = classifyGoogleError(new Error());
-    expect(result).toBeInstanceOf(TerminalQuotaError);
-  });
-
-  it('should return original error for 429 without specific details', () => {
-    const apiError: GoogleApiError = {
-      code: 429,
-      message: 'Too many requests',
-      details: [
-        {
-          '@type': 'type.googleapis.com/google.rpc.DebugInfo',
-          detail: 'some debug info',
-          stackEntries: [],
-        },
-      ],
-    };
-    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
-    const originalError = new Error();
-    const result = classifyGoogleError(originalError);
-    expect(result).toBe(originalError);
-  });
-});
@@ -1,162 +0,0 @@
-/**
- * @license
- * Copyright 2025 Google LLC
- * SPDX-License-Identifier: Apache-2.0
- */
-
-import type {
-  ErrorInfo,
-  GoogleApiError,
-  QuotaFailure,
-  RetryInfo,
-} from './googleErrors.js';
-import { parseGoogleApiError } from './googleErrors.js';
-
-const FIVE_MINUTES_IN_SECONDS = 5 * 60;
-
-/**
- * A non-retryable error indicating a hard quota limit has been reached (e.g., daily limit).
- */
-export class TerminalQuotaError extends Error {
-  constructor(
-    message: string,
-    override readonly cause: GoogleApiError,
-  ) {
-    super(message);
-    this.name = 'TerminalQuotaError';
-  }
-}
-
-/**
- * A retryable error indicating a temporary quota issue (e.g., per-minute limit).
- */
-export class RetryableQuotaError extends Error {
-  retryDelayMs: number;
-
-  constructor(
-    message: string,
-    override readonly cause: GoogleApiError,
-    retryDelaySeconds: number,
-  ) {
-    super(message);
-    this.name = 'RetryableQuotaError';
-    this.retryDelayMs = retryDelaySeconds * 1000;
-  }
-}
-
-/**
- * Parses a duration string (e.g., "34.074824224s", "60s") and returns the time in seconds.
- * @param duration The duration string to parse.
- * @returns The duration in seconds, or null if parsing fails.
- */
-function parseDurationInSeconds(duration: string): number | null {
-  if (!duration.endsWith('s')) {
-    return null;
-  }
-  const seconds = parseFloat(duration.slice(0, -1));
-  return isNaN(seconds) ? null : seconds;
-}
-
-/**
- * Analyzes a caught error and classifies it as a specific quota-related error if applicable.
- *
- * It decides whether an error is a `TerminalQuotaError` or a `RetryableQuotaError` based on
- * the following logic:
- * - If the error indicates a daily limit, it's a `TerminalQuotaError`.
- * - If the error suggests a retry delay of more than 5 minutes, it's a `TerminalQuotaError`.
- * - If the error suggests a retry delay of 5 minutes or less, it's a `RetryableQuotaError`.
- * - If the error indicates a per-minute limit, it's a `RetryableQuotaError`.
- *
- * @param error The error to classify.
- * @returns A `TerminalQuotaError`, `RetryableQuotaError`, or the original `unknown` error.
- */
-export function classifyGoogleError(error: unknown): unknown {
-  const googleApiError = parseGoogleApiError(error);
-
-  if (!googleApiError || googleApiError.code !== 429) {
-    return error; // Not a 429 error we can handle.
-  }
-
-  const quotaFailure = googleApiError.details.find(
-    (d): d is QuotaFailure =>
-      d['@type'] === 'type.googleapis.com/google.rpc.QuotaFailure',
-  );
-
-  const errorInfo = googleApiError.details.find(
-    (d): d is ErrorInfo =>
-      d['@type'] === 'type.googleapis.com/google.rpc.ErrorInfo',
-  );
-
-  const retryInfo = googleApiError.details.find(
-    (d): d is RetryInfo =>
-      d['@type'] === 'type.googleapis.com/google.rpc.RetryInfo',
-  );
-
-  // 1. Check for long-term limits in QuotaFailure or ErrorInfo
-  if (quotaFailure) {
-    for (const violation of quotaFailure.violations) {
-      const quotaId = violation.quotaId ?? '';
-      if (quotaId.includes('PerDay') || quotaId.includes('Daily')) {
-        return new TerminalQuotaError(
-          `Reached a daily quota limit: ${violation.description}`,
-          googleApiError,
-        );
-      }
-    }
-  }
-
-  if (errorInfo) {
-    const quotaLimit = errorInfo.metadata?.['quota_limit'] ?? '';
-    if (quotaLimit.includes('PerDay') || quotaLimit.includes('Daily')) {
-      return new TerminalQuotaError(
-        `Reached a daily quota limit: ${errorInfo.reason}`,
-        googleApiError,
-      );
-    }
-  }
-
-  // 2. Check for long delays in RetryInfo
-  if (retryInfo?.retryDelay) {
-    const delaySeconds = parseDurationInSeconds(retryInfo.retryDelay);
-    if (delaySeconds !== null) {
-      if (delaySeconds > FIVE_MINUTES_IN_SECONDS) {
-        return new TerminalQuotaError(
-          `Quota limit requires a long delay of ${retryInfo.retryDelay}.`,
-          googleApiError,
-        );
-      }
-      // This is a retryable error with a specific delay.
-      return new RetryableQuotaError(
-        `Quota limit hit. Retrying after ${retryInfo.retryDelay}.`,
-        googleApiError,
-        delaySeconds,
-      );
-    }
-  }
-
-  // 3. Check for short-term limits in QuotaFailure or ErrorInfo
-  if (quotaFailure) {
-    for (const violation of quotaFailure.violations) {
-      const quotaId = violation.quotaId ?? '';
-      if (quotaId.includes('PerMinute')) {
-        return new RetryableQuotaError(
-          `Quota limit hit: ${violation.description}. Retrying after 60s.`,
-          googleApiError,
-          60,
-        );
-      }
-    }
-  }
-
-  if (errorInfo) {
-    const quotaLimit = errorInfo.metadata?.['quota_limit'] ?? '';
-    if (quotaLimit.includes('PerMinute')) {
-      return new RetryableQuotaError(
-        `Quota limit hit: ${errorInfo.reason}. Retrying after 60s.`,
-        googleApiError,
-        60,
-      );
-    }
-  }
-  return error; // Fallback to original error if no specific classification fits.
-}
@@ -33,3 +33,68 @@ export function isStructuredError(error: unknown): error is StructuredError {
    typeof (error as StructuredError).message === 'string'
  );
 }
+
+/**
+ * Reports whether an error is a Gemini Pro quota-exceeded error.
+ *
+ * The error text must contain both "Quota exceeded for quota metric 'Gemini"
+ * and "Pro Requests'", which covers messages such as:
+ * - "Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'"
+ * - "Quota exceeded for quota metric 'Gemini 2.5-preview Pro Requests'"
+ * Substring checks are used instead of a regex to avoid ReDoS.
+ *
+ * @param error A string, structured error, API error, or Gaxios-style error.
+ * @returns True when the relevant message matches the Pro quota pattern.
+ */
+export function isProQuotaExceededError(error: unknown): boolean {
+  const mentionsProQuota = (text: string): boolean => {
+    const hasQuotaPrefix = text.includes(
+      "Quota exceeded for quota metric 'Gemini",
+    );
+    return hasQuotaPrefix && text.includes("Pro Requests'");
+  };
+
+  if (typeof error === 'string') {
+    return mentionsProQuota(error);
+  }
+  if (isStructuredError(error)) {
+    return mentionsProQuota(error.message);
+  }
+  if (isApiError(error)) {
+    return mentionsProQuota(error.error.message);
+  }
+
+  // Anything else only qualifies if it looks like a Gaxios error, i.e. an
+  // object exposing a `response` whose `data` holds the message.
+  if (!error || typeof error !== 'object' || !('response' in error)) {
+    return false;
+  }
+  const payload = (error as { response?: { data?: unknown } }).response?.data;
+  if (typeof payload === 'string') {
+    return mentionsProQuota(payload);
+  }
+  if (typeof payload !== 'object' || payload === null) {
+    return false;
+  }
+  if (!('error' in payload)) {
+    return false;
+  }
+  const nested = payload as { error?: { message?: string } };
+  return mentionsProQuota(nested.error?.message || '');
+}
+
+/** Checks the error text for the 'Quota exceeded for quota metric' marker. */
+export function isGenericQuotaExceededError(error: unknown): boolean {
+  // Pick the message from whichever supported error shape was supplied.
+  let text: string;
+  if (typeof error === 'string') {
+    text = error;
+  } else if (isStructuredError(error)) {
+    text = error.message;
+  } else if (isApiError(error)) {
+    text = error.error.message;
+  } else {
+    return false; // Unrecognised shape: not treated as a quota error.
+  }
+  return text.includes('Quota exceeded for quota metric');
+}
@@ -7,14 +7,9 @@
 /* eslint-disable @typescript-eslint/no-explicit-any */
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import { ApiError } from '@google/genai';
-import { AuthType } from '../core/contentGenerator.js';
 import type { HttpError } from './retry.js';
 import { retryWithBackoff } from './retry.js';
 import { setSimulate429 } from './testUtils.js';
-import {
-  TerminalQuotaError,
-  RetryableQuotaError,
-} from './googleQuotaErrors.js';

 // Helper to create a mock function that fails a certain number of times
 const createFailingFunction = (
@@ -104,26 +99,26 @@ describe('retryWithBackoff', () => {

    // Expect it to fail with the error from the 5th attempt.
    await Promise.all([
-      expect(promise).rejects.toThrow('Simulated error attempt 3'),
+      expect(promise).rejects.toThrow('Simulated error attempt 5'),
      vi.runAllTimersAsync(),
    ]);

-    expect(mockFn).toHaveBeenCalledTimes(3);
+    expect(mockFn).toHaveBeenCalledTimes(5);
  });

-  it('should default to 3 maxAttempts if options.maxAttempts is undefined', async () => {
-    // This function will fail more than 3 times to ensure all retries are used.
+  it('should default to 5 maxAttempts if options.maxAttempts is undefined', async () => {
+    // This function will fail more than 5 times to ensure all retries are used.
    const mockFn = createFailingFunction(10);

    const promise = retryWithBackoff(mockFn, { maxAttempts: undefined });

    // Expect it to fail with the error from the 5th attempt.
    await Promise.all([
-      expect(promise).rejects.toThrow('Simulated error attempt 3'),
+      expect(promise).rejects.toThrow('Simulated error attempt 5'),
      vi.runAllTimersAsync(),
    ]);

-    expect(mockFn).toHaveBeenCalledTimes(3);
+    expect(mockFn).toHaveBeenCalledTimes(5);
  });

  it('should not retry if shouldRetry returns false', async () => {
@@ -340,13 +335,15 @@ describe('retryWithBackoff', () => {
  });

  describe('Flash model fallback for OAuth users', () => {
-    it('should trigger fallback for OAuth personal users on TerminalQuotaError', async () => {
+    it('should trigger fallback for OAuth personal users after persistent 429 errors', async () => {
      const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash');

      let fallbackOccurred = false;
      const mockFn = vi.fn().mockImplementation(async () => {
        if (!fallbackOccurred) {
-          throw new TerminalQuotaError('Daily limit reached', {} as any);
+          const error: HttpError = new Error('Rate limit exceeded');
+          error.status = 429;
+          throw error;
        }
        return 'success';
      });
@@ -354,9 +351,143 @@ describe('retryWithBackoff', () => {
      const promise = retryWithBackoff(mockFn, {
        maxAttempts: 3,
        initialDelayMs: 100,
-        onPersistent429: async (authType?: string, error?: unknown) => {
+        onPersistent429: async (authType?: string) => {
          fallbackOccurred = true;
-          return await fallbackCallback(authType, error);
+          return await fallbackCallback(authType);
+        },
+        authType: 'oauth-personal',
+      });
+
+      // Advance all timers to complete retries
+      await vi.runAllTimersAsync();
+
+      // Should succeed after fallback
+      await expect(promise).resolves.toBe('success');
+
+      // Verify callback was called with correct auth type
+      expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
+
+      // Should retry again after fallback
+      expect(mockFn).toHaveBeenCalledTimes(3); // 2 initial attempts + 1 after fallback
+    });
+
+    it('should NOT trigger fallback for API key users', async () => {
+      const fallbackCallback = vi.fn();
+
+      const mockFn = vi.fn(async () => {
+        const error: HttpError = new Error('Rate limit exceeded');
+        error.status = 429;
+        throw error;
+      });
+
+      const promise = retryWithBackoff(mockFn, {
+        maxAttempts: 3,
+        initialDelayMs: 100,
+        onPersistent429: fallbackCallback,
+        authType: 'gemini-api-key',
+      });
+
+      // Handle the promise properly to avoid unhandled rejections
+      const resultPromise = promise.catch((error) => error);
+      await vi.runAllTimersAsync();
+      const result = await resultPromise;
+
+      // Should fail after all retries without fallback
+      expect(result).toBeInstanceOf(Error);
+      expect(result.message).toBe('Rate limit exceeded');
+
+      // Callback should not be called for API key users
+      expect(fallbackCallback).not.toHaveBeenCalled();
+    });
+
+    it('should reset attempt counter and continue after successful fallback', async () => {
+      let fallbackCalled = false;
+      const fallbackCallback = vi.fn().mockImplementation(async () => {
+        fallbackCalled = true;
+        return 'gemini-2.5-flash';
+      });
+
+      const mockFn = vi.fn().mockImplementation(async () => {
+        if (!fallbackCalled) {
+          const error: HttpError = new Error('Rate limit exceeded');
+          error.status = 429;
+          throw error;
+        }
+        return 'success';
+      });
+
+      const promise = retryWithBackoff(mockFn, {
+        maxAttempts: 3,
+        initialDelayMs: 100,
+        onPersistent429: fallbackCallback,
+        authType: 'oauth-personal',
+      });
+
+      await vi.runAllTimersAsync();
+
+      await expect(promise).resolves.toBe('success');
+      expect(fallbackCallback).toHaveBeenCalledOnce();
+    });
+
+    it('should continue with original error if fallback is rejected', async () => {
+      const fallbackCallback = vi.fn().mockResolvedValue(null); // User rejected fallback
+
+      const mockFn = vi.fn(async () => {
+        const error: HttpError = new Error('Rate limit exceeded');
+        error.status = 429;
+        throw error;
+      });
+
+      const promise = retryWithBackoff(mockFn, {
+        maxAttempts: 3,
+        initialDelayMs: 100,
+        onPersistent429: fallbackCallback,
+        authType: 'oauth-personal',
+      });
+
+      // Handle the promise properly to avoid unhandled rejections
+      const resultPromise = promise.catch((error) => error);
+      await vi.runAllTimersAsync();
+      const result = await resultPromise;
+
+      // Should fail with original error when fallback is rejected
+      expect(result).toBeInstanceOf(Error);
+      expect(result.message).toBe('Rate limit exceeded');
+      expect(fallbackCallback).toHaveBeenCalledWith(
+        'oauth-personal',
+        expect.any(Error),
+      );
+    });
+
+    it('should handle mixed error types (only count consecutive 429s)', async () => {
+      const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash');
+      let attempts = 0;
+      let fallbackOccurred = false;
+
+      const mockFn = vi.fn().mockImplementation(async () => {
+        attempts++;
+        if (fallbackOccurred) {
+          return 'success';
+        }
+        if (attempts === 1) {
+          // First attempt: 500 error (resets consecutive count)
+          const error: HttpError = new Error('Server error');
+          error.status = 500;
+          throw error;
+        } else {
+          // Remaining attempts: 429 errors
+          const error: HttpError = new Error('Rate limit exceeded');
+          error.status = 429;
+          throw error;
+        }
+      });
+
+      const promise = retryWithBackoff(mockFn, {
+        maxAttempts: 5,
+        initialDelayMs: 100,
+        onPersistent429: async (authType?: string) => {
+          fallbackOccurred = true;
+          return await fallbackCallback(authType);
        },
        authType: 'oauth-personal',
      });
@@ -364,51 +495,9 @@ describe('retryWithBackoff', () => {
      await vi.runAllTimersAsync();

      await expect(promise).resolves.toBe('success');
-      expect(fallbackCallback).toHaveBeenCalledWith(
-        'oauth-personal',
-        expect.any(TerminalQuotaError),
-      );
-      expect(mockFn).toHaveBeenCalledTimes(2);
+
+      // Should trigger fallback after 2 consecutive 429s (attempts 2-3)
+      expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
    });
-
-    it('should use retryDelayMs from RetryableQuotaError', async () => {
-      const setTimeoutSpy = vi.spyOn(global, 'setTimeout');
-      const mockFn = vi.fn().mockImplementation(async () => {
-        throw new RetryableQuotaError('Per-minute limit', {} as any, 12.345);
-      });
-
-      const promise = retryWithBackoff(mockFn, {
-        maxAttempts: 2,
-        initialDelayMs: 100,
-      });
-
-      // Attach the rejection expectation *before* running timers
-      // eslint-disable-next-line vitest/valid-expect
-      const assertionPromise = expect(promise).rejects.toThrow();
-      await vi.runAllTimersAsync();
-      await assertionPromise;
-
-      expect(setTimeoutSpy).toHaveBeenCalledWith(expect.any(Function), 12345);
-    });
-
-    it.each([[AuthType.USE_GEMINI], [AuthType.USE_VERTEX_AI], [undefined]])(
-      'should not trigger fallback for non-Google auth users (authType: %s) on TerminalQuotaError',
-      async (authType) => {
-        const fallbackCallback = vi.fn();
-        const mockFn = vi.fn().mockImplementation(async () => {
-          throw new TerminalQuotaError('Daily limit reached', {} as any);
-        });
-
-        const promise = retryWithBackoff(mockFn, {
-          maxAttempts: 3,
-          onPersistent429: fallbackCallback,
-          authType,
-        });
-
-        await expect(promise).rejects.toThrow('Daily limit reached');
-        expect(fallbackCallback).not.toHaveBeenCalled();
-        expect(mockFn).toHaveBeenCalledTimes(1);
-      },
-    );
  });
 });
@@ -8,10 +8,9 @@ import type { GenerateContentResponse } from '@google/genai';
 import { ApiError } from '@google/genai';
 import { AuthType } from '../core/contentGenerator.js';
 import {
-  classifyGoogleError,
-  RetryableQuotaError,
-  TerminalQuotaError,
-} from './googleQuotaErrors.js';
+  isProQuotaExceededError,
+  isGenericQuotaExceededError,
+} from './quotaErrorDetection.js';

 const FETCH_FAILED_MESSAGE =
  'exception TypeError: fetch failed sending request';
@@ -35,7 +34,7 @@ export interface RetryOptions {
 }

 const DEFAULT_RETRY_OPTIONS: RetryOptions = {
-  maxAttempts: 3,
+  maxAttempts: 5,
  initialDelayMs: 5000,
  maxDelayMs: 30000, // 30 seconds
  shouldRetryOnError: defaultShouldRetry,
@@ -120,6 +119,7 @@ export async function retryWithBackoff<T>(

  let attempt = 0;
  let currentDelay = initialDelayMs;
+  let consecutive429Count = 0;

  while (attempt < maxAttempts) {
    attempt++;
@@ -139,39 +139,94 @@ export async function retryWithBackoff<T>(

      return result;
    } catch (error) {
-      const classifiedError = classifyGoogleError(error);
+      const errorStatus = getErrorStatus(error);

-      if (classifiedError instanceof TerminalQuotaError) {
-        if (onPersistent429 && authType === AuthType.LOGIN_WITH_GOOGLE) {
-          try {
-            const fallbackModel = await onPersistent429(
-              authType,
-              classifiedError,
-            );
-            if (fallbackModel) {
-              attempt = 0; // Reset attempts and retry with the new model.
-              currentDelay = initialDelayMs;
-              continue;
-            }
-          } catch (fallbackError) {
-            console.warn('Model fallback failed:', fallbackError);
+      // Check for Pro quota exceeded error first - immediate fallback for OAuth users
+      if (
+        errorStatus === 429 &&
+        authType === AuthType.LOGIN_WITH_GOOGLE &&
+        isProQuotaExceededError(error) &&
+        onPersistent429
+      ) {
+        try {
+          const fallbackModel = await onPersistent429(authType, error);
+          if (fallbackModel !== false && fallbackModel !== null) {
+            // Reset attempt counter and try with new model
+            attempt = 0;
+            consecutive429Count = 0;
+            currentDelay = initialDelayMs;
+            // With the model updated, we continue to the next attempt
+            continue;
+          } else {
+            // Fallback handler returned null/false, meaning don't continue - stop retry process
+            throw error;
          }
+        } catch (fallbackError) {
+          // If fallback fails, continue with original error
+          console.warn('Fallback to Flash model failed:', fallbackError);
        }
-        throw classifiedError; // Throw if no fallback or fallback failed.
      }

-      if (classifiedError instanceof RetryableQuotaError) {
-        if (attempt >= maxAttempts) {
-          throw classifiedError;
+      // Check for generic quota exceeded error (but not Pro, which was handled above) - immediate fallback for OAuth users
+      if (
+        errorStatus === 429 &&
+        authType === AuthType.LOGIN_WITH_GOOGLE &&
+        !isProQuotaExceededError(error) &&
+        isGenericQuotaExceededError(error) &&
+        onPersistent429
+      ) {
+        try {
+          const fallbackModel = await onPersistent429(authType, error);
+          if (fallbackModel !== false && fallbackModel !== null) {
+            // Reset attempt counter and try with new model
+            attempt = 0;
+            consecutive429Count = 0;
+            currentDelay = initialDelayMs;
+            // With the model updated, we continue to the next attempt
+            continue;
+          } else {
+            // Fallback handler returned null/false, meaning don't continue - stop retry process
+            throw error;
+          }
+        } catch (fallbackError) {
+          // If fallback fails, continue with original error
+          console.warn('Fallback to Flash model failed:', fallbackError);
        }
-        console.warn(
-          `Attempt ${attempt} failed: ${classifiedError.message}. Retrying after ${classifiedError.retryDelayMs}ms...`,
-        );
-        await delay(classifiedError.retryDelayMs);
-        continue;
      }

-      // Generic retry logic for other errors
+      // Track consecutive 429 errors
+      if (errorStatus === 429) {
+        consecutive429Count++;
+      } else {
+        consecutive429Count = 0;
+      }
+
+      // If we have persistent 429s and a fallback callback for OAuth
+      if (
+        consecutive429Count >= 2 &&
+        onPersistent429 &&
+        authType === AuthType.LOGIN_WITH_GOOGLE
+      ) {
+        try {
+          const fallbackModel = await onPersistent429(authType, error);
+          if (fallbackModel !== false && fallbackModel !== null) {
+            // Reset attempt counter and try with new model
+            attempt = 0;
+            consecutive429Count = 0;
+            currentDelay = initialDelayMs;
+            // With the model updated, we continue to the next attempt
+            continue;
+          } else {
+            // Fallback handler returned null/false, meaning don't continue - stop retry process
+            throw error;
+          }
+        } catch (fallbackError) {
+          // If fallback fails, continue with original error
+          console.warn('Fallback to Flash model failed:', fallbackError);
+        }
+      }
+
+      // Check if we've exhausted retries or shouldn't retry
      if (
        attempt >= maxAttempts ||
        !shouldRetryOnError(error as Error, retryFetchErrors)
@@ -179,17 +234,31 @@ export async function retryWithBackoff<T>(
        throw error;
      }

-      const errorStatus = getErrorStatus(error);
-      logRetryAttempt(attempt, error, errorStatus);
+      const { delayDurationMs, errorStatus: delayErrorStatus } =
+        getDelayDurationAndStatus(error);

-      // Exponential backoff with jitter for non-quota errors
-      const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1);
-      const delayWithJitter = Math.max(0, currentDelay + jitter);
-      await delay(delayWithJitter);
-      currentDelay = Math.min(maxDelayMs, currentDelay * 2);
+      if (delayDurationMs > 0) {
+        // Respect Retry-After header if present and parsed
+        console.warn(
+          `Attempt ${attempt} failed with status ${delayErrorStatus ?? 'unknown'}. Retrying after explicit delay of ${delayDurationMs}ms...`,
+          error,
+        );
+        await delay(delayDurationMs);
+        // Reset currentDelay for next potential non-429 error, or if Retry-After is not present next time
+        currentDelay = initialDelayMs;
+      } else {
+        // Fall back to exponential backoff with jitter
+        logRetryAttempt(attempt, error, errorStatus);
+        // Add jitter: +/- 30% of currentDelay
+        const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1);
+        const delayWithJitter = Math.max(0, currentDelay + jitter);
+        await delay(delayWithJitter);
+        currentDelay = Math.min(maxDelayMs, currentDelay * 2);
+      }
    }
  }
-
+  // This line should theoretically be unreachable due to the throw in the catch block.
+  // Added for type safety and to satisfy the compiler that a promise is always returned.
  throw new Error('Retry attempts exhausted');
 }

@@ -220,6 +289,62 @@ export function getErrorStatus(error: unknown): number | undefined {
  return undefined;
 }

+/**
+ * Extracts the Retry-After delay from an error object's response headers.
+ *
+ * Only the shape `error.response.headers['retry-after']` is inspected, and
+ * only when that header value is a string. The key is looked up in lowercase
+ * as a plain property; no other casing or accessor is tried.
+ * NOTE(review): a fetch-style `Headers` instance would presumably not expose
+ * the value through property access - confirm what shape callers pass in.
+ *
+ * The value is interpreted in two steps:
+ *  1. As an integer number of seconds via `parseInt(value, 10)`; the result
+ *     is multiplied by 1000 and returned as-is (this branch is not clamped,
+ *     so a negative number of seconds yields a negative delay).
+ *  2. Otherwise as a date string via `new Date(value)`; the time remaining
+ *     until that date is returned, clamped to a minimum of 0.
+ *
+ * @param error The caught error value; anything that is not a non-null object
+ *   with the shape described above results in 0.
+ * @returns The delay in milliseconds, or 0 when no header is found or it
+ *   cannot be parsed by either step.
+ */
+function getRetryAfterDelayMs(error: unknown): number {
+  if (typeof error === 'object' && error !== null) {
+    // Only errors carrying a non-null `response` object are considered (the original note here: common in axios errors)
+    if (
+      'response' in error &&
+      typeof (error as { response?: unknown }).response === 'object' &&
+      (error as { response?: unknown }).response !== null
+    ) {
+      const response = (error as { response: { headers?: unknown } }).response;
+      if (
+        'headers' in response &&
+        typeof response.headers === 'object' &&
+        response.headers !== null
+      ) {
+        const headers = response.headers as { 'retry-after'?: unknown };
+        const retryAfterHeader = headers['retry-after'];
+        if (typeof retryAfterHeader === 'string') {
+          const retryAfterSeconds = parseInt(retryAfterHeader, 10);
+          if (!isNaN(retryAfterSeconds)) {
+            return retryAfterSeconds * 1000;
+          }
+          // Not an integer: try the HTTP-date form and return the time left until it (never below 0)
+          const retryAfterDate = new Date(retryAfterHeader);
+          if (!isNaN(retryAfterDate.getTime())) {
+            return Math.max(0, retryAfterDate.getTime() - Date.now());
+          }
+        }
+      }
+    }
+  }
+  return 0;
+}
+
+/**
+ * Determines the explicit retry delay for an error, together with its status.
+ *
+ * The status is obtained from `getErrorStatus(error)`. The Retry-After header
+ * is consulted (via `getRetryAfterDelayMs`) only when that status is 429; for
+ * every other status, including an undefined one, the delay stays 0.
+ *
+ * @param error The caught error value.
+ * @returns An object with `delayDurationMs` (the Retry-After delay in
+ *   milliseconds for a 429, otherwise 0) and `errorStatus` (the status
+ *   reported by `getErrorStatus`, possibly undefined).
+ */
+function getDelayDurationAndStatus(error: unknown): {
+  delayDurationMs: number;
+  errorStatus: number | undefined;
+} {
+  const errorStatus = getErrorStatus(error);
+  let delayDurationMs = 0;
+
+  if (errorStatus === 429) {
+    delayDurationMs = getRetryAfterDelayMs(error);
+  }
+  return { delayDurationMs, errorStatus };
+}
+
 /**
 * Logs a message for a retry attempt when using exponential backoff.
 * @param attempt The current attempt number.