From ee92db7533d33335f4146359a9338d451296105f Mon Sep 17 00:00:00 2001
From: Gaurav <39389231+gsquared94@users.noreply.github.com>
Date: Fri, 24 Oct 2025 11:09:06 -0700
Subject: [PATCH] fix: handle request retries and model fallback correctly
 (#11624)

---
 .../src/ui/hooks/useQuotaAndFallback.test.ts  |  99 +++--
 .../cli/src/ui/hooks/useQuotaAndFallback.ts   |  29 +-
 packages/core/index.ts                        |   2 +
 packages/core/src/index.ts                    |   1 +
 packages/core/src/utils/errorParsing.test.ts  | 244 ------------
 packages/core/src/utils/errorParsing.ts       |  91 +----
 packages/core/src/utils/flashFallback.test.ts |  76 ++--
 packages/core/src/utils/googleErrors.test.ts  | 356 ++++++++++++++++++
 packages/core/src/utils/googleErrors.ts       | 305 +++++++++++++++
 .../core/src/utils/googleQuotaErrors.test.ts  | 306 +++++++++++++++
 packages/core/src/utils/googleQuotaErrors.ts  | 192 ++++++++++
 .../core/src/utils/quotaErrorDetection.ts     |  65 ----
 packages/core/src/utils/retry.test.ts         | 181 +++------
 packages/core/src/utils/retry.ts              | 214 +++--------
 14 files changed, 1357 insertions(+), 804 deletions(-)
 create mode 100644 packages/core/src/utils/googleErrors.test.ts
 create mode 100644 packages/core/src/utils/googleErrors.ts
 create mode 100644 packages/core/src/utils/googleQuotaErrors.test.ts
 create mode 100644 packages/core/src/utils/googleQuotaErrors.ts
diff --git a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts
index 6d7782694f..0e94a1874d 100644
--- a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts
+++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts
@@ -19,25 +19,15 @@ import {
   type FallbackModelHandler,
   UserTierId,
   AuthType,
-  isGenericQuotaExceededError,
-  isProQuotaExceededError,
+  TerminalQuotaError,
   makeFakeConfig,
+  type GoogleApiError,
+  RetryableQuotaError,
 } from '@google/gemini-cli-core';
 import { useQuotaAndFallback } from './useQuotaAndFallback.js';
 import type { UseHistoryManagerReturn } from './useHistoryManager.js';
 import { AuthState, MessageType } from '../types.js';
 
-// Mock the error checking functions from the core package to control test scenarios
-vi.mock('@google/gemini-cli-core', async (importOriginal) => {
-  const original =
-    await importOriginal<typeof import('@google/gemini-cli-core')>();
-  return {
-    ...original,
-    isGenericQuotaExceededError: vi.fn(),
-    isProQuotaExceededError: vi.fn(),
-  };
-});
-
 // Use a type alias for SpyInstance as it's not directly exported
 type SpyInstance = ReturnType<typeof vi.spyOn>;
 
@@ -47,12 +37,15 @@ describe('useQuotaAndFallback', () => {
   let mockSetAuthState: Mock;
   let mockSetModelSwitchedFromQuotaError: Mock;
   let setFallbackHandlerSpy: SpyInstance;
-
-  const mockedIsGenericQuotaExceededError = isGenericQuotaExceededError as Mock;
-  const mockedIsProQuotaExceededError = isProQuotaExceededError as Mock;
+  let mockGoogleApiError: GoogleApiError;
 
   beforeEach(() => {
     mockConfig = makeFakeConfig();
+    mockGoogleApiError = {
+      code: 429,
+      message: 'mock error',
+      details: [],
+    };
 
     // Spy on the method that requires the private field and mock its return.
     // This is cleaner than modifying the config class for tests.
@@ -72,9 +65,6 @@ describe('useQuotaAndFallback', () => {
 
     setFallbackHandlerSpy = vi.spyOn(mockConfig, 'setFallbackModelHandler');
     vi.spyOn(mockConfig, 'setQuotaErrorOccurred');
-
-    mockedIsGenericQuotaExceededError.mockReturnValue(false);
-    mockedIsProQuotaExceededError.mockReturnValue(false);
   });
 
   afterEach(() => {
@@ -140,51 +130,62 @@ describe('useQuotaAndFallback', () => {
     describe('Automatic Fallback Scenarios', () => {
       const testCases = [
         {
-          errorType: 'generic',
+          description: 'other error for FREE tier',
           tier: UserTierId.FREE,
+          error: new Error('some error'),
           expectedMessageSnippets: [
-            'Automatically switching from model-A to model-B',
+            'Automatically switching from model-A to model-B for faster responses',
             'upgrade to a Gemini Code Assist Standard or Enterprise plan',
           ],
         },
         {
-          errorType: 'generic',
-          tier: UserTierId.STANDARD, // Paid tier
+          description: 'other error for LEGACY tier',
+          tier: UserTierId.LEGACY, // Paid tier
+          error: new Error('some error'),
           expectedMessageSnippets: [
-            'Automatically switching from model-A to model-B',
+            'Automatically switching from model-A to model-B for faster responses',
             'switch to using a paid API key from AI Studio',
           ],
         },
         {
-          errorType: 'other',
+          description: 'retryable quota error for FREE tier',
           tier: UserTierId.FREE,
+          error: new RetryableQuotaError(
+            'retryable quota',
+            { code: 429, message: 'mock error', details: [] },
+            5,
+          ),
           expectedMessageSnippets: [
-            'Automatically switching from model-A to model-B for faster responses',
-            'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+            'Your requests are being throttled right now due to server being at capacity for model-A',
+            'Automatically switching from model-A to model-B',
+            'upgrading to a Gemini Code Assist Standard or Enterprise plan',
           ],
         },
         {
-          errorType: 'other',
+          description: 'retryable quota error for LEGACY tier',
           tier: UserTierId.LEGACY, // Paid tier
+          error: new RetryableQuotaError(
+            'retryable quota',
+            { code: 429, message: 'mock error', details: [] },
+            5,
+          ),
           expectedMessageSnippets: [
-            'Automatically switching from model-A to model-B for faster responses',
+            'Your requests are being throttled right now due to server being at capacity for model-A',
+            'Automatically switching from model-A to model-B',
             'switch to using a paid API key from AI Studio',
           ],
         },
       ];
 
-      for (const { errorType, tier, expectedMessageSnippets } of testCases) {
-        it(`should handle ${errorType} error for ${tier} tier correctly`, async () => {
-          mockedIsGenericQuotaExceededError.mockReturnValue(
-            errorType === 'generic',
-          );
-
+      for (const {
+        description,
+        tier,
+        error,
+        expectedMessageSnippets,
+      } of testCases) {
+        it(`should handle ${description} correctly`, async () => {
           const handler = getRegisteredHandler(tier);
-          const result = await handler(
-            'model-A',
-            'model-B',
-            new Error('quota exceeded'),
-          );
+          const result = await handler('model-A', 'model-B', error);
 
           // Automatic fallbacks should return 'stop'
           expect(result).toBe('stop');
@@ -207,10 +208,6 @@ describe('useQuotaAndFallback', () => {
     });
 
     describe('Interactive Fallback (Pro Quota Error)', () => {
-      beforeEach(() => {
-        mockedIsProQuotaExceededError.mockReturnValue(true);
-      });
-
       it('should set an interactive request and wait for user choice', async () => {
         const { result } = renderHook(() =>
           useQuotaAndFallback({
@@ -229,7 +226,7 @@ describe('useQuotaAndFallback', () => {
         const promise = handler(
           'gemini-pro',
           'gemini-flash',
-          new Error('pro quota'),
+          new TerminalQuotaError('pro quota', mockGoogleApiError),
         );
 
         await act(async () => {});
@@ -268,7 +265,7 @@ describe('useQuotaAndFallback', () => {
         const promise1 = handler(
           'gemini-pro',
           'gemini-flash',
-          new Error('pro quota 1'),
+          new TerminalQuotaError('pro quota 1', mockGoogleApiError),
         );
         await act(async () => {});
 
@@ -278,7 +275,7 @@ describe('useQuotaAndFallback', () => {
         const result2 = await handler(
           'gemini-pro',
           'gemini-flash',
-          new Error('pro quota 2'),
+          new TerminalQuotaError('pro quota 2', mockGoogleApiError),
         );
 
         // The lock should have stopped the second request
@@ -297,10 +294,6 @@ describe('useQuotaAndFallback', () => {
   });
 
   describe('handleProQuotaChoice', () => {
-    beforeEach(() => {
-      mockedIsProQuotaExceededError.mockReturnValue(true);
-    });
-
     it('should do nothing if there is no pending pro quota request', () => {
       const { result } = renderHook(() =>
         useQuotaAndFallback({
@@ -336,7 +329,7 @@ describe('useQuotaAndFallback', () => {
       const promise = handler(
         'gemini-pro',
         'gemini-flash',
-        new Error('pro quota'),
+        new TerminalQuotaError('pro quota', mockGoogleApiError),
       );
       await act(async () => {}); // Allow state to update
 
@@ -367,7 +360,7 @@ describe('useQuotaAndFallback', () => {
       const promise = handler(
         'gemini-pro',
         'gemini-flash',
-        new Error('pro quota'),
+        new TerminalQuotaError('pro quota', mockGoogleApiError),
       );
       await act(async () => {}); // Allow state to update
 
diff --git a/packages/cli/src/ui/hooks/useQuotaAndFallback.ts b/packages/cli/src/ui/hooks/useQuotaAndFallback.ts
index a7eb77659a..194f5f27fc 100644
--- a/packages/cli/src/ui/hooks/useQuotaAndFallback.ts
+++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.ts
@@ -9,9 +9,9 @@ import {
   type Config,
   type FallbackModelHandler,
   type FallbackIntent,
-  isGenericQuotaExceededError,
-  isProQuotaExceededError,
+  TerminalQuotaError,
   UserTierId,
+  RetryableQuotaError,
 } from '@google/gemini-cli-core';
 import { useCallback, useEffect, useRef, useState } from 'react';
 import { type UseHistoryManagerReturn } from './useHistoryManager.js';
@@ -63,7 +63,7 @@ export function useQuotaAndFallback({
 
       let message: string;
 
-      if (error && isProQuotaExceededError(error)) {
+      if (error instanceof TerminalQuotaError) {
         // Pro Quota specific messages (Interactive)
         if (isPaidTier) {
           message = `⚡ You have reached your daily ${failedModel} quota limit.
@@ -76,31 +76,30 @@ export function useQuotaAndFallback({
 ⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
 ⚡ You can switch authentication methods by typing /auth`;
         }
-      } else if (error && isGenericQuotaExceededError(error)) {
-        // Generic Quota (Automatic fallback)
-        const actionMessage = `⚡ You have reached your daily quota limit.\n⚡ Automatically switching from ${failedModel} to ${fallbackModel} for the remainder of this session.`;
+      } else if (error instanceof RetryableQuotaError) {
+        // Short term quota retries exhausted (Automatic fallback)
+        const actionMessage = `⚡ Your requests are being throttled right now due to server being at capacity for ${failedModel}.\n⚡ Automatically switching from ${failedModel} to ${fallbackModel} for the remainder of this session.`;
 
         if (isPaidTier) {
           message = `${actionMessage}
-⚡ To continue accessing the ${failedModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+⚡ To continue accessing the ${failedModel} model, retry your request after some time or consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
         } else {
           message = `${actionMessage}
-⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
-⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
+⚡ Retry your requests after some time. Otherwise consider upgrading to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
 ⚡ You can switch authentication methods by typing /auth`;
         }
       } else {
-        // Consecutive 429s or other errors (Automatic fallback)
+        // Other errors (Automatic fallback)
         const actionMessage = `⚡ Automatically switching from ${failedModel} to ${fallbackModel} for faster responses for the remainder of this session.`;
 
         if (isPaidTier) {
           message = `${actionMessage}
-⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${failedModel} quota limit
-⚡ To continue accessing the ${failedModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+⚡ Your requests are being throttled temporarily due to server being at capacity for ${failedModel} or there is a service outage.
+⚡ To continue accessing the ${failedModel} model, you can retry your request after some time or consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
         } else {
           message = `${actionMessage}
-⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${failedModel} quota limit
-⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
+⚡ Your requests are being throttled temporarily due to server being at capacity for ${failedModel} or there is a service outage.
+⚡ To avoid being throttled, you can retry your request after some time or upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
 ⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
 ⚡ You can switch authentication methods by typing /auth`;
         }
@@ -119,7 +118,7 @@ export function useQuotaAndFallback({
       config.setQuotaErrorOccurred(true);
 
       // Interactive Fallback for Pro quota
-      if (error && isProQuotaExceededError(error)) {
+      if (error instanceof TerminalQuotaError) {
         if (isDialogPending.current) {
           return 'stop'; // A dialog is already active, so just stop this request.
         }
diff --git a/packages/core/index.ts b/packages/core/index.ts
index 729fcc8d48..acc9743e61 100644
--- a/packages/core/index.ts
+++ b/packages/core/index.ts
@@ -44,3 +44,5 @@ export { makeFakeConfig } from './src/test-utils/config.js';
 export * from './src/utils/pathReader.js';
 export { ClearcutLogger } from './src/telemetry/clearcut-logger/clearcut-logger.js';
 export { logModelSlashCommand } from './src/telemetry/loggers.js';
+export * from './src/utils/googleQuotaErrors.js';
+export type { GoogleApiError } from './src/utils/googleErrors.js';
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 42ced4457f..bc2eab2147 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -48,6 +48,7 @@ export * from './utils/gitIgnoreParser.js';
 export * from './utils/gitUtils.js';
 export * from './utils/editor.js';
 export * from './utils/quotaErrorDetection.js';
+export * from './utils/googleQuotaErrors.js';
 export * from './utils/fileUtils.js';
 export * from './utils/retry.js';
 export * from './utils/shell-utils.js';
diff --git a/packages/core/src/utils/errorParsing.test.ts b/packages/core/src/utils/errorParsing.test.ts
index 9c71f4d89b..291145d2e8 100644
--- a/packages/core/src/utils/errorParsing.test.ts
+++ b/packages/core/src/utils/errorParsing.test.ts
@@ -6,9 +6,7 @@
 
 import { describe, it, expect } from 'vitest';
 import { parseAndFormatApiError } from './errorParsing.js';
-import { isProQuotaExceededError } from './quotaErrorDetection.js';
 import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
-import { UserTierId } from '../code_assist/types.js';
 import { AuthType } from '../core/contentGenerator.js';
 import type { StructuredError } from '../core/turn.js';
 
@@ -40,22 +38,6 @@ describe('parseAndFormatApiError', () => {
     );
   });
 
-  it('should format a 429 API error with the personal message', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain('[API Error: Rate limit exceeded');
-    expect(result).toContain(
-      'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
-    );
-  });
-
   it('should format a 429 API error with the vertex message', () => {
     const errorMessage =
       'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
@@ -132,230 +114,4 @@ describe('parseAndFormatApiError', () => {
     const expected = '[API Error: An unknown error occurred.]';
     expect(parseAndFormatApiError(error)).toBe(expected);
   });
-
-  it('should format a 429 API error with Pro quota exceeded message for Google auth (Free tier)', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain(
-      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
-    );
-    expect(result).toContain(
-      'You have reached your daily gemini-2.5-pro quota limit',
-    );
-    expect(result).toContain('upgrade to get higher limits');
-  });
-
-  it('should format a regular 429 API error with standard message for Google auth', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain('[API Error: Rate limit exceeded');
-    expect(result).toContain(
-      'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
-    );
-    expect(result).not.toContain(
-      'You have reached your daily gemini-2.5-pro quota limit',
-    );
-  });
-
-  it('should format a 429 API error with generic quota exceeded message for Google auth', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain(
-      "[API Error: Quota exceeded for quota metric 'GenerationRequests'",
-    );
-    expect(result).toContain('You have reached your daily quota limit');
-    expect(result).not.toContain(
-      'You have reached your daily Gemini 2.5 Pro quota limit',
-    );
-  });
-
-  it('should prioritize Pro quota message over generic quota message for Google auth', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain(
-      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
-    );
-    expect(result).toContain(
-      'You have reached your daily gemini-2.5-pro quota limit',
-    );
-    expect(result).not.toContain('You have reached your daily quota limit');
-  });
-
-  it('should format a 429 API error with Pro quota exceeded message for Google auth (Standard tier)', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      UserTierId.STANDARD,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain(
-      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
-    );
-    expect(result).toContain(
-      'You have reached your daily gemini-2.5-pro quota limit',
-    );
-    expect(result).toContain(
-      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
-    );
-    expect(result).not.toContain('upgrade to get higher limits');
-  });
-
-  it('should format a 429 API error with Pro quota exceeded message for Google auth (Legacy tier)', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      UserTierId.LEGACY,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain(
-      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
-    );
-    expect(result).toContain(
-      'You have reached your daily gemini-2.5-pro quota limit',
-    );
-    expect(result).toContain(
-      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
-    );
-    expect(result).not.toContain('upgrade to get higher limits');
-  });
-
-  it('should handle different Gemini 2.5 version strings in Pro quota exceeded errors', () => {
-    const errorMessage25 =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const errorMessagePreview =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-
-    const result25 = parseAndFormatApiError(
-      errorMessage25,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    const resultPreview = parseAndFormatApiError(
-      errorMessagePreview,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-preview-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-
-    expect(result25).toContain(
-      'You have reached your daily gemini-2.5-pro quota limit',
-    );
-    expect(resultPreview).toContain(
-      'You have reached your daily gemini-2.5-preview-pro quota limit',
-    );
-    expect(result25).toContain('upgrade to get higher limits');
-    expect(resultPreview).toContain('upgrade to get higher limits');
-  });
-
-  it('should not match non-Pro models with similar version strings', () => {
-    // Test that Flash models with similar version strings don't match
-    expect(
-      isProQuotaExceededError(
-        "Quota exceeded for quota metric 'Gemini 2.5 Flash Requests' and limit",
-      ),
-    ).toBe(false);
-    expect(
-      isProQuotaExceededError(
-        "Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit",
-      ),
-    ).toBe(false);
-
-    // Test other model types
-    expect(
-      isProQuotaExceededError(
-        "Quota exceeded for quota metric 'Gemini 2.5 Ultra Requests' and limit",
-      ),
-    ).toBe(false);
-    expect(
-      isProQuotaExceededError(
-        "Quota exceeded for quota metric 'Gemini 2.5 Standard Requests' and limit",
-      ),
-    ).toBe(false);
-
-    // Test generic quota messages
-    expect(
-      isProQuotaExceededError(
-        "Quota exceeded for quota metric 'GenerationRequests' and limit",
-      ),
-    ).toBe(false);
-    expect(
-      isProQuotaExceededError(
-        "Quota exceeded for quota metric 'EmbeddingRequests' and limit",
-      ),
-    ).toBe(false);
-  });
-
-  it('should format a generic quota exceeded message for Google auth (Standard tier)', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      UserTierId.STANDARD,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain(
-      "[API Error: Quota exceeded for quota metric 'GenerationRequests'",
-    );
-    expect(result).toContain('You have reached your daily quota limit');
-    expect(result).toContain(
-      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
-    );
-    expect(result).not.toContain('upgrade to get higher limits');
-  });
-
-  it('should format a regular 429 API error with standard message for Google auth (Standard tier)', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      UserTierId.STANDARD,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain('[API Error: Rate limit exceeded');
-    expect(result).toContain(
-      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
-    );
-    expect(result).not.toContain('upgrade to get higher limits');
-  });
 });
diff --git a/packages/core/src/utils/errorParsing.ts b/packages/core/src/utils/errorParsing.ts
index ecfc237573..bad61ea9e2 100644
--- a/packages/core/src/utils/errorParsing.ts
+++ b/packages/core/src/utils/errorParsing.ts
@@ -4,50 +4,11 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import {
-  isProQuotaExceededError,
-  isGenericQuotaExceededError,
-  isApiError,
-  isStructuredError,
-} from './quotaErrorDetection.js';
-import {
-  DEFAULT_GEMINI_MODEL,
-  DEFAULT_GEMINI_FLASH_MODEL,
-} from '../config/models.js';
-import { UserTierId } from '../code_assist/types.js';
+import { isApiError, isStructuredError } from './quotaErrorDetection.js';
+import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
+import type { UserTierId } from '../code_assist/types.js';
 import { AuthType } from '../core/contentGenerator.js';
 
-// Free Tier message functions
-const getRateLimitErrorMessageGoogleFree = (
-  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
-) =>
-  `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
-
-const getRateLimitErrorMessageGoogleProQuotaFree = (
-  currentModel: string = DEFAULT_GEMINI_MODEL,
-  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
-) =>
-  `\nYou have reached your daily ${currentModel} quota limit. You will be switched to the ${fallbackModel} model for the rest of this session. To increase your limits, upgrade to get higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
-
-const getRateLimitErrorMessageGoogleGenericQuotaFree = () =>
-  `\nYou have reached your daily quota limit. To increase your limits, upgrade to get higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
-
-// Legacy/Standard Tier message functions
-const getRateLimitErrorMessageGooglePaid = (
-  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
-) =>
-  `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`;
-
-const getRateLimitErrorMessageGoogleProQuotaPaid = (
-  currentModel: string = DEFAULT_GEMINI_MODEL,
-  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
-) =>
-  `\nYou have reached your daily ${currentModel} quota limit. You will be switched to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI. To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
-
-const getRateLimitErrorMessageGoogleGenericQuotaPaid = (
-  currentModel: string = DEFAULT_GEMINI_MODEL,
-) =>
-  `\nYou have reached your daily quota limit. We appreciate you for choosing Gemini Code Assist and the Gemini CLI. To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
 const RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI =
   '\nPlease wait and try again later. To increase your limits, request a quota increase through AI Studio, or switch to another /auth method';
 const RATE_LIMIT_ERROR_MESSAGE_VERTEX =
@@ -59,39 +20,9 @@ const getRateLimitErrorMessageDefault = (
 
 function getRateLimitMessage(
   authType?: AuthType,
-  error?: unknown,
-  userTier?: UserTierId,
-  currentModel?: string,
   fallbackModel?: string,
 ): string {
   switch (authType) {
-    case AuthType.LOGIN_WITH_GOOGLE: {
-      // Determine if user is on a paid tier (Legacy or Standard) - default to FREE if not specified
-      const isPaidTier =
-        userTier === UserTierId.LEGACY || userTier === UserTierId.STANDARD;
-
-      if (isProQuotaExceededError(error)) {
-        return isPaidTier
-          ? getRateLimitErrorMessageGoogleProQuotaPaid(
-              currentModel || DEFAULT_GEMINI_MODEL,
-              fallbackModel,
-            )
-          : getRateLimitErrorMessageGoogleProQuotaFree(
-              currentModel || DEFAULT_GEMINI_MODEL,
-              fallbackModel,
-            );
-      } else if (isGenericQuotaExceededError(error)) {
-        return isPaidTier
-          ? getRateLimitErrorMessageGoogleGenericQuotaPaid(
-              currentModel || DEFAULT_GEMINI_MODEL,
-            )
-          : getRateLimitErrorMessageGoogleGenericQuotaFree();
-      } else {
-        return isPaidTier
-          ? getRateLimitErrorMessageGooglePaid(fallbackModel)
-          : getRateLimitErrorMessageGoogleFree(fallbackModel);
-      }
-    }
     case AuthType.USE_GEMINI:
       return RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI;
     case AuthType.USE_VERTEX_AI:
@@ -111,13 +42,7 @@ export function parseAndFormatApiError(
   if (isStructuredError(error)) {
     let text = `[API Error: ${error.message}]`;
     if (error.status === 429) {
-      text += getRateLimitMessage(
-        authType,
-        error,
-        userTier,
-        currentModel,
-        fallbackModel,
-      );
+      text += getRateLimitMessage(authType, fallbackModel);
     }
     return text;
   }
@@ -146,13 +71,7 @@ export function parseAndFormatApiError(
         }
         let text = `[API Error: ${finalMessage} (Status: ${parsedError.error.status})]`;
         if (parsedError.error.code === 429) {
-          text += getRateLimitMessage(
-            authType,
-            parsedError,
-            userTier,
-            currentModel,
-            fallbackModel,
-          );
+          text += getRateLimitMessage(authType, fallbackModel);
         }
         return text;
       }
diff --git a/packages/core/src/utils/flashFallback.test.ts b/packages/core/src/utils/flashFallback.test.ts
index 8ef9665f42..a3f08f5df6 100644
--- a/packages/core/src/utils/flashFallback.test.ts
+++ b/packages/core/src/utils/flashFallback.test.ts
@@ -11,7 +11,6 @@ import {
   setSimulate429,
   disableSimulationAfterFallback,
   shouldSimulate429,
-  createSimulated429Error,
   resetRequestCounter,
 } from './testUtils.js';
 import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
@@ -19,12 +18,15 @@ import { retryWithBackoff } from './retry.js';
 import { AuthType } from '../core/contentGenerator.js';
 // Import the new types (Assuming this test file is in packages/core/src/utils/)
 import type { FallbackModelHandler } from '../fallback/types.js';
+import type { GoogleApiError } from './googleErrors.js';
+import { TerminalQuotaError } from './googleQuotaErrors.js';
 
 vi.mock('node:fs');
 
 // Update the description to reflect that this tests the retry utility's integration
 describe('Retry Utility Fallback Integration', () => {
   let config: Config;
+  let mockGoogleApiError: GoogleApiError;
 
   beforeEach(() => {
     vi.mocked(fs.existsSync).mockReturnValue(true);
@@ -38,6 +40,11 @@ describe('Retry Utility Fallback Integration', () => {
       cwd: '/test',
       model: 'gemini-2.5-pro',
     });
+    mockGoogleApiError = {
+      code: 429,
+      message: 'mock error',
+      details: [],
+    };
 
     // Reset simulation state for each test
     setSimulate429(false);
@@ -56,6 +63,7 @@ describe('Retry Utility Fallback Integration', () => {
     const result = await config.fallbackModelHandler!(
       'gemini-2.5-pro',
       DEFAULT_GEMINI_FLASH_MODEL,
+      new Error('test'),
     );
 
     // Verify it returns the correct intent
@@ -63,81 +71,61 @@ describe('Retry Utility Fallback Integration', () => {
   });
 
   // This test validates the retry utility's logic for triggering the callback.
-  it('should trigger onPersistent429 after 2 consecutive 429 errors for OAuth users', async () => {
+  it('should trigger onPersistent429 on TerminalQuotaError for OAuth users', async () => {
     let fallbackCalled = false;
-    // Removed fallbackModel variable as it's no longer relevant here.
 
-    // Mock function that simulates exactly 2 429 errors, then succeeds after fallback
     const mockApiCall = vi
       .fn()
-      .mockRejectedValueOnce(createSimulated429Error())
-      .mockRejectedValueOnce(createSimulated429Error())
+      .mockRejectedValueOnce(
+        new TerminalQuotaError('Daily limit', mockGoogleApiError),
+      )
+      .mockRejectedValueOnce(
+        new TerminalQuotaError('Daily limit', mockGoogleApiError),
+      )
       .mockResolvedValueOnce('success after fallback');
 
-    // Mock the onPersistent429 callback (this is what client.ts/geminiChat.ts provides)
     const mockPersistent429Callback = vi.fn(async (_authType?: string) => {
       fallbackCalled = true;
-      // Return true to signal retryWithBackoff to reset attempts and continue.
       return true;
     });
 
-    // Test with OAuth personal auth type, with maxAttempts = 2 to ensure fallback triggers
     const result = await retryWithBackoff(mockApiCall, {
       maxAttempts: 2,
       initialDelayMs: 1,
       maxDelayMs: 10,
-      shouldRetryOnError: (error: Error) => {
-        const status = (error as Error & { status?: number }).status;
-        return status === 429;
-      },
       onPersistent429: mockPersistent429Callback,
       authType: AuthType.LOGIN_WITH_GOOGLE,
     });
 
-    // Verify fallback mechanism was triggered
     expect(fallbackCalled).toBe(true);
     expect(mockPersistent429Callback).toHaveBeenCalledWith(
       AuthType.LOGIN_WITH_GOOGLE,
-      expect.any(Error),
+      expect.any(TerminalQuotaError),
     );
     expect(result).toBe('success after fallback');
-    // Should have: 2 failures, then fallback triggered, then 1 success after retry reset
     expect(mockApiCall).toHaveBeenCalledTimes(3);
   });
 
   it('should not trigger onPersistent429 for API key users', async () => {
-    let fallbackCalled = false;
+    const fallbackCallback = vi.fn();
 
-    // Mock function that simulates 429 errors
-    const mockApiCall = vi.fn().mockRejectedValue(createSimulated429Error());
+    const mockApiCall = vi
+      .fn()
+      .mockRejectedValueOnce(
+        new TerminalQuotaError('Daily limit', mockGoogleApiError),
+      );
 
-    // Mock the callback
-    const mockPersistent429Callback = vi.fn(async () => {
-      fallbackCalled = true;
-      return true;
+    const promise = retryWithBackoff(mockApiCall, {
+      maxAttempts: 2,
+      initialDelayMs: 1,
+      maxDelayMs: 10,
+      onPersistent429: fallbackCallback,
+      authType: AuthType.USE_GEMINI, // API key auth type
     });
 
-    // Test with API key auth type - should not trigger fallback
-    try {
-      await retryWithBackoff(mockApiCall, {
-        maxAttempts: 5,
-        initialDelayMs: 10,
-        maxDelayMs: 100,
-        shouldRetryOnError: (error: Error) => {
-          const status = (error as Error & { status?: number }).status;
-          return status === 429;
-        },
-        onPersistent429: mockPersistent429Callback,
-        authType: AuthType.USE_GEMINI, // API key auth type
-      });
-    } catch (error) {
-      // Expected to throw after max attempts
-      expect((error as Error).message).toContain('Rate limit exceeded');
-    }
-
-    // Verify fallback was NOT triggered for API key users
-    expect(fallbackCalled).toBe(false);
-    expect(mockPersistent429Callback).not.toHaveBeenCalled();
+    await expect(promise).rejects.toThrow('Daily limit');
+    expect(fallbackCallback).not.toHaveBeenCalled();
+    expect(mockApiCall).toHaveBeenCalledTimes(1);
   });
 
   // This test validates the test utilities themselves.
diff --git a/packages/core/src/utils/googleErrors.test.ts b/packages/core/src/utils/googleErrors.test.ts
new file mode 100644
index 0000000000..c051fb0310
--- /dev/null
+++ b/packages/core/src/utils/googleErrors.test.ts
@@ -0,0 +1,356 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { parseGoogleApiError } from './googleErrors.js';
+import type { QuotaFailure } from './googleErrors.js';
+
+describe('parseGoogleApiError', () => {
+  it('should return null for non-gaxios errors', () => {
+    expect(parseGoogleApiError(new Error('vanilla error'))).toBeNull();
+    expect(parseGoogleApiError(null)).toBeNull();
+    expect(parseGoogleApiError({})).toBeNull();
+  });
+
+  it('should parse a standard gaxios error', () => {
+    const mockError = {
+      response: {
+        status: 429,
+        data: {
+          error: {
+            code: 429,
+            message: 'Quota exceeded',
+            details: [
+              {
+                '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
+                violations: [{ subject: 'user', description: 'daily limit' }],
+              },
+            ],
+          },
+        },
+      },
+    };
+
+    const parsed = parseGoogleApiError(mockError);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toBe('Quota exceeded');
+    expect(parsed?.details).toHaveLength(1);
+    const detail = parsed?.details[0] as QuotaFailure;
+    expect(detail['@type']).toBe('type.googleapis.com/google.rpc.QuotaFailure');
+    expect(detail.violations[0].description).toBe('daily limit');
+  });
+
+  it('should parse an error with details stringified in the message', () => {
+    const innerError = {
+      error: {
+        code: 429,
+        message: 'Inner quota message',
+        details: [
+          {
+            '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+            retryDelay: '10s',
+          },
+        ],
+      },
+    };
+
+    const mockError = {
+      response: {
+        status: 429,
+        data: {
+          error: {
+            code: 429,
+            message: JSON.stringify(innerError),
+            details: [], // Top-level details are empty
+          },
+        },
+      },
+    };
+
+    const parsed = parseGoogleApiError(mockError);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toBe('Inner quota message');
+    expect(parsed?.details).toHaveLength(1);
+    expect(parsed?.details[0]['@type']).toBe(
+      'type.googleapis.com/google.rpc.RetryInfo',
+    );
+  });
+
+  it('should return null if details are not in the expected format', () => {
+    const mockError = {
+      response: {
+        status: 400,
+        data: {
+          error: {
+            code: 400,
+            message: 'Bad Request',
+            details: 'just a string', // Invalid details format
+          },
+        },
+      },
+    };
+    expect(parseGoogleApiError(mockError)).toBeNull();
+  });
+
+  it('should return null if there are no valid details', () => {
+    const mockError = {
+      response: {
+        status: 400,
+        data: {
+          error: {
+            code: 400,
+            message: 'Bad Request',
+            details: [
+              {
+                // missing '@type'
+                reason: 'some reason',
+              },
+            ],
+          },
+        },
+      },
+    };
+    expect(parseGoogleApiError(mockError)).toBeNull();
+  });
+
+  it('should parse a doubly nested error in the message', () => {
+    const innerError = {
+      error: {
+        code: 429,
+        message: 'Innermost quota message',
+        details: [
+          {
+            '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+            retryDelay: '20s',
+          },
+        ],
+      },
+    };
+
+    const middleError = {
+      error: {
+        code: 429,
+        message: JSON.stringify(innerError),
+        details: [],
+      },
+    };
+
+    const mockError = {
+      response: {
+        status: 429,
+        data: {
+          error: {
+            code: 429,
+            message: JSON.stringify(middleError),
+            details: [],
+          },
+        },
+      },
+    };
+
+    const parsed = parseGoogleApiError(mockError);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toBe('Innermost quota message');
+    expect(parsed?.details).toHaveLength(1);
+    expect(parsed?.details[0]['@type']).toBe(
+      'type.googleapis.com/google.rpc.RetryInfo',
+    );
+  });
+
+  it('should parse an error that is not in a response object', () => {
+    const innerError = {
+      error: {
+        code: 429,
+        message: 'Innermost quota message',
+        details: [
+          {
+            '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+            retryDelay: '20s',
+          },
+        ],
+      },
+    };
+
+    const mockError = {
+      error: {
+        code: 429,
+        message: JSON.stringify(innerError),
+        details: [],
+      },
+    };
+
+    const parsed = parseGoogleApiError(mockError);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toBe('Innermost quota message');
+    expect(parsed?.details).toHaveLength(1);
+    expect(parsed?.details[0]['@type']).toBe(
+      'type.googleapis.com/google.rpc.RetryInfo',
+    );
+  });
+
+  it('should parse an error that is a JSON string', () => {
+    const innerError = {
+      error: {
+        code: 429,
+        message: 'Innermost quota message',
+        details: [
+          {
+            '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+            retryDelay: '20s',
+          },
+        ],
+      },
+    };
+
+    const mockError = {
+      error: {
+        code: 429,
+        message: JSON.stringify(innerError),
+        details: [],
+      },
+    };
+
+    const parsed = parseGoogleApiError(JSON.stringify(mockError));
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toBe('Innermost quota message');
+    expect(parsed?.details).toHaveLength(1);
+    expect(parsed?.details[0]['@type']).toBe(
+      'type.googleapis.com/google.rpc.RetryInfo',
+    );
+  });
+
+  it('should parse the user-provided nested error string', () => {
+    const userErrorString =
+      '{"error":{"message":"{\\n  \\"error\\": {\\n    \\"code\\": 429,\\n    \\"message\\": \\"You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 10000\\\\nPlease retry in 40.025771073s.\\",\\n    \\"status\\": \\"RESOURCE_EXHAUSTED\\",\\n    \\"details\\": [\\n      {\\n        \\"@type\\": \\"type.googleapis.com/google.rpc.DebugInfo\\",\\n        \\"detail\\": \\"[ORIGINAL ERROR] generic::resource_exhausted: You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 10000\\\\nPlease retry in 40.025771073s. [google.rpc.error_details_ext] { message: \\\\\\"You exceeded your current quota, please check your plan and billing details. 
For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 10000\\\\\\\\nPlease retry in 40.025771073s.\\\\\\" }\\"\\n      },\\n      {\\n        \\"@type\\": \\"type.googleapis.com/google.rpc.QuotaFailure\\",\\n        \\"violations\\": [\\n          {\\n            \\"quotaMetric\\": \\"generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count\\",\\n            \\"quotaId\\": \\"GenerateContentPaidTierInputTokensPerModelPerMinute\\",\\n            \\"quotaDimensions\\": {\\n              \\"location\\": \\"global\\",\\n              \\"model\\": \\"gemini-2.5-pro\\"\\n            },\\n            \\"quotaValue\\": \\"10000\\"\\n          }\\n        ]\\n      },\\n      {\\n        \\"@type\\": \\"type.googleapis.com/google.rpc.Help\\",\\n        \\"links\\": [\\n          {\\n            \\"description\\": \\"Learn more about Gemini API quotas\\",\\n            \\"url\\": \\"https://ai.google.dev/gemini-api/docs/rate-limits\\"\\n          }\\n        ]\\n      },\\n      {\\n        \\"@type\\": \\"type.googleapis.com/google.rpc.RetryInfo\\",\\n        \\"retryDelay\\": \\"40s\\"\\n      }\\n    ]\\n  }\\n}\\n","code":429,"status":"Too Many Requests"}}';
+
+    const parsed = parseGoogleApiError(userErrorString);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toContain('You exceeded your current quota');
+    expect(parsed?.details).toHaveLength(4);
+    expect(
+      parsed?.details.some(
+        (d) => d['@type'] === 'type.googleapis.com/google.rpc.QuotaFailure',
+      ),
+    ).toBe(true);
+    expect(
+      parsed?.details.some(
+        (d) => d['@type'] === 'type.googleapis.com/google.rpc.RetryInfo',
+      ),
+    ).toBe(true);
+  });
+
+  it('should parse an error that is an array', () => {
+    const mockError = [
+      {
+        error: {
+          code: 429,
+          message: 'Quota exceeded',
+          details: [
+            {
+              '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
+              violations: [{ subject: 'user', description: 'daily limit' }],
+            },
+          ],
+        },
+      },
+    ];
+
+    const parsed = parseGoogleApiError(mockError);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toBe('Quota exceeded');
+  });
+
+  it('should parse a gaxios error where data is an array', () => {
+    const mockError = {
+      response: {
+        status: 429,
+        data: [
+          {
+            error: {
+              code: 429,
+              message: 'Quota exceeded',
+              details: [
+                {
+                  '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
+                  violations: [{ subject: 'user', description: 'daily limit' }],
+                },
+              ],
+            },
+          },
+        ],
+      },
+    };
+
+    const parsed = parseGoogleApiError(mockError);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toBe('Quota exceeded');
+  });
+
+  it('should parse a gaxios error where data is a stringified array', () => {
+    const mockError = {
+      response: {
+        status: 429,
+        data: JSON.stringify([
+          {
+            error: {
+              code: 429,
+              message: 'Quota exceeded',
+              details: [
+                {
+                  '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
+                  violations: [{ subject: 'user', description: 'daily limit' }],
+                },
+              ],
+            },
+          },
+        ]),
+      },
+    };
+
+    const parsed = parseGoogleApiError(mockError);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toBe('Quota exceeded');
+  });
+
+  it('should parse an error with a malformed @type key (returned by Gemini API)', () => {
+    const malformedError = {
+      name: 'API Error',
+      message: {
+        error: {
+          message:
+            '{\n  "error": {\n    "code": 429,\n    "message": "You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2\nPlease retry in 54.887755558s.",\n    "status": "RESOURCE_EXHAUSTED",\n    "details": [\n      {\n        " @type": "type.googleapis.com/google.rpc.DebugInfo",\n        "detail": "[ORIGINAL ERROR] generic::resource_exhausted: You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2\\nPlease retry in 54.887755558s. [google.rpc.error_details_ext] { message: \\"You exceeded your current quota, please check your plan and billing details. 
For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2\\\\nPlease retry in 54.887755558s.\\" }"\n      },\n      {\n" @type": "type.googleapis.com/google.rpc.QuotaFailure",\n        "violations": [\n          {\n            "quotaMetric": "generativelanguage.googleapis.com/generate_content_free_tier_requests",\n            "quotaId": "GenerateRequestsPerMinutePerProjectPerModel-FreeTier",\n            "quotaDimensions": {\n              "location": "global",\n"model": "gemini-2.5-pro"\n            },\n            "quotaValue": "2"\n          }\n        ]\n      },\n      {\n" @type": "type.googleapis.com/google.rpc.Help",\n        "links": [\n          {\n            "description": "Learn more about Gemini API quotas",\n            "url": "https://ai.google.dev/gemini-api/docs/rate-limits"\n          }\n        ]\n      },\n      {\n" @type": "type.googleapis.com/google.rpc.RetryInfo",\n        "retryDelay": "54s"\n      }\n    ]\n  }\n}\n',
+          code: 429,
+          status: 'Too Many Requests',
+        },
+      },
+    };
+
+    const parsed = parseGoogleApiError(malformedError);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toContain('You exceeded your current quota');
+    expect(parsed?.details).toHaveLength(4);
+    expect(
+      parsed?.details.some(
+        (d) => d['@type'] === 'type.googleapis.com/google.rpc.QuotaFailure',
+      ),
+    ).toBe(true);
+    expect(
+      parsed?.details.some(
+        (d) => d['@type'] === 'type.googleapis.com/google.rpc.RetryInfo',
+      ),
+    ).toBe(true);
+  });
+});
diff --git a/packages/core/src/utils/googleErrors.ts b/packages/core/src/utils/googleErrors.ts
new file mode 100644
index 0000000000..d7c15ac0b6
--- /dev/null
+++ b/packages/core/src/utils/googleErrors.ts
@@ -0,0 +1,305 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * @fileoverview
+ * This file contains types and functions for parsing structured Google API errors.
+ */
+
+/**
+ * Based on google/rpc/error_details.proto
+ */
+
+export interface ErrorInfo {
+  '@type': 'type.googleapis.com/google.rpc.ErrorInfo';
+  reason: string;
+  domain: string;
+  metadata: { [key: string]: string };
+}
+
+export interface RetryInfo {
+  '@type': 'type.googleapis.com/google.rpc.RetryInfo';
+  retryDelay: string; // seconds + "s" suffix, e.g. "51820.638305887s"
+}
+
+export interface DebugInfo {
+  '@type': 'type.googleapis.com/google.rpc.DebugInfo';
+  stackEntries: string[];
+  detail: string;
+}
+
+export interface QuotaFailure {
+  '@type': 'type.googleapis.com/google.rpc.QuotaFailure';
+  violations: Array<{
+    subject?: string;
+    description?: string;
+    apiService?: string;
+    quotaMetric?: string;
+    quotaId?: string;
+    quotaDimensions?: { [key: string]: string };
+    quotaValue?: string | number;
+    futureQuotaValue?: number;
+  }>;
+}
+
+export interface PreconditionFailure {
+  '@type': 'type.googleapis.com/google.rpc.PreconditionFailure';
+  violations: Array<{
+    type: string;
+    subject: string;
+    description: string;
+  }>;
+}
+
+export interface LocalizedMessage {
+  '@type': 'type.googleapis.com/google.rpc.LocalizedMessage';
+  locale: string;
+  message: string;
+}
+
+export interface BadRequest {
+  '@type': 'type.googleapis.com/google.rpc.BadRequest';
+  fieldViolations: Array<{
+    field: string;
+    description: string;
+    reason?: string;
+    localizedMessage?: LocalizedMessage;
+  }>;
+}
+
+export interface RequestInfo {
+  '@type': 'type.googleapis.com/google.rpc.RequestInfo';
+  requestId: string;
+  servingData: string;
+}
+
+export interface ResourceInfo {
+  '@type': 'type.googleapis.com/google.rpc.ResourceInfo';
+  resourceType: string;
+  resourceName: string;
+  owner: string;
+  description: string;
+}
+
+export interface Help {
+  '@type': 'type.googleapis.com/google.rpc.Help';
+  links: Array<{
+    description: string;
+    url: string;
+  }>;
+}
+
+export type GoogleApiErrorDetail =
+  | ErrorInfo
+  | RetryInfo
+  | DebugInfo
+  | QuotaFailure
+  | PreconditionFailure
+  | BadRequest
+  | RequestInfo
+  | ResourceInfo
+  | Help
+  | LocalizedMessage;
+
+export interface GoogleApiError {
+  code: number;
+  message: string;
+  details: GoogleApiErrorDetail[];
+}
+
+type ErrorShape = {
+  message?: string;
+  details?: unknown[];
+  code?: number;
+};
+
+/**
+ * Extracts a structured Google API error (code, message, typed details)
+ * from a thrown value, unwrapping JSON that is nested inside `message`.
+ *
+ * Accepts an object, a JSON string, or an array (first element is used).
+ * The payload is read from `response.data.error`, a top-level `error`, or
+ * `message.error`; stringified `{ error }` messages are unwrapped up to 10
+ * levels deep. A whitespace-padded `@type` detail key is renamed in place.
+ *
+ * @param error The thrown value to inspect.
+ * @returns The error, or null without a code, message and a typed detail.
+ */
+export function parseGoogleApiError(error: unknown): GoogleApiError | null {
+  if (!error) {
+    return null;
+  }
+
+  let errorObj: unknown = error;
+
+  // If error is a string, try to parse it.
+  if (typeof errorObj === 'string') {
+    try {
+      errorObj = JSON.parse(errorObj);
+    } catch (_) {
+      // Not a JSON string, can't parse.
+      return null;
+    }
+  }
+
+  if (Array.isArray(errorObj) && errorObj.length > 0) {
+    errorObj = errorObj[0];
+  }
+
+  if (typeof errorObj !== 'object' || errorObj === null) {
+    return null;
+  }
+
+  let currentError: ErrorShape | undefined =
+    fromGaxiosError(errorObj) ?? fromApiError(errorObj);
+
+  let depth = 0;
+  const maxDepth = 10;
+  // Drill into errors stringified inside `message`. Before each parse attempt,
+  // non-breaking spaces are stripped and raw newlines are replaced by spaces.
+  while (
+    currentError &&
+    typeof currentError.message === 'string' &&
+    depth < maxDepth
+  ) {
+    try {
+      const parsedMessage = JSON.parse(
+        currentError.message.replace(/\u00A0/g, '').replace(/\n/g, ' '),
+      );
+      if (parsedMessage.error) {
+        currentError = parsedMessage.error;
+        depth++;
+      } else {
+        // The message is a JSON string, but not a nested error object.
+        break;
+      }
+    } catch (_error) {
+      // It wasn't a JSON string, so we've drilled down as far as we can.
+      break;
+    }
+  }
+
+  if (!currentError) {
+    return null;
+  }
+
+  const code = currentError.code;
+  const message = currentError.message;
+  const errorDetails = currentError.details;
+
+  if (Array.isArray(errorDetails) && code && message) {
+    const details: GoogleApiErrorDetail[] = [];
+    for (const detail of errorDetails) {
+      if (detail && typeof detail === 'object') {
+        const detailObj = detail as Record<string, unknown>;
+        const typeKey = Object.keys(detailObj).find(
+          (key) => key.trim() === '@type',
+        );
+        if (typeKey) {
+          if (typeKey !== '@type') {
+            detailObj['@type'] = detailObj[typeKey];
+            delete detailObj[typeKey];
+          }
+          // Cast without validating fields; consumers must switch on `@type`.
+          details.push(detailObj as unknown as GoogleApiErrorDetail);
+        }
+      }
+    }
+
+    if (details.length > 0) {
+      return {
+        code,
+        message,
+        details,
+      };
+    }
+  }
+
+  return null;
+}
+
+function fromGaxiosError(errorObj: object): ErrorShape | undefined {
+  const gaxiosError = errorObj as {
+    response?: {
+      status?: number;
+      data?:
+        | {
+            error?: ErrorShape;
+          }
+        | string;
+    };
+    error?: ErrorShape;
+    code?: number;
+  };
+
+  let outerError: ErrorShape | undefined;
+  if (gaxiosError.response?.data) {
+    let data = gaxiosError.response.data;
+
+    if (typeof data === 'string') {
+      try {
+        data = JSON.parse(data);
+      } catch (_) {
+        // Not JSON: keep the raw string, which the object check below skips.
+      }
+    }
+
+    if (Array.isArray(data) && data.length > 0) {
+      data = data[0];
+    }
+
+    if (typeof data === 'object' && data !== null) {
+      if ('error' in data) {
+        outerError = (data as { error: ErrorShape }).error;
+      }
+    }
+  }
+
+  if (!outerError) {
+    // No usable `response.data.error`; fall back to a top-level `error` property.
+    if (gaxiosError.error) {
+      outerError = gaxiosError.error;
+    } else {
+      return undefined;
+    }
+  }
+  return outerError;
+}
+
+function fromApiError(errorObj: object): ErrorShape | undefined {
+  const apiError = errorObj as {
+    message?:
+      | {
+          error?: ErrorShape;
+        }
+      | string;
+    code?: number;
+  };
+
+  let outerError: ErrorShape | undefined;
+  if (apiError.message) {
+    let data = apiError.message;
+
+    if (typeof data === 'string') {
+      try {
+        data = JSON.parse(data);
+      } catch (_) {
+        // Not JSON: keep the raw string, which the object check below skips.
+      }
+    }
+
+    if (Array.isArray(data) && data.length > 0) {
+      data = data[0];
+    }
+
+    if (typeof data === 'object' && data !== null) {
+      if ('error' in data) {
+        outerError = (data as { error: ErrorShape }).error;
+      }
+    }
+  }
+  return outerError;
+}
diff --git a/packages/core/src/utils/googleQuotaErrors.test.ts b/packages/core/src/utils/googleQuotaErrors.test.ts
new file mode 100644
index 0000000000..cc5e5de43a
--- /dev/null
+++ b/packages/core/src/utils/googleQuotaErrors.test.ts
@@ -0,0 +1,306 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, vi, afterEach } from 'vitest';
+import {
+  classifyGoogleError,
+  RetryableQuotaError,
+  TerminalQuotaError,
+} from './googleQuotaErrors.js';
+import * as errorParser from './googleErrors.js';
+import type { GoogleApiError } from './googleErrors.js';
+
+describe('classifyGoogleError', () => {
+  afterEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  it('should return original error if not a Google API error', () => {
+    const regularError = new Error('Something went wrong');
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(null);
+    const result = classifyGoogleError(regularError);
+    expect(result).toBe(regularError);
+  });
+
+  it('should return original error if code is not 429', () => {
+    const apiError: GoogleApiError = {
+      code: 500,
+      message: 'Server error',
+      details: [],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const originalError = new Error();
+    const result = classifyGoogleError(originalError);
+    expect(result).toBe(originalError);
+    expect(result).not.toBeInstanceOf(TerminalQuotaError);
+    expect(result).not.toBeInstanceOf(RetryableQuotaError);
+  });
+
+  it('should return TerminalQuotaError for daily quota violations in QuotaFailure', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message: 'Quota exceeded',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
+          violations: [
+            {
+              subject: 'user',
+              description: 'daily limit',
+              quotaId: 'RequestsPerDay-limit',
+            },
+          ],
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(TerminalQuotaError);
+    expect((result as TerminalQuotaError).cause).toBe(apiError);
+  });
+
+  it('should return TerminalQuotaError for daily quota violations in ErrorInfo', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message: 'Quota exceeded',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.ErrorInfo',
+          reason: 'QUOTA_EXCEEDED',
+          domain: 'googleapis.com',
+          metadata: {
+            quota_limit: 'RequestsPerDay_PerProject_PerUser',
+          },
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(TerminalQuotaError);
+  });
+
+  it('should return TerminalQuotaError for long retry delays', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message: 'Too many requests',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+          retryDelay: '301s', // well above the 120s (2 minute) terminal threshold
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(TerminalQuotaError);
+  });
+
+  it('should return RetryableQuotaError for short retry delays', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message: 'Too many requests',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+          retryDelay: '45.123s',
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(RetryableQuotaError);
+    expect((result as RetryableQuotaError).retryDelayMs).toBe(45123);
+  });
+
+  it('should return RetryableQuotaError for per-minute quota violations in QuotaFailure', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message: 'Quota exceeded',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
+          violations: [
+            {
+              subject: 'user',
+              description: 'per minute limit',
+              quotaId: 'RequestsPerMinute-limit',
+            },
+          ],
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(RetryableQuotaError);
+    expect((result as RetryableQuotaError).retryDelayMs).toBe(60000);
+  });
+
+  it('should return RetryableQuotaError for per-minute quota violations in ErrorInfo', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message: 'Quota exceeded',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.ErrorInfo',
+          reason: 'QUOTA_EXCEEDED',
+          domain: 'googleapis.com',
+          metadata: {
+            quota_limit: 'RequestsPerMinute_PerProject_PerUser',
+          },
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(RetryableQuotaError);
+    expect((result as RetryableQuotaError).retryDelayMs).toBe(60000);
+  });
+
+  it('should return RetryableQuotaError for another short retry delay', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message:
+        'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2\nPlease retry in 56.185908122s.',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
+          violations: [
+            {
+              quotaMetric:
+                'generativelanguage.googleapis.com/generate_content_free_tier_requests',
+              quotaId: 'GenerateRequestsPerMinutePerProjectPerModel-FreeTier',
+              quotaDimensions: {
+                location: 'global',
+                model: 'gemini-2.5-pro',
+              },
+              quotaValue: '2',
+            },
+          ],
+        },
+        {
+          '@type': 'type.googleapis.com/google.rpc.Help',
+          links: [
+            {
+              description: 'Learn more about Gemini API quotas',
+              url: 'https://ai.google.dev/gemini-api/docs/rate-limits',
+            },
+          ],
+        },
+        {
+          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+          retryDelay: '56s',
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(RetryableQuotaError);
+    expect((result as RetryableQuotaError).retryDelayMs).toBe(56000);
+  });
+
+  it('should return RetryableQuotaError for Cloud Code RATE_LIMIT_EXCEEDED with retry delay', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message:
+        'You have exhausted your capacity on this model. Your quota will reset after 0s.',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.ErrorInfo',
+          reason: 'RATE_LIMIT_EXCEEDED',
+          domain: 'cloudcode-pa.googleapis.com',
+          metadata: {
+            uiMessage: 'true',
+            model: 'gemini-2.5-pro',
+            quotaResetDelay: '539.477544ms',
+            quotaResetTimeStamp: '2025-10-20T19:14:08Z',
+          },
+        },
+        {
+          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+          retryDelay: '0.539477544s',
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(RetryableQuotaError);
+    expect((result as RetryableQuotaError).retryDelayMs).toBeCloseTo(
+      539.477544,
+    );
+  });
+
+  it('should return TerminalQuotaError for Cloud Code QUOTA_EXHAUSTED', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message:
+        'You have exhausted your capacity on this model. Your quota will reset after 0s.',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.ErrorInfo',
+          reason: 'QUOTA_EXHAUSTED',
+          domain: 'cloudcode-pa.googleapis.com',
+          metadata: {
+            uiMessage: 'true',
+            model: 'gemini-2.5-pro',
+            quotaResetDelay: '539.477544ms',
+            quotaResetTimeStamp: '2025-10-20T19:14:08Z',
+          },
+        },
+        {
+          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+          retryDelay: '0.539477544s',
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(TerminalQuotaError);
+  });
+
+  it('should prioritize daily limit over retry info', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message: 'Quota exceeded',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
+          violations: [
+            {
+              subject: 'user',
+              description: 'daily limit',
+              quotaId: 'RequestsPerDay-limit',
+            },
+          ],
+        },
+        {
+          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+          retryDelay: '10s',
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(TerminalQuotaError);
+  });
+
+  it('should return original error for 429 without specific details', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message: 'Too many requests',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.DebugInfo',
+          detail: 'some debug info',
+          stackEntries: [],
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const originalError = new Error();
+    const result = classifyGoogleError(originalError);
+    expect(result).toBe(originalError);
+  });
+});
diff --git a/packages/core/src/utils/googleQuotaErrors.ts b/packages/core/src/utils/googleQuotaErrors.ts
new file mode 100644
index 0000000000..4de1a81710
--- /dev/null
+++ b/packages/core/src/utils/googleQuotaErrors.ts
@@ -0,0 +1,192 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type {
+  ErrorInfo,
+  GoogleApiError,
+  QuotaFailure,
+  RetryInfo,
+} from './googleErrors.js';
+import { parseGoogleApiError } from './googleErrors.js';
+
+/**
+ * Non-retryable quota error for a hard limit (e.g., a daily quota); `cause` is the parsed API error.
+ */
+export class TerminalQuotaError extends Error {
+  override readonly cause: GoogleApiError;
+
+  constructor(message: string, cause: GoogleApiError) {
+    super(message);
+    this.cause = cause;
+    this.name = 'TerminalQuotaError';
+  }
+}
+
+/** Retryable error for a temporary quota issue (e.g., per-minute limit); carries the delay to wait in ms. */
+export class RetryableQuotaError extends Error {
+  retryDelayMs: number;
+  override readonly cause: GoogleApiError;
+
+  constructor(
+    message: string,
+    cause: GoogleApiError,
+    retryDelaySeconds: number,
+  ) {
+    super(message);
+    this.cause = cause;
+    this.name = 'RetryableQuotaError';
+    this.retryDelayMs = retryDelaySeconds * 1000;
+  }
+}
+
+/**
+ * Parses a duration string (e.g., "34.074824224s", "60s", "539.477544ms") into seconds.
+ * @returns The duration in seconds, or null if `duration` cannot be parsed.
+ */
+function parseDurationInSeconds(duration: string): number | null {
+  const isMillis = duration.endsWith('ms'); // checked first: "ms" also ends in "s"
+  if (!isMillis && !duration.endsWith('s')) {
+    return null;
+  }
+  const value = parseFloat(duration.slice(0, isMillis ? -2 : -1));
+  return isNaN(value) ? null : value / (isMillis ? 1000 : 1);
+}
+
+/**
+ * Analyzes a caught error and classifies it as a specific quota-related error if applicable.
+ *
+ * Only errors that parse as a Google API error with code 429 are classified; they become a
+ * `TerminalQuotaError` or a `RetryableQuotaError` based on the following logic:
+ * - A daily limit (`PerDay`/`Daily` in a quota id or `quota_limit`) is a `TerminalQuotaError`.
+ * - Cloud Code `RATE_LIMIT_EXCEEDED` is a `RetryableQuotaError`; `QUOTA_EXHAUSTED` is a `TerminalQuotaError`.
+ * - A suggested retry delay over 2 minutes is a `TerminalQuotaError`; 2 minutes or less is a `RetryableQuotaError`.
+ * - A per-minute limit (`PerMinute`) is a `RetryableQuotaError` with a 60s delay.
+ *
+ * @param error The error to classify.
+ * @returns A `TerminalQuotaError`, `RetryableQuotaError`, or the original `unknown` error.
+ */
+export function classifyGoogleError(error: unknown): unknown {
+  const googleApiError = parseGoogleApiError(error);
+
+  if (!googleApiError || googleApiError.code !== 429) {
+    return error; // Not a 429 error we can handle.
+  }
+
+  const quotaFailure = googleApiError.details.find(
+    (d): d is QuotaFailure =>
+      d['@type'] === 'type.googleapis.com/google.rpc.QuotaFailure',
+  );
+
+  const errorInfo = googleApiError.details.find(
+    (d): d is ErrorInfo =>
+      d['@type'] === 'type.googleapis.com/google.rpc.ErrorInfo',
+  );
+
+  const retryInfo = googleApiError.details.find(
+    (d): d is RetryInfo =>
+      d['@type'] === 'type.googleapis.com/google.rpc.RetryInfo',
+  );
+
+  // 1. Check for long-term limits in QuotaFailure or ErrorInfo
+  if (quotaFailure) {
+    for (const violation of quotaFailure.violations) {
+      const quotaId = violation.quotaId ?? '';
+      if (quotaId.includes('PerDay') || quotaId.includes('Daily')) {
+        return new TerminalQuotaError(
+          `${googleApiError.message}\nExpected quota reset within 24h.`,
+          googleApiError,
+        );
+      }
+    }
+  }
+
+  if (errorInfo) {
+    // New Cloud Code API quota handling
+    if (errorInfo.domain) {
+      const validDomains = [
+        'cloudcode-pa.googleapis.com',
+        'staging-cloudcode-pa.googleapis.com',
+        'autopush-cloudcode-pa.googleapis.com',
+      ];
+      if (validDomains.includes(errorInfo.domain)) {
+        if (errorInfo.reason === 'RATE_LIMIT_EXCEEDED') {
+          let delaySeconds = 10; // Default of 10s when RetryInfo is missing, unparseable, or zero
+          if (retryInfo?.retryDelay) {
+            const parsedDelay = parseDurationInSeconds(retryInfo.retryDelay);
+            if (parsedDelay) {
+              delaySeconds = parsedDelay;
+            }
+          }
+          return new RetryableQuotaError(
+            `${googleApiError.message}`,
+            googleApiError,
+            delaySeconds,
+          );
+        }
+        if (errorInfo.reason === 'QUOTA_EXHAUSTED') {
+          return new TerminalQuotaError(
+            `${googleApiError.message}`,
+            googleApiError,
+          );
+        }
+      }
+    }
+
+    // Existing Cloud Code API quota handling
+    const quotaLimit = errorInfo.metadata?.['quota_limit'] ?? '';
+    if (quotaLimit.includes('PerDay') || quotaLimit.includes('Daily')) {
+      return new TerminalQuotaError(
+        `${googleApiError.message}\nExpected quota reset within 24h.`,
+        googleApiError,
+      );
+    }
+  }
+
+  // 2. Check for long delays in RetryInfo
+  if (retryInfo?.retryDelay) {
+    const delaySeconds = parseDurationInSeconds(retryInfo.retryDelay);
+    if (delaySeconds) {
+      if (delaySeconds > 120) {
+        return new TerminalQuotaError(
+          `${googleApiError.message}\nSuggested retry after ${retryInfo.retryDelay}.`,
+          googleApiError,
+        );
+      }
+      // This is a retryable error with a specific delay.
+      return new RetryableQuotaError(
+        `${googleApiError.message}\nSuggested retry after ${retryInfo.retryDelay}.`,
+        googleApiError,
+        delaySeconds,
+      );
+    }
+  }
+
+  // 3. Check for short-term limits in QuotaFailure or ErrorInfo
+  if (quotaFailure) {
+    for (const violation of quotaFailure.violations) {
+      const quotaId = violation.quotaId ?? '';
+      if (quotaId.includes('PerMinute')) {
+        return new RetryableQuotaError(
+          `${googleApiError.message}\nSuggested retry after 60s.`,
+          googleApiError,
+          60,
+        );
+      }
+    }
+  }
+
+  if (errorInfo) {
+    const quotaLimit = errorInfo.metadata?.['quota_limit'] ?? '';
+    if (quotaLimit.includes('PerMinute')) {
+      return new RetryableQuotaError(
+        `${googleApiError.message}\nSuggested retry after 60s.`,
+        googleApiError,
+        60,
+      );
+    }
+  }
+  return error; // Fallback to original error if no specific classification fits.
+}
diff --git a/packages/core/src/utils/quotaErrorDetection.ts b/packages/core/src/utils/quotaErrorDetection.ts
index 6417e0db57..893e48b0f2 100644
--- a/packages/core/src/utils/quotaErrorDetection.ts
+++ b/packages/core/src/utils/quotaErrorDetection.ts
@@ -33,68 +33,3 @@ export function isStructuredError(error: unknown): error is StructuredError {
     typeof (error as StructuredError).message === 'string'
   );
 }
-
-export function isProQuotaExceededError(error: unknown): boolean {
-  // Check for Pro quota exceeded errors by looking for the specific pattern
-  // This will match patterns like:
-  // - "Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'"
-  // - "Quota exceeded for quota metric 'Gemini 2.5-preview Pro Requests'"
-  // We use string methods instead of regex to avoid ReDoS vulnerabilities
-
-  const checkMessage = (message: string): boolean =>
-    message.includes("Quota exceeded for quota metric 'Gemini") &&
-    message.includes("Pro Requests'");
-
-  if (typeof error === 'string') {
-    return checkMessage(error);
-  }
-
-  if (isStructuredError(error)) {
-    return checkMessage(error.message);
-  }
-
-  if (isApiError(error)) {
-    return checkMessage(error.error.message);
-  }
-
-  // Check if it's a Gaxios error with response data
-  if (error && typeof error === 'object' && 'response' in error) {
-    const gaxiosError = error as {
-      response?: {
-        data?: unknown;
-      };
-    };
-    if (gaxiosError.response && gaxiosError.response.data) {
-      if (typeof gaxiosError.response.data === 'string') {
-        return checkMessage(gaxiosError.response.data);
-      }
-      if (
-        typeof gaxiosError.response.data === 'object' &&
-        gaxiosError.response.data !== null &&
-        'error' in gaxiosError.response.data
-      ) {
-        const errorData = gaxiosError.response.data as {
-          error?: { message?: string };
-        };
-        return checkMessage(errorData.error?.message || '');
-      }
-    }
-  }
-  return false;
-}
-
-export function isGenericQuotaExceededError(error: unknown): boolean {
-  if (typeof error === 'string') {
-    return error.includes('Quota exceeded for quota metric');
-  }
-
-  if (isStructuredError(error)) {
-    return error.message.includes('Quota exceeded for quota metric');
-  }
-
-  if (isApiError(error)) {
-    return error.error.message.includes('Quota exceeded for quota metric');
-  }
-
-  return false;
-}
diff --git a/packages/core/src/utils/retry.test.ts b/packages/core/src/utils/retry.test.ts
index 13af50b475..e0297e8903 100644
--- a/packages/core/src/utils/retry.test.ts
+++ b/packages/core/src/utils/retry.test.ts
@@ -7,10 +7,15 @@
 /* eslint-disable @typescript-eslint/no-explicit-any */
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import { ApiError } from '@google/genai';
+import { AuthType } from '../core/contentGenerator.js';
 import type { HttpError } from './retry.js';
 import { retryWithBackoff } from './retry.js';
 import { setSimulate429 } from './testUtils.js';
 import { debugLogger } from './debugLogger.js';
+import {
+  TerminalQuotaError,
+  RetryableQuotaError,
+} from './googleQuotaErrors.js';
 
 // Helper to create a mock function that fails a certain number of times
 const createFailingFunction = (
@@ -100,26 +105,26 @@ describe('retryWithBackoff', () => {
 
     // Expect it to fail with the error from the 5th attempt.
     await Promise.all([
-      expect(promise).rejects.toThrow('Simulated error attempt 5'),
+      expect(promise).rejects.toThrow('Simulated error attempt 3'),
       vi.runAllTimersAsync(),
     ]);
 
-    expect(mockFn).toHaveBeenCalledTimes(5);
+    expect(mockFn).toHaveBeenCalledTimes(3);
   });
 
-  it('should default to 5 maxAttempts if options.maxAttempts is undefined', async () => {
-    // This function will fail more than 5 times to ensure all retries are used.
+  it('should default to 3 maxAttempts if options.maxAttempts is undefined', async () => {
+    // This function will fail more than 3 times to ensure all retries are used.
     const mockFn = createFailingFunction(10);
 
     const promise = retryWithBackoff(mockFn, { maxAttempts: undefined });
 
     // Expect it to fail with the error from the 5th attempt.
     await Promise.all([
-      expect(promise).rejects.toThrow('Simulated error attempt 5'),
+      expect(promise).rejects.toThrow('Simulated error attempt 3'),
       vi.runAllTimersAsync(),
     ]);
 
-    expect(mockFn).toHaveBeenCalledTimes(5);
+    expect(mockFn).toHaveBeenCalledTimes(3);
   });
 
   it('should not retry if shouldRetry returns false', async () => {
@@ -336,15 +341,13 @@ describe('retryWithBackoff', () => {
   });
 
   describe('Flash model fallback for OAuth users', () => {
-    it('should trigger fallback for OAuth personal users after persistent 429 errors', async () => {
+    it('should trigger fallback for OAuth personal users on TerminalQuotaError', async () => {
       const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash');
 
       let fallbackOccurred = false;
       const mockFn = vi.fn().mockImplementation(async () => {
         if (!fallbackOccurred) {
-          const error: HttpError = new Error('Rate limit exceeded');
-          error.status = 429;
-          throw error;
+          throw new TerminalQuotaError('Daily limit reached', {} as any);
         }
         return 'success';
       });
@@ -352,154 +355,62 @@ describe('retryWithBackoff', () => {
       const promise = retryWithBackoff(mockFn, {
         maxAttempts: 3,
         initialDelayMs: 100,
-        onPersistent429: async (authType?: string) => {
+        onPersistent429: async (authType?: string, error?: unknown) => {
           fallbackOccurred = true;
-          return await fallbackCallback(authType);
+          return await fallbackCallback(authType, error);
         },
         authType: 'oauth-personal',
       });
 
-      // Advance all timers to complete retries
-      await vi.runAllTimersAsync();
-
-      // Should succeed after fallback
-      await expect(promise).resolves.toBe('success');
-
-      // Verify callback was called with correct auth type
-      expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
-
-      // Should retry again after fallback
-      expect(mockFn).toHaveBeenCalledTimes(3); // 2 initial attempts + 1 after fallback
-    });
-
-    it('should NOT trigger fallback for API key users', async () => {
-      const fallbackCallback = vi.fn();
-
-      const mockFn = vi.fn(async () => {
-        const error: HttpError = new Error('Rate limit exceeded');
-        error.status = 429;
-        throw error;
-      });
-
-      const promise = retryWithBackoff(mockFn, {
-        maxAttempts: 3,
-        initialDelayMs: 100,
-        onPersistent429: fallbackCallback,
-        authType: 'gemini-api-key',
-      });
-
-      // Handle the promise properly to avoid unhandled rejections
-      const resultPromise = promise.catch((error) => error);
-      await vi.runAllTimersAsync();
-      const result = await resultPromise;
-
-      // Should fail after all retries without fallback
-      expect(result).toBeInstanceOf(Error);
-      expect(result.message).toBe('Rate limit exceeded');
-
-      // Callback should not be called for API key users
-      expect(fallbackCallback).not.toHaveBeenCalled();
-    });
-
-    it('should reset attempt counter and continue after successful fallback', async () => {
-      let fallbackCalled = false;
-      const fallbackCallback = vi.fn().mockImplementation(async () => {
-        fallbackCalled = true;
-        return 'gemini-2.5-flash';
-      });
-
-      const mockFn = vi.fn().mockImplementation(async () => {
-        if (!fallbackCalled) {
-          const error: HttpError = new Error('Rate limit exceeded');
-          error.status = 429;
-          throw error;
-        }
-        return 'success';
-      });
-
-      const promise = retryWithBackoff(mockFn, {
-        maxAttempts: 3,
-        initialDelayMs: 100,
-        onPersistent429: fallbackCallback,
-        authType: 'oauth-personal',
-      });
-
       await vi.runAllTimersAsync();
 
       await expect(promise).resolves.toBe('success');
-      expect(fallbackCallback).toHaveBeenCalledOnce();
-    });
-
-    it('should continue with original error if fallback is rejected', async () => {
-      const fallbackCallback = vi.fn().mockResolvedValue(null); // User rejected fallback
-
-      const mockFn = vi.fn(async () => {
-        const error: HttpError = new Error('Rate limit exceeded');
-        error.status = 429;
-        throw error;
-      });
-
-      const promise = retryWithBackoff(mockFn, {
-        maxAttempts: 3,
-        initialDelayMs: 100,
-        onPersistent429: fallbackCallback,
-        authType: 'oauth-personal',
-      });
-
-      // Handle the promise properly to avoid unhandled rejections
-      const resultPromise = promise.catch((error) => error);
-      await vi.runAllTimersAsync();
-      const result = await resultPromise;
-
-      // Should fail with original error when fallback is rejected
-      expect(result).toBeInstanceOf(Error);
-      expect(result.message).toBe('Rate limit exceeded');
       expect(fallbackCallback).toHaveBeenCalledWith(
         'oauth-personal',
-        expect.any(Error),
+        expect.any(TerminalQuotaError),
       );
+      expect(mockFn).toHaveBeenCalledTimes(2);
     });
 
-    it('should handle mixed error types (only count consecutive 429s)', async () => {
-      const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash');
-      let attempts = 0;
-      let fallbackOccurred = false;
-
+    it('should use retryDelayMs from RetryableQuotaError', async () => {
+      const setTimeoutSpy = vi.spyOn(global, 'setTimeout');
       const mockFn = vi.fn().mockImplementation(async () => {
-        attempts++;
-        if (fallbackOccurred) {
-          return 'success';
-        }
-        if (attempts === 1) {
-          // First attempt: 500 error (resets consecutive count)
-          const error: HttpError = new Error('Server error');
-          error.status = 500;
-          throw error;
-        } else {
-          // Remaining attempts: 429 errors
-          const error: HttpError = new Error('Rate limit exceeded');
-          error.status = 429;
-          throw error;
-        }
+        throw new RetryableQuotaError('Per-minute limit', {} as any, 12.345);
       });
 
       const promise = retryWithBackoff(mockFn, {
-        maxAttempts: 5,
+        maxAttempts: 2,
         initialDelayMs: 100,
-        onPersistent429: async (authType?: string) => {
-          fallbackOccurred = true;
-          return await fallbackCallback(authType);
-        },
-        authType: 'oauth-personal',
       });
 
+      // Attach the rejection expectation *before* running timers
+      // eslint-disable-next-line vitest/valid-expect
+      const assertionPromise = expect(promise).rejects.toThrow();
       await vi.runAllTimersAsync();
+      await assertionPromise;
 
-      await expect(promise).resolves.toBe('success');
-
-      // Should trigger fallback after 2 consecutive 429s (attempts 2-3)
-      expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
+      expect(setTimeoutSpy).toHaveBeenCalledWith(expect.any(Function), 12345);
     });
+
+    it.each([[AuthType.USE_GEMINI], [AuthType.USE_VERTEX_AI], [undefined]])(
+      'should not trigger fallback for non-Google auth users (authType: %s) on TerminalQuotaError',
+      async (authType) => {
+        const fallbackCallback = vi.fn();
+        const mockFn = vi.fn().mockImplementation(async () => {
+          throw new TerminalQuotaError('Daily limit reached', {} as any);
+        });
+
+        const promise = retryWithBackoff(mockFn, {
+          maxAttempts: 3,
+          onPersistent429: fallbackCallback,
+          authType,
+        });
+
+        await expect(promise).rejects.toThrow('Daily limit reached');
+        expect(fallbackCallback).not.toHaveBeenCalled();
+        expect(mockFn).toHaveBeenCalledTimes(1);
+      },
+    );
   });
   it('should abort the retry loop when the signal is aborted', async () => {
     const abortController = new AbortController();
diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts
index 70afe42f5d..edb8f9bb85 100644
--- a/packages/core/src/utils/retry.ts
+++ b/packages/core/src/utils/retry.ts
@@ -8,9 +8,10 @@ import type { GenerateContentResponse } from '@google/genai';
 import { ApiError } from '@google/genai';
 import { AuthType } from '../core/contentGenerator.js';
 import {
-  isProQuotaExceededError,
-  isGenericQuotaExceededError,
-} from './quotaErrorDetection.js';
+  classifyGoogleError,
+  RetryableQuotaError,
+  TerminalQuotaError,
+} from './googleQuotaErrors.js';
 import { delay, createAbortError } from './delay.js';
 import { debugLogger } from './debugLogger.js';
 
@@ -37,7 +38,7 @@ export interface RetryOptions {
 }
 
 const DEFAULT_RETRY_OPTIONS: RetryOptions = {
-  maxAttempts: 5,
+  maxAttempts: 3,
   initialDelayMs: 5000,
   maxDelayMs: 30000, // 30 seconds
   shouldRetryOnError: defaultShouldRetry,
@@ -118,7 +119,6 @@ export async function retryWithBackoff<T>(
 
   let attempt = 0;
   let currentDelay = initialDelayMs;
-  let consecutive429Count = 0;
 
   while (attempt < maxAttempts) {
     if (signal?.aborted) {
@@ -145,94 +145,54 @@ export async function retryWithBackoff<T>(
         throw error;
       }
 
-      const errorStatus = getErrorStatus(error);
+      const classifiedError = classifyGoogleError(error);
 
-      // Check for Pro quota exceeded error first - immediate fallback for OAuth users
-      if (
-        errorStatus === 429 &&
-        authType === AuthType.LOGIN_WITH_GOOGLE &&
-        isProQuotaExceededError(error) &&
-        onPersistent429
-      ) {
-        try {
-          const fallbackModel = await onPersistent429(authType, error);
-          if (fallbackModel !== false && fallbackModel !== null) {
-            // Reset attempt counter and try with new model
-            attempt = 0;
-            consecutive429Count = 0;
-            currentDelay = initialDelayMs;
-            // With the model updated, we continue to the next attempt
-            continue;
-          } else {
-            // Fallback handler returned null/false, meaning don't continue - stop retry process
-            throw error;
+      if (classifiedError instanceof TerminalQuotaError) {
+        if (onPersistent429 && authType === AuthType.LOGIN_WITH_GOOGLE) {
+          try {
+            const fallbackModel = await onPersistent429(
+              authType,
+              classifiedError,
+            );
+            if (fallbackModel) {
+              attempt = 0; // Reset attempts and retry with the new model.
+              currentDelay = initialDelayMs;
+              continue;
+            }
+          } catch (fallbackError) {
+            debugLogger.warn('Fallback to Flash model failed:', fallbackError);
           }
-        } catch (fallbackError) {
-          // If fallback fails, continue with original error
-          debugLogger.warn('Fallback to Flash model failed:', fallbackError);
         }
+        throw classifiedError; // Throw if no fallback or fallback failed.
       }
 
-      // Check for generic quota exceeded error (but not Pro, which was handled above) - immediate fallback for OAuth users
-      if (
-        errorStatus === 429 &&
-        authType === AuthType.LOGIN_WITH_GOOGLE &&
-        !isProQuotaExceededError(error) &&
-        isGenericQuotaExceededError(error) &&
-        onPersistent429
-      ) {
-        try {
-          const fallbackModel = await onPersistent429(authType, error);
-          if (fallbackModel !== false && fallbackModel !== null) {
-            // Reset attempt counter and try with new model
-            attempt = 0;
-            consecutive429Count = 0;
-            currentDelay = initialDelayMs;
-            // With the model updated, we continue to the next attempt
-            continue;
-          } else {
-            // Fallback handler returned null/false, meaning don't continue - stop retry process
-            throw error;
+      if (classifiedError instanceof RetryableQuotaError) {
+        if (attempt >= maxAttempts) {
+          if (onPersistent429 && authType === AuthType.LOGIN_WITH_GOOGLE) {
+            try {
+              const fallbackModel = await onPersistent429(
+                authType,
+                classifiedError,
+              );
+              if (fallbackModel) {
+                attempt = 0; // Reset attempts and retry with the new model.
+                currentDelay = initialDelayMs;
+                continue;
+              }
+            } catch (fallbackError) {
+              debugLogger.warn('Model fallback failed:', fallbackError);
+            }
           }
-        } catch (fallbackError) {
-          // If fallback fails, continue with original error
-          debugLogger.warn('Fallback to Flash model failed:', fallbackError);
+          throw classifiedError; // Out of attempts and no fallback took over.
         }
+        debugLogger.warn(
+          `Attempt ${attempt} failed: ${classifiedError.message}. Retrying after ${classifiedError.retryDelayMs}ms...`,
+        );
+        await delay(classifiedError.retryDelayMs, signal);
+        continue;
       }
 
-      // Track consecutive 429 errors
-      if (errorStatus === 429) {
-        consecutive429Count++;
-      } else {
-        consecutive429Count = 0;
-      }
-
-      // If we have persistent 429s and a fallback callback for OAuth
-      if (
-        consecutive429Count >= 2 &&
-        onPersistent429 &&
-        authType === AuthType.LOGIN_WITH_GOOGLE
-      ) {
-        try {
-          const fallbackModel = await onPersistent429(authType, error);
-          if (fallbackModel !== false && fallbackModel !== null) {
-            // Reset attempt counter and try with new model
-            attempt = 0;
-            consecutive429Count = 0;
-            currentDelay = initialDelayMs;
-            // With the model updated, we continue to the next attempt
-            continue;
-          } else {
-            // Fallback handler returned null/false, meaning don't continue - stop retry process
-            throw error;
-          }
-        } catch (fallbackError) {
-          // If fallback fails, continue with original error
-          debugLogger.warn('Fallback to Flash model failed:', fallbackError);
-        }
-      }
-
-      // Check if we've exhausted retries or shouldn't retry
+      // Generic retry logic for other errors
       if (
         attempt >= maxAttempts ||
         !shouldRetryOnError(error as Error, retryFetchErrors)
@@ -240,31 +200,17 @@ export async function retryWithBackoff<T>(
         throw error;
       }
 
-      const { delayDurationMs, errorStatus: delayErrorStatus } =
-        getDelayDurationAndStatus(error);
+      const errorStatus = getErrorStatus(error);
+      logRetryAttempt(attempt, error, errorStatus);
 
-      if (delayDurationMs > 0) {
-        // Respect Retry-After header if present and parsed
-        debugLogger.warn(
-          `Attempt ${attempt} failed with status ${delayErrorStatus ?? 'unknown'}. Retrying after explicit delay of ${delayDurationMs}ms...`,
-          error,
-        );
-        await delay(delayDurationMs, signal);
-        // Reset currentDelay for next potential non-429 error, or if Retry-After is not present next time
-        currentDelay = initialDelayMs;
-      } else {
-        // Fall back to exponential backoff with jitter
-        logRetryAttempt(attempt, error, errorStatus);
-        // Add jitter: +/- 30% of currentDelay
-        const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1);
-        const delayWithJitter = Math.max(0, currentDelay + jitter);
-        await delay(delayWithJitter, signal);
-        currentDelay = Math.min(maxDelayMs, currentDelay * 2);
-      }
+      // Exponential backoff with jitter for non-quota errors
+      const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1);
+      const delayWithJitter = Math.max(0, currentDelay + jitter);
+      await delay(delayWithJitter, signal);
+      currentDelay = Math.min(maxDelayMs, currentDelay * 2);
     }
   }
-  // This line should theoretically be unreachable due to the throw in the catch block.
-  // Added for type safety and to satisfy the compiler that a promise is always returned.
+
   throw new Error('Retry attempts exhausted');
 }
 
@@ -295,62 +241,6 @@ export function getErrorStatus(error: unknown): number | undefined {
   return undefined;
 }
 
-/**
- * Extracts the Retry-After delay from an error object's headers.
- * @param error The error object.
- * @returns The delay in milliseconds, or 0 if not found or invalid.
- */
-function getRetryAfterDelayMs(error: unknown): number {
-  if (typeof error === 'object' && error !== null) {
-    // Check for error.response.headers (common in axios errors)
-    if (
-      'response' in error &&
-      typeof (error as { response?: unknown }).response === 'object' &&
-      (error as { response?: unknown }).response !== null
-    ) {
-      const response = (error as { response: { headers?: unknown } }).response;
-      if (
-        'headers' in response &&
-        typeof response.headers === 'object' &&
-        response.headers !== null
-      ) {
-        const headers = response.headers as { 'retry-after'?: unknown };
-        const retryAfterHeader = headers['retry-after'];
-        if (typeof retryAfterHeader === 'string') {
-          const retryAfterSeconds = parseInt(retryAfterHeader, 10);
-          if (!isNaN(retryAfterSeconds)) {
-            return retryAfterSeconds * 1000;
-          }
-          // It might be an HTTP date
-          const retryAfterDate = new Date(retryAfterHeader);
-          if (!isNaN(retryAfterDate.getTime())) {
-            return Math.max(0, retryAfterDate.getTime() - Date.now());
-          }
-        }
-      }
-    }
-  }
-  return 0;
-}
-
-/**
- * Determines the delay duration based on the error, prioritizing Retry-After header.
- * @param error The error object.
- * @returns An object containing the delay duration in milliseconds and the error status.
- */
-function getDelayDurationAndStatus(error: unknown): {
-  delayDurationMs: number;
-  errorStatus: number | undefined;
-} {
-  const errorStatus = getErrorStatus(error);
-  let delayDurationMs = 0;
-
-  if (errorStatus === 429) {
-    delayDurationMs = getRetryAfterDelayMs(error);
-  }
-  return { delayDurationMs, errorStatus };
-}
-
 /**
  * Logs a message for a retry attempt when using exponential backoff.
  * @param attempt The current attempt number.