fix: handle request retries and model fallback correctly (#11624)

2026-03-16 00:51:25 -07:00 · 2025-10-24 11:09:06 -07:00
parent c2104a14fb
commit ee92db7533
14 changed files with 1357 additions and 804 deletions
--- a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts
+++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts
@@ -19,25 +19,15 @@ import {
  type FallbackModelHandler,
  UserTierId,
  AuthType,
-  isGenericQuotaExceededError,
+  TerminalQuotaError,
  isProQuotaExceededError,
  makeFakeConfig,
  type GoogleApiError,
  RetryableQuotaError,
 } from '@google/gemini-cli-core';
 import { useQuotaAndFallback } from './useQuotaAndFallback.js';
 import type { UseHistoryManagerReturn } from './useHistoryManager.js';
 import { AuthState, MessageType } from '../types.js';
 // Mock the error checking functions from the core package to control test scenarios
 vi.mock('@google/gemini-cli-core', async (importOriginal) => {
  const original =
    await importOriginal<typeof import('@google/gemini-cli-core')>();
  return {
    ...original,
    isGenericQuotaExceededError: vi.fn(),
    isProQuotaExceededError: vi.fn(),
  };
 });
 // Use a type alias for SpyInstance as it's not directly exported
 type SpyInstance = ReturnType<typeof vi.spyOn>;
@@ -47,12 +37,15 @@ describe('useQuotaAndFallback', () => {
  let mockSetAuthState: Mock;
  let mockSetModelSwitchedFromQuotaError: Mock;
  let setFallbackHandlerSpy: SpyInstance;
-
+  let mockGoogleApiError: GoogleApiError;
  const mockedIsGenericQuotaExceededError = isGenericQuotaExceededError as Mock;
  const mockedIsProQuotaExceededError = isProQuotaExceededError as Mock;
  beforeEach(() => {
    mockConfig = makeFakeConfig();
    mockGoogleApiError = {
      code: 429,
      message: 'mock error',
      details: [],
    };
    // Spy on the method that requires the private field and mock its return.
    // This is cleaner than modifying the config class for tests.
@@ -72,9 +65,6 @@ describe('useQuotaAndFallback', () => {
    setFallbackHandlerSpy = vi.spyOn(mockConfig, 'setFallbackModelHandler');
    vi.spyOn(mockConfig, 'setQuotaErrorOccurred');
    mockedIsGenericQuotaExceededError.mockReturnValue(false);
    mockedIsProQuotaExceededError.mockReturnValue(false);
  });
  afterEach(() => {
@@ -140,51 +130,62 @@ describe('useQuotaAndFallback', () => {
    describe('Automatic Fallback Scenarios', () => {
      const testCases = [
        {
-          errorType: 'generic',
+          description: 'other error for FREE tier',
          tier: UserTierId.FREE,
          error: new Error('some error'),
          expectedMessageSnippets: [
-            'Automatically switching from model-A to model-B',
+            'Automatically switching from model-A to model-B for faster responses',
            'upgrade to a Gemini Code Assist Standard or Enterprise plan',
          ],
        },
        {
-          errorType: 'generic',
+          description: 'other error for LEGACY tier',
-          tier: UserTierId.STANDARD, // Paid tier
+          tier: UserTierId.LEGACY, // Paid tier
          error: new Error('some error'),
          expectedMessageSnippets: [
-            'Automatically switching from model-A to model-B',
+            'Automatically switching from model-A to model-B for faster responses',
            'switch to using a paid API key from AI Studio',
          ],
        },
        {
-          errorType: 'other',
+          description: 'retryable quota error for FREE tier',
          tier: UserTierId.FREE,
          error: new RetryableQuotaError(
            'retryable quota',
            mockGoogleApiError,
            5,
          ),
          expectedMessageSnippets: [
-            'Automatically switching from model-A to model-B for faster responses',
+            'Your requests are being throttled right now due to server being at capacity for model-A',
-            'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+            'Automatically switching from model-A to model-B',
            'upgrading to a Gemini Code Assist Standard or Enterprise plan',
          ],
        },
        {
-          errorType: 'other',
+          description: 'retryable quota error for LEGACY tier',
          tier: UserTierId.LEGACY, // Paid tier
          error: new RetryableQuotaError(
            'retryable quota',
            mockGoogleApiError,
            5,
          ),
          expectedMessageSnippets: [
-            'Automatically switching from model-A to model-B for faster responses',
+            'Your requests are being throttled right now due to server being at capacity for model-A',
            'Automatically switching from model-A to model-B',
            'switch to using a paid API key from AI Studio',
          ],
        },
      ];
-      for (const { errorType, tier, expectedMessageSnippets } of testCases) {
+      for (const {
-        it(`should handle ${errorType} error for ${tier} tier correctly`, async () => {
+        description,
-          mockedIsGenericQuotaExceededError.mockReturnValue(
+        tier,
-            errorType === 'generic',
+        error,
-          );
+        expectedMessageSnippets,
-
+      } of testCases) {
        it(`should handle ${description} correctly`, async () => {
          const handler = getRegisteredHandler(tier);
-          const result = await handler(
+          const result = await handler('model-A', 'model-B', error);
            'model-A',
            'model-B',
            new Error('quota exceeded'),
          );
          // Automatic fallbacks should return 'stop'
          expect(result).toBe('stop');
@@ -207,10 +208,6 @@ describe('useQuotaAndFallback', () => {
    });
    describe('Interactive Fallback (Pro Quota Error)', () => {
      beforeEach(() => {
        mockedIsProQuotaExceededError.mockReturnValue(true);
      });
      it('should set an interactive request and wait for user choice', async () => {
        const { result } = renderHook(() =>
          useQuotaAndFallback({
@@ -229,7 +226,7 @@ describe('useQuotaAndFallback', () => {
        const promise = handler(
          'gemini-pro',
          'gemini-flash',
-          new Error('pro quota'),
+          new TerminalQuotaError('pro quota', mockGoogleApiError),
        );
        await act(async () => {});
@@ -268,7 +265,7 @@ describe('useQuotaAndFallback', () => {
        const promise1 = handler(
          'gemini-pro',
          'gemini-flash',
-          new Error('pro quota 1'),
+          new TerminalQuotaError('pro quota 1', mockGoogleApiError),
        );
        await act(async () => {});
@@ -278,7 +275,7 @@ describe('useQuotaAndFallback', () => {
        const result2 = await handler(
          'gemini-pro',
          'gemini-flash',
-          new Error('pro quota 2'),
+          new TerminalQuotaError('pro quota 2', mockGoogleApiError),
        );
        // The lock should have stopped the second request
@@ -297,10 +294,6 @@ describe('useQuotaAndFallback', () => {
  });
  describe('handleProQuotaChoice', () => {
    beforeEach(() => {
      mockedIsProQuotaExceededError.mockReturnValue(true);
    });
    it('should do nothing if there is no pending pro quota request', () => {
      const { result } = renderHook(() =>
        useQuotaAndFallback({
@@ -336,7 +329,7 @@ describe('useQuotaAndFallback', () => {
      const promise = handler(
        'gemini-pro',
        'gemini-flash',
-        new Error('pro quota'),
+        new TerminalQuotaError('pro quota', mockGoogleApiError),
      );
      await act(async () => {}); // Allow state to update
@@ -367,7 +360,7 @@ describe('useQuotaAndFallback', () => {
      const promise = handler(
        'gemini-pro',
        'gemini-flash',
-        new Error('pro quota'),
+        new TerminalQuotaError('pro quota', mockGoogleApiError),
      );
      await act(async () => {}); // Allow state to update
--- a/packages/cli/src/ui/hooks/useQuotaAndFallback.ts
+++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.ts
@@ -9,9 +9,9 @@ import {
  type Config,
  type FallbackModelHandler,
  type FallbackIntent,
-  isGenericQuotaExceededError,
+  TerminalQuotaError,
  isProQuotaExceededError,
  UserTierId,
  RetryableQuotaError,
 } from '@google/gemini-cli-core';
 import { useCallback, useEffect, useRef, useState } from 'react';
 import { type UseHistoryManagerReturn } from './useHistoryManager.js';
@@ -63,7 +63,7 @@ export function useQuotaAndFallback({
      let message: string;
-      if (error && isProQuotaExceededError(error)) {
+      if (error instanceof TerminalQuotaError) {
        // Pro Quota specific messages (Interactive)
        if (isPaidTier) {
          message = `⚡ You have reached your daily ${failedModel} quota limit.
@@ -76,31 +76,30 @@ export function useQuotaAndFallback({
 ⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
 ⚡ You can switch authentication methods by typing /auth`;
        }
-      } else if (error && isGenericQuotaExceededError(error)) {
+      } else if (error instanceof RetryableQuotaError) {
-        // Generic Quota (Automatic fallback)
+        // Short term quota retries exhausted (Automatic fallback)
-        const actionMessage = `⚡ You have reached your daily quota limit.\n⚡ Automatically switching from ${failedModel} to ${fallbackModel} for the remainder of this session.`;
+        const actionMessage = `⚡ Your requests are being throttled right now due to server being at capacity for ${failedModel}.\n⚡ Automatically switching from ${failedModel} to ${fallbackModel} for the remainder of this session.`;
        if (isPaidTier) {
          message = `${actionMessage}
-⚡ To continue accessing the ${failedModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+⚡ To continue accessing the ${failedModel} model, retry your request after some time or consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
        } else {
          message = `${actionMessage}
-⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
+⚡ Retry your requests after some time. Otherwise consider upgrading to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
 ⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
 ⚡ You can switch authentication methods by typing /auth`;
        }
      } else {
-        // Consecutive 429s or other errors (Automatic fallback)
+        // Other errors (Automatic fallback)
        const actionMessage = `⚡ Automatically switching from ${failedModel} to ${fallbackModel} for faster responses for the remainder of this session.`;
        if (isPaidTier) {
          message = `${actionMessage}
-⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${failedModel} quota limit
+⚡ Your requests are being throttled temporarily due to server being at capacity for ${failedModel} or there is a service outage.
-⚡ To continue accessing the ${failedModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+⚡ To continue accessing the ${failedModel} model, you can retry your request after some time or consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
        } else {
          message = `${actionMessage}
-⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${failedModel} quota limit
+⚡ Your requests are being throttled temporarily due to server being at capacity for ${failedModel} or there is a service outage.
-⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
+⚡ To avoid being throttled, you can retry your request after some time or upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
 ⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
 ⚡ You can switch authentication methods by typing /auth`;
        }
@@ -119,7 +118,7 @@ export function useQuotaAndFallback({
      config.setQuotaErrorOccurred(true);
      // Interactive Fallback for Pro quota
-      if (error && isProQuotaExceededError(error)) {
+      if (error instanceof TerminalQuotaError) {
        if (isDialogPending.current) {
          return 'stop'; // A dialog is already active, so just stop this request.
        }
--- a/packages/core/index.ts
+++ b/packages/core/index.ts
@@ -44,3 +44,5 @@ export { makeFakeConfig } from './src/test-utils/config.js';
 export * from './src/utils/pathReader.js';
 export { ClearcutLogger } from './src/telemetry/clearcut-logger/clearcut-logger.js';
 export { logModelSlashCommand } from './src/telemetry/loggers.js';
 export * from './src/utils/googleQuotaErrors.js';
 export type { GoogleApiError } from './src/utils/googleErrors.js';
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -48,6 +48,7 @@ export * from './utils/gitIgnoreParser.js';
 export * from './utils/gitUtils.js';
 export * from './utils/editor.js';
 export * from './utils/quotaErrorDetection.js';
 export * from './utils/googleQuotaErrors.js';
 export * from './utils/fileUtils.js';
 export * from './utils/retry.js';
 export * from './utils/shell-utils.js';
--- a/packages/core/src/utils/errorParsing.test.ts
+++ b/packages/core/src/utils/errorParsing.test.ts
@@ -6,9 +6,7 @@
 import { describe, it, expect } from 'vitest';
 import { parseAndFormatApiError } from './errorParsing.js';
 import { isProQuotaExceededError } from './quotaErrorDetection.js';
 import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
 import { UserTierId } from '../code_assist/types.js';
 import { AuthType } from '../core/contentGenerator.js';
 import type { StructuredError } from '../core/turn.js';
@@ -40,22 +38,6 @@ describe('parseAndFormatApiError', () => {
    );
  });
  it('should format a 429 API error with the personal message', () => {
    const errorMessage =
      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
    const result = parseAndFormatApiError(
      errorMessage,
      AuthType.LOGIN_WITH_GOOGLE,
      undefined,
      'gemini-2.5-pro',
      DEFAULT_GEMINI_FLASH_MODEL,
    );
    expect(result).toContain('[API Error: Rate limit exceeded');
    expect(result).toContain(
      'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
    );
  });
  it('should format a 429 API error with the vertex message', () => {
    const errorMessage =
      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
@@ -132,230 +114,4 @@ describe('parseAndFormatApiError', () => {
    const expected = '[API Error: An unknown error occurred.]';
    expect(parseAndFormatApiError(error)).toBe(expected);
  });
  it('should format a 429 API error with Pro quota exceeded message for Google auth (Free tier)', () => {
    const errorMessage =
      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
    const result = parseAndFormatApiError(
      errorMessage,
      AuthType.LOGIN_WITH_GOOGLE,
      undefined,
      'gemini-2.5-pro',
      DEFAULT_GEMINI_FLASH_MODEL,
    );
    expect(result).toContain(
      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
    );
    expect(result).toContain(
      'You have reached your daily gemini-2.5-pro quota limit',
    );
    expect(result).toContain('upgrade to get higher limits');
  });
  it('should format a regular 429 API error with standard message for Google auth', () => {
    const errorMessage =
      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
    const result = parseAndFormatApiError(
      errorMessage,
      AuthType.LOGIN_WITH_GOOGLE,
      undefined,
      'gemini-2.5-pro',
      DEFAULT_GEMINI_FLASH_MODEL,
    );
    expect(result).toContain('[API Error: Rate limit exceeded');
    expect(result).toContain(
      'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
    );
    expect(result).not.toContain(
      'You have reached your daily gemini-2.5-pro quota limit',
    );
  });
  it('should format a 429 API error with generic quota exceeded message for Google auth', () => {
    const errorMessage =
      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
    const result = parseAndFormatApiError(
      errorMessage,
      AuthType.LOGIN_WITH_GOOGLE,
      undefined,
      'gemini-2.5-pro',
      DEFAULT_GEMINI_FLASH_MODEL,
    );
    expect(result).toContain(
      "[API Error: Quota exceeded for quota metric 'GenerationRequests'",
    );
    expect(result).toContain('You have reached your daily quota limit');
    expect(result).not.toContain(
      'You have reached your daily Gemini 2.5 Pro quota limit',
    );
  });
  it('should prioritize Pro quota message over generic quota message for Google auth', () => {
    const errorMessage =
      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
    const result = parseAndFormatApiError(
      errorMessage,
      AuthType.LOGIN_WITH_GOOGLE,
      undefined,
      'gemini-2.5-pro',
      DEFAULT_GEMINI_FLASH_MODEL,
    );
    expect(result).toContain(
      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
    );
    expect(result).toContain(
      'You have reached your daily gemini-2.5-pro quota limit',
    );
    expect(result).not.toContain('You have reached your daily quota limit');
  });
  it('should format a 429 API error with Pro quota exceeded message for Google auth (Standard tier)', () => {
    const errorMessage =
      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
    const result = parseAndFormatApiError(
      errorMessage,
      AuthType.LOGIN_WITH_GOOGLE,
      UserTierId.STANDARD,
      'gemini-2.5-pro',
      DEFAULT_GEMINI_FLASH_MODEL,
    );
    expect(result).toContain(
      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
    );
    expect(result).toContain(
      'You have reached your daily gemini-2.5-pro quota limit',
    );
    expect(result).toContain(
      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
    );
    expect(result).not.toContain('upgrade to get higher limits');
  });
  it('should format a 429 API error with Pro quota exceeded message for Google auth (Legacy tier)', () => {
    const errorMessage =
      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
    const result = parseAndFormatApiError(
      errorMessage,
      AuthType.LOGIN_WITH_GOOGLE,
      UserTierId.LEGACY,
      'gemini-2.5-pro',
      DEFAULT_GEMINI_FLASH_MODEL,
    );
    expect(result).toContain(
      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
    );
    expect(result).toContain(
      'You have reached your daily gemini-2.5-pro quota limit',
    );
    expect(result).toContain(
      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
    );
    expect(result).not.toContain('upgrade to get higher limits');
  });
  it('should handle different Gemini 2.5 version strings in Pro quota exceeded errors', () => {
    const errorMessage25 =
      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
    const errorMessagePreview =
      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
    const result25 = parseAndFormatApiError(
      errorMessage25,
      AuthType.LOGIN_WITH_GOOGLE,
      undefined,
      'gemini-2.5-pro',
      DEFAULT_GEMINI_FLASH_MODEL,
    );
    const resultPreview = parseAndFormatApiError(
      errorMessagePreview,
      AuthType.LOGIN_WITH_GOOGLE,
      undefined,
      'gemini-2.5-preview-pro',
      DEFAULT_GEMINI_FLASH_MODEL,
    );
    expect(result25).toContain(
      'You have reached your daily gemini-2.5-pro quota limit',
    );
    expect(resultPreview).toContain(
      'You have reached your daily gemini-2.5-preview-pro quota limit',
    );
    expect(result25).toContain('upgrade to get higher limits');
    expect(resultPreview).toContain('upgrade to get higher limits');
  });
  it('should not match non-Pro models with similar version strings', () => {
    // Test that Flash models with similar version strings don't match
    expect(
      isProQuotaExceededError(
        "Quota exceeded for quota metric 'Gemini 2.5 Flash Requests' and limit",
      ),
    ).toBe(false);
    expect(
      isProQuotaExceededError(
        "Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit",
      ),
    ).toBe(false);
    // Test other model types
    expect(
      isProQuotaExceededError(
        "Quota exceeded for quota metric 'Gemini 2.5 Ultra Requests' and limit",
      ),
    ).toBe(false);
    expect(
      isProQuotaExceededError(
        "Quota exceeded for quota metric 'Gemini 2.5 Standard Requests' and limit",
      ),
    ).toBe(false);
    // Test generic quota messages
    expect(
      isProQuotaExceededError(
        "Quota exceeded for quota metric 'GenerationRequests' and limit",
      ),
    ).toBe(false);
    expect(
      isProQuotaExceededError(
        "Quota exceeded for quota metric 'EmbeddingRequests' and limit",
      ),
    ).toBe(false);
  });
  it('should format a generic quota exceeded message for Google auth (Standard tier)', () => {
    const errorMessage =
      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
    const result = parseAndFormatApiError(
      errorMessage,
      AuthType.LOGIN_WITH_GOOGLE,
      UserTierId.STANDARD,
      'gemini-2.5-pro',
      DEFAULT_GEMINI_FLASH_MODEL,
    );
    expect(result).toContain(
      "[API Error: Quota exceeded for quota metric 'GenerationRequests'",
    );
    expect(result).toContain('You have reached your daily quota limit');
    expect(result).toContain(
      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
    );
    expect(result).not.toContain('upgrade to get higher limits');
  });
  it('should format a regular 429 API error with standard message for Google auth (Standard tier)', () => {
    const errorMessage =
      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
    const result = parseAndFormatApiError(
      errorMessage,
      AuthType.LOGIN_WITH_GOOGLE,
      UserTierId.STANDARD,
      'gemini-2.5-pro',
      DEFAULT_GEMINI_FLASH_MODEL,
    );
    expect(result).toContain('[API Error: Rate limit exceeded');
    expect(result).toContain(
      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
    );
    expect(result).not.toContain('upgrade to get higher limits');
  });
 });
--- a/packages/core/src/utils/errorParsing.ts
+++ b/packages/core/src/utils/errorParsing.ts
@@ -4,50 +4,11 @@
 * SPDX-License-Identifier: Apache-2.0
 */
-import {
+import { isApiError, isStructuredError } from './quotaErrorDetection.js';
-  isProQuotaExceededError,
+import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
-  isGenericQuotaExceededError,
+import type { UserTierId } from '../code_assist/types.js';
  isApiError,
  isStructuredError,
 } from './quotaErrorDetection.js';
 import {
  DEFAULT_GEMINI_MODEL,
  DEFAULT_GEMINI_FLASH_MODEL,
 } from '../config/models.js';
 import { UserTierId } from '../code_assist/types.js';
 import { AuthType } from '../core/contentGenerator.js';
 // Free Tier message functions
 const getRateLimitErrorMessageGoogleFree = (
  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
 ) =>
  `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
 const getRateLimitErrorMessageGoogleProQuotaFree = (
  currentModel: string = DEFAULT_GEMINI_MODEL,
  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
 ) =>
  `\nYou have reached your daily ${currentModel} quota limit. You will be switched to the ${fallbackModel} model for the rest of this session. To increase your limits, upgrade to get higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
 const getRateLimitErrorMessageGoogleGenericQuotaFree = () =>
  `\nYou have reached your daily quota limit. To increase your limits, upgrade to get higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
 // Legacy/Standard Tier message functions
 const getRateLimitErrorMessageGooglePaid = (
  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
 ) =>
  `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`;
 const getRateLimitErrorMessageGoogleProQuotaPaid = (
  currentModel: string = DEFAULT_GEMINI_MODEL,
  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
 ) =>
  `\nYou have reached your daily ${currentModel} quota limit. You will be switched to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI. To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
 const getRateLimitErrorMessageGoogleGenericQuotaPaid = (
  currentModel: string = DEFAULT_GEMINI_MODEL,
 ) =>
  `\nYou have reached your daily quota limit. We appreciate you for choosing Gemini Code Assist and the Gemini CLI. To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
 const RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI =
  '\nPlease wait and try again later. To increase your limits, request a quota increase through AI Studio, or switch to another /auth method';
 const RATE_LIMIT_ERROR_MESSAGE_VERTEX =
@@ -59,39 +20,9 @@ const getRateLimitErrorMessageDefault = (
 function getRateLimitMessage(
  authType?: AuthType,
  error?: unknown,
  userTier?: UserTierId,
  currentModel?: string,
  fallbackModel?: string,
 ): string {
  switch (authType) {
    case AuthType.LOGIN_WITH_GOOGLE: {
      // Determine if user is on a paid tier (Legacy or Standard) - default to FREE if not specified
      const isPaidTier =
        userTier === UserTierId.LEGACY || userTier === UserTierId.STANDARD;
      if (isProQuotaExceededError(error)) {
        return isPaidTier
          ? getRateLimitErrorMessageGoogleProQuotaPaid(
              currentModel || DEFAULT_GEMINI_MODEL,
              fallbackModel,
            )
          : getRateLimitErrorMessageGoogleProQuotaFree(
              currentModel || DEFAULT_GEMINI_MODEL,
              fallbackModel,
            );
      } else if (isGenericQuotaExceededError(error)) {
        return isPaidTier
          ? getRateLimitErrorMessageGoogleGenericQuotaPaid(
              currentModel || DEFAULT_GEMINI_MODEL,
            )
          : getRateLimitErrorMessageGoogleGenericQuotaFree();
      } else {
        return isPaidTier
          ? getRateLimitErrorMessageGooglePaid(fallbackModel)
          : getRateLimitErrorMessageGoogleFree(fallbackModel);
      }
    }
    case AuthType.USE_GEMINI:
      return RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI;
    case AuthType.USE_VERTEX_AI:
@@ -111,13 +42,7 @@ export function parseAndFormatApiError(
  if (isStructuredError(error)) {
    let text = `[API Error: ${error.message}]`;
    if (error.status === 429) {
-      text += getRateLimitMessage(
+      text += getRateLimitMessage(authType, fallbackModel);
        authType,
        error,
        userTier,
        currentModel,
        fallbackModel,
      );
    }
    return text;
  }
@@ -146,13 +71,7 @@ export function parseAndFormatApiError(
        }
        let text = `[API Error: ${finalMessage} (Status: ${parsedError.error.status})]`;
        if (parsedError.error.code === 429) {
-          text += getRateLimitMessage(
+          text += getRateLimitMessage(authType, fallbackModel);
            authType,
            parsedError,
            userTier,
            currentModel,
            fallbackModel,
          );
        }
        return text;
      }
--- a/packages/core/src/utils/flashFallback.test.ts
+++ b/packages/core/src/utils/flashFallback.test.ts
@@ -11,7 +11,6 @@ import {
  setSimulate429,
  disableSimulationAfterFallback,
  shouldSimulate429,
  createSimulated429Error,
  resetRequestCounter,
 } from './testUtils.js';
 import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
@@ -19,12 +18,15 @@ import { retryWithBackoff } from './retry.js';
 import { AuthType } from '../core/contentGenerator.js';
 // Import the new types (Assuming this test file is in packages/core/src/utils/)
 import type { FallbackModelHandler } from '../fallback/types.js';
 import type { GoogleApiError } from './googleErrors.js';
 import { TerminalQuotaError } from './googleQuotaErrors.js';
 vi.mock('node:fs');
 // Integration tests for the retry utility's fallback handling
 describe('Retry Utility Fallback Integration', () => {
  let config: Config;
  let mockGoogleApiError: GoogleApiError;
  beforeEach(() => {
    vi.mocked(fs.existsSync).mockReturnValue(true);
@@ -38,6 +40,11 @@ describe('Retry Utility Fallback Integration', () => {
      cwd: '/test',
      model: 'gemini-2.5-pro',
    });
    mockGoogleApiError = {
      code: 429,
      message: 'mock error',
      details: [],
    };
    // Reset simulation state for each test
    setSimulate429(false);
@@ -56,6 +63,7 @@ describe('Retry Utility Fallback Integration', () => {
    const result = await config.fallbackModelHandler!(
      'gemini-2.5-pro',
      DEFAULT_GEMINI_FLASH_MODEL,
      new Error('test'),
    );
    // Verify it returns the correct intent
@@ -63,81 +71,61 @@ describe('Retry Utility Fallback Integration', () => {
  });
  // This test validates the retry utility's logic for triggering the callback.
-  it('should trigger onPersistent429 after 2 consecutive 429 errors for OAuth users', async () => {
+  it('should trigger onPersistent429 on TerminalQuotaError for OAuth users', async () => {
    let fallbackCalled = false;
    // Removed fallbackModel variable as it's no longer relevant here.
    // Mock function that simulates exactly 2 429 errors, then succeeds after fallback
    const mockApiCall = vi
      .fn()
-      .mockRejectedValueOnce(createSimulated429Error())
+      .mockRejectedValueOnce(
-      .mockRejectedValueOnce(createSimulated429Error())
+        new TerminalQuotaError('Daily limit', mockGoogleApiError),
      )
      .mockRejectedValueOnce(
        new TerminalQuotaError('Daily limit', mockGoogleApiError),
      )
      .mockResolvedValueOnce('success after fallback');
    // Mock the onPersistent429 callback (this is what client.ts/geminiChat.ts provides)
    const mockPersistent429Callback = vi.fn(async (_authType?: string) => {
      fallbackCalled = true;
      // Return true to signal retryWithBackoff to reset attempts and continue.
      return true;
    });
    // Test with OAuth personal auth type, with maxAttempts = 2 to ensure fallback triggers
    const result = await retryWithBackoff(mockApiCall, {
      maxAttempts: 2,
      initialDelayMs: 1,
      maxDelayMs: 10,
      shouldRetryOnError: (error: Error) => {
        const status = (error as Error & { status?: number }).status;
        return status === 429;
      },
      onPersistent429: mockPersistent429Callback,
      authType: AuthType.LOGIN_WITH_GOOGLE,
    });
    // Verify fallback mechanism was triggered
    expect(fallbackCalled).toBe(true);
    expect(mockPersistent429Callback).toHaveBeenCalledWith(
      AuthType.LOGIN_WITH_GOOGLE,
-      expect.any(Error),
+      expect.any(TerminalQuotaError),
    );
    expect(result).toBe('success after fallback');
    // Should have: 2 failures, then fallback triggered, then 1 success after retry reset
    expect(mockApiCall).toHaveBeenCalledTimes(3);
  });
  it('should not trigger onPersistent429 for API key users', async () => {
-    let fallbackCalled = false;
+    const fallbackCallback = vi.fn();
-    // Mock function that simulates 429 errors
+    const mockApiCall = vi
-    const mockApiCall = vi.fn().mockRejectedValue(createSimulated429Error());
+      .fn()
      .mockRejectedValueOnce(
        new TerminalQuotaError('Daily limit', mockGoogleApiError),
      );
-    // Mock the callback
+    const promise = retryWithBackoff(mockApiCall, {
-    const mockPersistent429Callback = vi.fn(async () => {
+      maxAttempts: 2,
-      fallbackCalled = true;
+      initialDelayMs: 1,
-      return true;
+      maxDelayMs: 10,
      onPersistent429: fallbackCallback,
      authType: AuthType.USE_GEMINI, // API key auth type
    });
-    // Test with API key auth type - should not trigger fallback
+    await expect(promise).rejects.toThrow('Daily limit');
-    try {
+    expect(fallbackCallback).not.toHaveBeenCalled();
-      await retryWithBackoff(mockApiCall, {
+    expect(mockApiCall).toHaveBeenCalledTimes(1);
        maxAttempts: 5,
        initialDelayMs: 10,
        maxDelayMs: 100,
        shouldRetryOnError: (error: Error) => {
          const status = (error as Error & { status?: number }).status;
          return status === 429;
        },
        onPersistent429: mockPersistent429Callback,
        authType: AuthType.USE_GEMINI, // API key auth type
      });
    } catch (error) {
      // Expected to throw after max attempts
      expect((error as Error).message).toContain('Rate limit exceeded');
    }
    // Verify fallback was NOT triggered for API key users
    expect(fallbackCalled).toBe(false);
    expect(mockPersistent429Callback).not.toHaveBeenCalled();
  });
  // This test validates the test utilities themselves.
--- a/packages/core/src/utils/googleErrors.test.ts
+++ b/packages/core/src/utils/googleErrors.test.ts
@@ -0,0 +1,356 @@
 /**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
 import { describe, it, expect } from 'vitest';
 import { parseGoogleApiError } from './googleErrors.js';
 import type { QuotaFailure } from './googleErrors.js';
 describe('parseGoogleApiError', () => {
  it('should return null for non-gaxios errors', () => {
    // A plain Error, null, and an empty object carry no structured API error.
    const unstructuredInputs: unknown[] = [
      new Error('vanilla error'),
      null,
      {},
    ];
    for (const input of unstructuredInputs) {
      expect(parseGoogleApiError(input)).toBeNull();
    }
  });
  it('should parse a standard gaxios error', () => {
    // Gaxios-style shape: the API error sits under `response.data.error`.
    const quotaFailureDetail = {
      '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
      violations: [{ subject: 'user', description: 'daily limit' }],
    };
    const gaxiosError = {
      response: {
        status: 429,
        data: {
          error: {
            code: 429,
            message: 'Quota exceeded',
            details: [quotaFailureDetail],
          },
        },
      },
    };

    const result = parseGoogleApiError(gaxiosError);

    expect(result).not.toBeNull();
    expect(result?.code).toBe(429);
    expect(result?.message).toBe('Quota exceeded');
    expect(result?.details).toHaveLength(1);
    // The single detail should come back as the QuotaFailure we sent in.
    const firstDetail = result?.details[0] as QuotaFailure;
    expect(firstDetail['@type']).toBe(
      'type.googleapis.com/google.rpc.QuotaFailure',
    );
    expect(firstDetail.violations[0].description).toBe('daily limit');
  });
  it('should parse an error with details stringified in the message', () => {
    // The real error (with its details) is serialized into the outer error's
    // `message`, while the outer `details` array is empty.
    const retryInfoDetail = {
      '@type': 'type.googleapis.com/google.rpc.RetryInfo',
      retryDelay: '10s',
    };
    const serializedInnerError = JSON.stringify({
      error: {
        code: 429,
        message: 'Inner quota message',
        details: [retryInfoDetail],
      },
    });
    const wrapperError = {
      response: {
        status: 429,
        data: {
          error: {
            code: 429,
            message: serializedInnerError,
            details: [], // Top-level details are empty
          },
        },
      },
    };

    const result = parseGoogleApiError(wrapperError);

    expect(result).not.toBeNull();
    expect(result?.code).toBe(429);
    expect(result?.message).toBe('Inner quota message');
    expect(result?.details).toHaveLength(1);
    expect(result?.details[0]['@type']).toBe(
      'type.googleapis.com/google.rpc.RetryInfo',
    );
  });
  it('should return null if details are not in the expected format', () => {
    // `details` is a bare string here instead of an array of detail objects.
    const apiErrorWithStringDetails = {
      code: 400,
      message: 'Bad Request',
      details: 'just a string', // Invalid details format
    };
    const gaxiosError = {
      response: {
        status: 400,
        data: { error: apiErrorWithStringDetails },
      },
    };

    const result = parseGoogleApiError(gaxiosError);

    expect(result).toBeNull();
  });
  it('should return null if there are no valid details', () => {
    // The only detail entry has no '@type' key, so nothing usable remains.
    const untypedDetail = {
      // missing '@type'
      reason: 'some reason',
    };
    const gaxiosError = {
      response: {
        status: 400,
        data: {
          error: {
            code: 400,
            message: 'Bad Request',
            details: [untypedDetail],
          },
        },
      },
    };

    const result = parseGoogleApiError(gaxiosError);

    expect(result).toBeNull();
  });
  it('should parse a doubly nested error in the message', () => {
    // Two layers of JSON-in-`message` wrapping surround the real error.
    const innermostJson = JSON.stringify({
      error: {
        code: 429,
        message: 'Innermost quota message',
        details: [
          {
            '@type': 'type.googleapis.com/google.rpc.RetryInfo',
            retryDelay: '20s',
          },
        ],
      },
    });
    const middleJson = JSON.stringify({
      error: {
        code: 429,
        message: innermostJson,
        details: [],
      },
    });
    const gaxiosError = {
      response: {
        status: 429,
        data: {
          error: {
            code: 429,
            message: middleJson,
            details: [],
          },
        },
      },
    };

    const result = parseGoogleApiError(gaxiosError);

    expect(result).not.toBeNull();
    expect(result?.code).toBe(429);
    expect(result?.message).toBe('Innermost quota message');
    expect(result?.details).toHaveLength(1);
    expect(result?.details[0]['@type']).toBe(
      'type.googleapis.com/google.rpc.RetryInfo',
    );
  });
  it('should parse an error that is not in a response object', () => {
    // No `response` wrapper: the error hangs directly off a top-level
    // `error` property, with the real payload serialized in its message.
    const serializedInnerError = JSON.stringify({
      error: {
        code: 429,
        message: 'Innermost quota message',
        details: [
          {
            '@type': 'type.googleapis.com/google.rpc.RetryInfo',
            retryDelay: '20s',
          },
        ],
      },
    });
    const bareError = {
      error: {
        code: 429,
        message: serializedInnerError,
        details: [],
      },
    };

    const result = parseGoogleApiError(bareError);

    expect(result).not.toBeNull();
    expect(result?.code).toBe(429);
    expect(result?.message).toBe('Innermost quota message');
    expect(result?.details).toHaveLength(1);
    expect(result?.details[0]['@type']).toBe(
      'type.googleapis.com/google.rpc.RetryInfo',
    );
  });
  it('should parse an error that is a JSON string', () => {
    // The whole error is handed to the parser as one JSON string.
    const serializedInnerError = JSON.stringify({
      error: {
        code: 429,
        message: 'Innermost quota message',
        details: [
          {
            '@type': 'type.googleapis.com/google.rpc.RetryInfo',
            retryDelay: '20s',
          },
        ],
      },
    });
    const serializedOuterError = JSON.stringify({
      error: {
        code: 429,
        message: serializedInnerError,
        details: [],
      },
    });

    const result = parseGoogleApiError(serializedOuterError);

    expect(result).not.toBeNull();
    expect(result?.code).toBe(429);
    expect(result?.message).toBe('Innermost quota message');
    expect(result?.details).toHaveLength(1);
    expect(result?.details[0]['@type']).toBe(
      'type.googleapis.com/google.rpc.RetryInfo',
    );
  });
  it('should parse the user-provided nested error string', () => {
    const userErrorString =
      '{"error":{"message":"{\\n  \\"error\\": {\\n    \\"code\\": 429,\\n    \\"message\\": \\"You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 10000\\\\nPlease retry in 40.025771073s.\\",\\n    \\"status\\": \\"RESOURCE_EXHAUSTED\\",\\n    \\"details\\": [\\n      {\\n        \\"@type\\": \\"type.googleapis.com/google.rpc.DebugInfo\\",\\n        \\"detail\\": \\"[ORIGINAL ERROR] generic::resource_exhausted: You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 10000\\\\nPlease retry in 40.025771073s. [google.rpc.error_details_ext] { message: \\\\\\"You exceeded your current quota, please check your plan and billing details. 
For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 10000\\\\\\\\nPlease retry in 40.025771073s.\\\\\\" }\\"\\n      },\\n      {\\n        \\"@type\\": \\"type.googleapis.com/google.rpc.QuotaFailure\\",\\n        \\"violations\\": [\\n          {\\n            \\"quotaMetric\\": \\"generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count\\",\\n            \\"quotaId\\": \\"GenerateContentPaidTierInputTokensPerModelPerMinute\\",\\n            \\"quotaDimensions\\": {\\n              \\"location\\": \\"global\\",\\n              \\"model\\": \\"gemini-2.5-pro\\"\\n            },\\n            \\"quotaValue\\": \\"10000\\"\\n          }\\n        ]\\n      },\\n      {\\n        \\"@type\\": \\"type.googleapis.com/google.rpc.Help\\",\\n        \\"links\\": [\\n          {\\n            \\"description\\": \\"Learn more about Gemini API quotas\\",\\n            \\"url\\": \\"https://ai.google.dev/gemini-api/docs/rate-limits\\"\\n          }\\n        ]\\n      },\\n      {\\n        \\"@type\\": \\"type.googleapis.com/google.rpc.RetryInfo\\",\\n        \\"retryDelay\\": \\"40s\\"\\n      }\\n    ]\\n  }\\n}\\n","code":429,"status":"Too Many Requests"}}';
    const parsed = parseGoogleApiError(userErrorString);
    expect(parsed).not.toBeNull();
    expect(parsed?.code).toBe(429);
    expect(parsed?.message).toContain('You exceeded your current quota');
    expect(parsed?.details).toHaveLength(4);
    expect(
      parsed?.details.some(
        (d) => d['@type'] === 'type.googleapis.com/google.rpc.QuotaFailure',
      ),
    ).toBe(true);
    expect(
      parsed?.details.some(
        (d) => d['@type'] === 'type.googleapis.com/google.rpc.RetryInfo',
      ),
    ).toBe(true);
  });
  it('should parse an error that is an array', () => {
    // The error arrives wrapped in a one-element array.
    const wrappedApiError = {
      error: {
        code: 429,
        message: 'Quota exceeded',
        details: [
          {
            '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
            violations: [{ subject: 'user', description: 'daily limit' }],
          },
        ],
      },
    };

    const result = parseGoogleApiError([wrappedApiError]);

    expect(result).not.toBeNull();
    expect(result?.code).toBe(429);
    expect(result?.message).toBe('Quota exceeded');
  });
  it('should parse a gaxios error where data is an array', () => {
    // `response.data` is an array whose first entry holds the error.
    const responseData = [
      {
        error: {
          code: 429,
          message: 'Quota exceeded',
          details: [
            {
              '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
              violations: [{ subject: 'user', description: 'daily limit' }],
            },
          ],
        },
      },
    ];
    const gaxiosError = { response: { status: 429, data: responseData } };

    const result = parseGoogleApiError(gaxiosError);

    expect(result).not.toBeNull();
    expect(result?.code).toBe(429);
    expect(result?.message).toBe('Quota exceeded');
  });
  it('should parse a gaxios error where data is a stringified array', () => {
    // Same as the array case, but `response.data` is the JSON text of it.
    const serializedResponseData = JSON.stringify([
      {
        error: {
          code: 429,
          message: 'Quota exceeded',
          details: [
            {
              '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
              violations: [{ subject: 'user', description: 'daily limit' }],
            },
          ],
        },
      },
    ]);
    const gaxiosError = {
      response: { status: 429, data: serializedResponseData },
    };

    const result = parseGoogleApiError(gaxiosError);

    expect(result).not.toBeNull();
    expect(result?.code).toBe(429);
    expect(result?.message).toBe('Quota exceeded');
  });
  it('should parse an error with a malformed @type key (returned by Gemini API)', () => {
    const malformedError = {
      name: 'API Error',
      message: {
        error: {
          message:
            '{\n  "error": {\n    "code": 429,\n    "message": "You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2\nPlease retry in 54.887755558s.",\n    "status": "RESOURCE_EXHAUSTED",\n    "details": [\n      {\n        " @type": "type.googleapis.com/google.rpc.DebugInfo",\n        "detail": "[ORIGINAL ERROR] generic::resource_exhausted: You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2\\nPlease retry in 54.887755558s. [google.rpc.error_details_ext] { message: \\"You exceeded your current quota, please check your plan and billing details. 
For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2\\\\nPlease retry in 54.887755558s.\\" }"\n      },\n      {\n" @type": "type.googleapis.com/google.rpc.QuotaFailure",\n        "violations": [\n          {\n            "quotaMetric": "generativelanguage.googleapis.com/generate_content_free_tier_requests",\n            "quotaId": "GenerateRequestsPerMinutePerProjectPerModel-FreeTier",\n            "quotaDimensions": {\n              "location": "global",\n"model": "gemini-2.5-pro"\n            },\n            "quotaValue": "2"\n          }\n        ]\n      },\n      {\n" @type": "type.googleapis.com/google.rpc.Help",\n        "links": [\n          {\n            "description": "Learn more about Gemini API quotas",\n            "url": "https://ai.google.dev/gemini-api/docs/rate-limits"\n          }\n        ]\n      },\n      {\n" @type": "type.googleapis.com/google.rpc.RetryInfo",\n        "retryDelay": "54s"\n      }\n    ]\n  }\n}\n',
          code: 429,
          status: 'Too Many Requests',
        },
      },
    };
    const parsed = parseGoogleApiError(malformedError);
    expect(parsed).not.toBeNull();
    expect(parsed?.code).toBe(429);
    expect(parsed?.message).toContain('You exceeded your current quota');
    expect(parsed?.details).toHaveLength(4);
    expect(
      parsed?.details.some(
        (d) => d['@type'] === 'type.googleapis.com/google.rpc.QuotaFailure',
      ),
    ).toBe(true);
    expect(
      parsed?.details.some(
        (d) => d['@type'] === 'type.googleapis.com/google.rpc.RetryInfo',
      ),
    ).toBe(true);
  });
 });
--- a/packages/core/src/utils/googleErrors.ts
+++ b/packages/core/src/utils/googleErrors.ts
@@ -0,0 +1,305 @@
 /**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
 /**
 * @fileoverview
 * This file contains types and functions for parsing structured Google API errors.
 */
 /**
 * Based on google/rpc/error_details.proto
 */
 export interface ErrorInfo {
  '@type': 'type.googleapis.com/google.rpc.ErrorInfo';
  reason: string;
  domain: string;
  // Free-form string-to-string map attached to the error.
  metadata: { [key: string]: string };
 }
 /** Detail entry tagged `google.rpc.RetryInfo`; carries the retry delay as a string. */
 export interface RetryInfo {
  '@type': 'type.googleapis.com/google.rpc.RetryInfo';
  retryDelay: string; // e.g. "51820.638305887s"
 }
 /** Detail entry tagged `google.rpc.DebugInfo`. */
 export interface DebugInfo {
  '@type': 'type.googleapis.com/google.rpc.DebugInfo';
  stackEntries: string[];
  detail: string;
 }
 /**
 * Detail entry tagged `google.rpc.QuotaFailure`. Every field of a violation
 * is optional, so consumers must check for presence before use.
 */
 export interface QuotaFailure {
  '@type': 'type.googleapis.com/google.rpc.QuotaFailure';
  violations: Array<{
    subject?: string;
    description?: string;
    apiService?: string;
    quotaMetric?: string;
    quotaId?: string;
    quotaDimensions?: { [key: string]: string };
    // Typed as string or number; both forms are accepted here.
    quotaValue?: string | number;
    futureQuotaValue?: number;
  }>;
 }
 /** Detail entry tagged `google.rpc.PreconditionFailure`. */
 export interface PreconditionFailure {
  '@type': 'type.googleapis.com/google.rpc.PreconditionFailure';
  violations: Array<{
    type: string;
    subject: string;
    description: string;
  }>;
 }
 /** Detail entry tagged `google.rpc.LocalizedMessage`: a message plus its locale. */
 export interface LocalizedMessage {
  '@type': 'type.googleapis.com/google.rpc.LocalizedMessage';
  locale: string;
  message: string;
 }
 /** Detail entry tagged `google.rpc.BadRequest`: a list of per-field violations. */
 export interface BadRequest {
  '@type': 'type.googleapis.com/google.rpc.BadRequest';
  fieldViolations: Array<{
    field: string;
    description: string;
    reason?: string;
    localizedMessage?: LocalizedMessage;
  }>;
 }
 /** Detail entry tagged `google.rpc.RequestInfo`. */
 export interface RequestInfo {
  '@type': 'type.googleapis.com/google.rpc.RequestInfo';
  requestId: string;
  servingData: string;
 }
 /** Detail entry tagged `google.rpc.ResourceInfo`. */
 export interface ResourceInfo {
  '@type': 'type.googleapis.com/google.rpc.ResourceInfo';
  resourceType: string;
  resourceName: string;
  owner: string;
  description: string;
 }
 /** Detail entry tagged `google.rpc.Help`: a list of described links. */
 export interface Help {
  '@type': 'type.googleapis.com/google.rpc.Help';
  links: Array<{
    description: string;
    url: string;
  }>;
 }
 /**
 * Union of every detail shape declared above. Each member has a distinct
 * string-literal `'@type'`, so consumers can narrow by switching on it.
 */
 export type GoogleApiErrorDetail =
  | ErrorInfo
  | RetryInfo
  | DebugInfo
  | QuotaFailure
  | PreconditionFailure
  | BadRequest
  | RequestInfo
  | ResourceInfo
  | Help
  | LocalizedMessage;
 /** Structured error produced by `parseGoogleApiError` on a successful parse. */
 export interface GoogleApiError {
  code: number;
  message: string;
  details: GoogleApiErrorDetail[];
 }
 /**
 * Loosely-typed intermediate error used while unwrapping an input.
 * All fields are optional and `details` is `unknown[]` because nothing has
 * been validated at this stage.
 */
 type ErrorShape = {
  message?: string;
  details?: unknown[];
  code?: number;
 };
 /**
 * Parses an arbitrary error value into a structured {@link GoogleApiError}.
 *
 * Accepted input shapes:
 * 1. A JSON string, which is parsed first.
 * 2. A non-empty array, whose first element is used.
 * 3. A gaxios-style object with the error under `response.data.error`
 *    (where `data` may itself be a JSON string or an array).
 * 4. An object with a top-level `error` property.
 * 5. An object whose `message` is (or is the JSON text of) `{ error: ... }`.
 *
 * After the outer error is located, the function keeps unwrapping errors
 * that are JSON-stringified inside the `message` field, up to 10 levels.
 *
 * Detail entries whose type key is padded with whitespace (e.g. `" @type"`)
 * are normalized to use `'@type'`. Normalization is done on a shallow copy,
 * so the caller's objects are never modified.
 *
 * @param error The error value to inspect.
 * @returns A GoogleApiError when a truthy `code`, a truthy `message` and at
 *   least one detail carrying an `@type` key are found; otherwise null.
 */
 export function parseGoogleApiError(error: unknown): GoogleApiError | null {
  if (!error) {
    return null;
  }

  let errorObj: unknown = error;

  // If error is a string, it is only usable when it is a JSON document.
  if (typeof errorObj === 'string') {
    try {
      errorObj = JSON.parse(errorObj);
    } catch (_) {
      // Not a JSON string, can't parse.
      return null;
    }
  }

  // Some payloads wrap the error in an array; only the first entry is used.
  if (Array.isArray(errorObj) && errorObj.length > 0) {
    errorObj = errorObj[0];
  }

  if (typeof errorObj !== 'object' || errorObj === null) {
    return null;
  }

  let currentError: ErrorShape | undefined =
    fromGaxiosError(errorObj) ?? fromApiError(errorObj);

  let depth = 0;
  const maxDepth = 10;
  // Handle cases where the actual error object is stringified inside the message
  // by drilling down until we find an error that doesn't have a stringified message.
  while (
    currentError &&
    typeof currentError.message === 'string' &&
    depth < maxDepth
  ) {
    try {
      // Non-breaking spaces are dropped and raw newlines become spaces before
      // parsing, so payloads containing them can still be read as JSON.
      const parsedMessage = JSON.parse(
        currentError.message.replace(/\u00A0/g, '').replace(/\n/g, ' '),
      );
      if (parsedMessage.error) {
        currentError = parsedMessage.error;
        depth++;
      } else {
        // The message is a JSON string, but not a nested error object.
        break;
      }
    } catch (_error) {
      // It wasn't a JSON string, so we've drilled down as far as we can.
      break;
    }
  }

  if (!currentError) {
    return null;
  }

  const code = currentError.code;
  const message = currentError.message;
  const errorDetails = currentError.details;

  if (!Array.isArray(errorDetails) || !code || !message) {
    return null;
  }

  const details: GoogleApiErrorDetail[] = [];
  for (const detail of errorDetails) {
    if (!detail || typeof detail !== 'object') {
      continue;
    }
    const detailObj = detail as Record<string, unknown>;
    const typeKey = Object.keys(detailObj).find(
      (key) => key.trim() === '@type',
    );
    if (!typeKey) {
      // Entries without a type tag cannot be interpreted by consumers.
      continue;
    }
    if (typeKey === '@type') {
      // We can just cast it; the consumer will have to switch on @type
      details.push(detailObj as unknown as GoogleApiErrorDetail);
      continue;
    }
    // Malformed key such as " @type": emit a shallow copy under the canonical
    // key rather than rewriting the caller's object in place.
    const { [typeKey]: typeValue, ...rest } = detailObj;
    details.push({
      ...rest,
      '@type': typeValue,
    } as unknown as GoogleApiErrorDetail);
  }

  if (details.length === 0) {
    return null;
  }

  return {
    code,
    message,
    details,
  };
 }
 /**
 * Looks for an error payload on a gaxios-style object.
 *
 * `response.data` is tried first: it may be an object, a JSON string, or an
 * array (first element used) that exposes an `error` property. If that yields
 * nothing truthy, the object's own top-level `error` property is used.
 *
 * @param errorObj The object to inspect.
 * @returns The located error, or undefined when neither location has one.
 */
 function fromGaxiosError(errorObj: object): ErrorShape | undefined {
  const source = errorObj as {
    response?: {
      status?: number;
      data?: unknown;
    };
    error?: ErrorShape;
    code?: number;
  };

  let payload: unknown = source.response?.data;
  if (payload) {
    if (typeof payload === 'string') {
      try {
        payload = JSON.parse(payload);
      } catch (_) {
        // Not a JSON string, can't parse.
      }
    }
    if (Array.isArray(payload) && payload.length > 0) {
      payload = payload[0];
    }
    if (typeof payload === 'object' && payload !== null && 'error' in payload) {
      const nested = (payload as { error: ErrorShape }).error;
      if (nested) {
        return nested;
      }
    }
  }

  // If the gaxios structure isn't there, check for a top-level `error` property.
  return source.error ? source.error : undefined;
 }
 /**
 * Looks for an error payload carried in an object's `message` property.
 *
 * `message` may be an object, a JSON string, or an array (first element
 * used); whatever it resolves to must expose an `error` property.
 *
 * @param errorObj The object to inspect.
 * @returns The value of that `error` property, or undefined when `message`
 *   is missing/falsy or does not resolve to an object with an `error` key.
 */
 function fromApiError(errorObj: object): ErrorShape | undefined {
  const source = errorObj as {
    message?: unknown;
    code?: number;
  };

  let payload: unknown = source.message;
  if (!payload) {
    return undefined;
  }

  if (typeof payload === 'string') {
    try {
      payload = JSON.parse(payload);
    } catch (_) {
      // Not a JSON string, can't parse.
    }
  }
  if (Array.isArray(payload) && payload.length > 0) {
    payload = payload[0];
  }

  if (typeof payload !== 'object' || payload === null || !('error' in payload)) {
    return undefined;
  }
  return (payload as { error: ErrorShape }).error;
 }
--- a/packages/core/src/utils/googleQuotaErrors.test.ts
+++ b/packages/core/src/utils/googleQuotaErrors.test.ts
@@ -0,0 +1,306 @@
 /**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
 import { describe, it, expect, vi, afterEach } from 'vitest';
 import {
  classifyGoogleError,
  RetryableQuotaError,
  TerminalQuotaError,
 } from './googleQuotaErrors.js';
 import * as errorParser from './googleErrors.js';
 import type { GoogleApiError } from './googleErrors.js';
 describe('classifyGoogleError', () => {
  afterEach(() => {
    // Undo the vi.spyOn stubs installed by the tests below so that one
    // test's stub does not carry over into the next.
    vi.restoreAllMocks();
  });
  it('should return original error if not a Google API error', () => {
    // The parser reports "not a Google API error" by returning null.
    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(null);
    const plainError = new Error('Something went wrong');

    expect(classifyGoogleError(plainError)).toBe(plainError);
  });
  it('should return original error if code is not 429', () => {
    // A structured error that parses fine but carries a non-429 code.
    const serverError: GoogleApiError = {
      code: 500,
      message: 'Server error',
      details: [],
    };
    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(serverError);
    const thrown = new Error();

    const classified = classifyGoogleError(thrown);

    expect(classified).toBe(thrown);
    for (const quotaErrorClass of [TerminalQuotaError, RetryableQuotaError]) {
      expect(classified).not.toBeInstanceOf(quotaErrorClass);
    }
  });
  it('should return TerminalQuotaError for daily quota violations in QuotaFailure', () => {
    // The quotaId names a per-day limit.
    const dailyQuotaError: GoogleApiError = {
      code: 429,
      message: 'Quota exceeded',
      details: [
        {
          '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
          violations: [
            {
              subject: 'user',
              description: 'daily limit',
              quotaId: 'RequestsPerDay-limit',
            },
          ],
        },
      ],
    };
    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(
      dailyQuotaError,
    );

    const classified = classifyGoogleError(new Error());

    expect(classified).toBeInstanceOf(TerminalQuotaError);
    // The parsed API error is attached as the cause.
    const terminal = classified as TerminalQuotaError;
    expect(terminal.cause).toBe(dailyQuotaError);
  });
  it('should return TerminalQuotaError for daily quota violations in ErrorInfo', () => {
    // Here the per-day limit is named in ErrorInfo metadata (`quota_limit`).
    const dailyQuotaError: GoogleApiError = {
      code: 429,
      message: 'Quota exceeded',
      details: [
        {
          '@type': 'type.googleapis.com/google.rpc.ErrorInfo',
          reason: 'QUOTA_EXCEEDED',
          domain: 'googleapis.com',
          metadata: {
            quota_limit: 'RequestsPerDay_PerProject_PerUser',
          },
        },
      ],
    };
    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(
      dailyQuotaError,
    );

    const classified = classifyGoogleError(new Error());

    expect(classified).toBeInstanceOf(TerminalQuotaError);
  });
  it('should return TerminalQuotaError for long retry delays', () => {
    // Only a RetryInfo detail is present, asking for a 301 second wait.
    const longDelayError: GoogleApiError = {
      code: 429,
      message: 'Too many requests',
      details: [
        {
          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
          retryDelay: '301s', // > 5 minutes
        },
      ],
    };
    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(
      longDelayError,
    );

    const classified = classifyGoogleError(new Error());

    expect(classified).toBeInstanceOf(TerminalQuotaError);
  });
  it('should return RetryableQuotaError for short retry delays', () => {
    // A fractional-second delay string should surface in milliseconds.
    const shortDelayError: GoogleApiError = {
      code: 429,
      message: 'Too many requests',
      details: [
        {
          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
          retryDelay: '45.123s',
        },
      ],
    };
    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(
      shortDelayError,
    );

    const classified = classifyGoogleError(new Error());

    expect(classified).toBeInstanceOf(RetryableQuotaError);
    const retryable = classified as RetryableQuotaError;
    expect(retryable.retryDelayMs).toBe(45123);
  });
  it('should return RetryableQuotaError for per-minute quota violations in QuotaFailure', () => {
    // The quotaId names a per-minute limit and no RetryInfo is supplied.
    const perMinuteQuotaError: GoogleApiError = {
      code: 429,
      message: 'Quota exceeded',
      details: [
        {
          '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
          violations: [
            {
              subject: 'user',
              description: 'per minute limit',
              quotaId: 'RequestsPerMinute-limit',
            },
          ],
        },
      ],
    };
    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(
      perMinuteQuotaError,
    );

    const classified = classifyGoogleError(new Error());

    expect(classified).toBeInstanceOf(RetryableQuotaError);
    const retryable = classified as RetryableQuotaError;
    expect(retryable.retryDelayMs).toBe(60000);
  });
  it('should return RetryableQuotaError for per-minute quota violations in ErrorInfo', () => {
    // The per-minute limit is named in ErrorInfo metadata (`quota_limit`).
    const perMinuteQuotaError: GoogleApiError = {
      code: 429,
      message: 'Quota exceeded',
      details: [
        {
          '@type': 'type.googleapis.com/google.rpc.ErrorInfo',
          reason: 'QUOTA_EXCEEDED',
          domain: 'googleapis.com',
          metadata: {
            quota_limit: 'RequestsPerMinute_PerProject_PerUser',
          },
        },
      ],
    };
    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(
      perMinuteQuotaError,
    );

    const classified = classifyGoogleError(new Error());

    expect(classified).toBeInstanceOf(RetryableQuotaError);
    const retryable = classified as RetryableQuotaError;
    expect(retryable.retryDelayMs).toBe(60000);
  });
  it('should return RetryableQuotaError for another short retry delay', () => {
    // Fuller payload: QuotaFailure, Help and RetryInfo details together.
    const freeTierQuotaError: GoogleApiError = {
      code: 429,
      message:
        'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2\nPlease retry in 56.185908122s.',
      details: [
        {
          '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
          violations: [
            {
              quotaMetric:
                'generativelanguage.googleapis.com/generate_content_free_tier_requests',
              quotaId: 'GenerateRequestsPerMinutePerProjectPerModel-FreeTier',
              quotaDimensions: {
                location: 'global',
                model: 'gemini-2.5-pro',
              },
              quotaValue: '2',
            },
          ],
        },
        {
          '@type': 'type.googleapis.com/google.rpc.Help',
          links: [
            {
              description: 'Learn more about Gemini API quotas',
              url: 'https://ai.google.dev/gemini-api/docs/rate-limits',
            },
          ],
        },
        {
          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
          retryDelay: '56s',
        },
      ],
    };
    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(
      freeTierQuotaError,
    );

    const classified = classifyGoogleError(new Error());

    expect(classified).toBeInstanceOf(RetryableQuotaError);
    // The delay asserted here matches the RetryInfo detail ('56s').
    const retryable = classified as RetryableQuotaError;
    expect(retryable.retryDelayMs).toBe(56000);
  });
  it('should return RetryableQuotaError for Cloud Code RATE_LIMIT_EXCEEDED with retry delay', () => {
    // Cloud Code style 429: ErrorInfo with reason RATE_LIMIT_EXCEEDED plus a
    // RetryInfo carrying a sub-second delay.
    const rateLimitedError: GoogleApiError = {
      code: 429,
      message:
        'You have exhausted your capacity on this model. Your quota will reset after 0s.',
      details: [
        {
          '@type': 'type.googleapis.com/google.rpc.ErrorInfo',
          reason: 'RATE_LIMIT_EXCEEDED',
          domain: 'cloudcode-pa.googleapis.com',
          metadata: {
            uiMessage: 'true',
            model: 'gemini-2.5-pro',
            quotaResetDelay: '539.477544ms',
            quotaResetTimeStamp: '2025-10-20T19:14:08Z',
          },
        },
        {
          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
          retryDelay: '0.539477544s',
        },
      ],
    };
    const parseSpy = vi.spyOn(errorParser, 'parseGoogleApiError');
    parseSpy.mockReturnValue(rateLimitedError);

    const classified = classifyGoogleError(new Error());

    expect(classified).toBeInstanceOf(RetryableQuotaError);
    // Compared approximately because the delay is a fractional number of seconds
    // multiplied by 1000.
    const { retryDelayMs } = classified as RetryableQuotaError;
    expect(retryDelayMs).toBeCloseTo(539.477544);
  });
  it('should return TerminalQuotaError for Cloud Code QUOTA_EXHAUSTED', () => {
    // Same payload shape as the RATE_LIMIT_EXCEEDED case, but with reason
    // QUOTA_EXHAUSTED; the short RetryInfo delay must not make it retryable.
    const exhaustedError: GoogleApiError = {
      code: 429,
      message:
        'You have exhausted your capacity on this model. Your quota will reset after 0s.',
      details: [
        {
          '@type': 'type.googleapis.com/google.rpc.ErrorInfo',
          reason: 'QUOTA_EXHAUSTED',
          domain: 'cloudcode-pa.googleapis.com',
          metadata: {
            uiMessage: 'true',
            model: 'gemini-2.5-pro',
            quotaResetDelay: '539.477544ms',
            quotaResetTimeStamp: '2025-10-20T19:14:08Z',
          },
        },
        {
          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
          retryDelay: '0.539477544s',
        },
      ],
    };
    const parseSpy = vi.spyOn(errorParser, 'parseGoogleApiError');
    parseSpy.mockReturnValue(exhaustedError);

    const classified = classifyGoogleError(new Error());

    expect(classified).toBeInstanceOf(TerminalQuotaError);
  });
  it('should prioritize daily limit over retry info', () => {
    // A per-day QuotaFailure violation together with a short (10s) RetryInfo:
    // the daily limit is expected to win.
    const dailyLimitWithRetryInfo: GoogleApiError = {
      code: 429,
      message: 'Quota exceeded',
      details: [
        {
          '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
          violations: [
            {
              subject: 'user',
              description: 'daily limit',
              quotaId: 'RequestsPerDay-limit',
            },
          ],
        },
        {
          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
          retryDelay: '10s',
        },
      ],
    };
    const parseSpy = vi.spyOn(errorParser, 'parseGoogleApiError');
    parseSpy.mockReturnValue(dailyLimitWithRetryInfo);

    const classified = classifyGoogleError(new Error());

    expect(classified).toBeInstanceOf(TerminalQuotaError);
  });
  it('should return original error for 429 without specific details', () => {
    // A 429 whose only detail is DebugInfo: no QuotaFailure, ErrorInfo or
    // RetryInfo to classify by.
    const unclassifiable: GoogleApiError = {
      code: 429,
      message: 'Too many requests',
      details: [
        {
          '@type': 'type.googleapis.com/google.rpc.DebugInfo',
          detail: 'some debug info',
          stackEntries: [],
        },
      ],
    };
    const parseSpy = vi.spyOn(errorParser, 'parseGoogleApiError');
    parseSpy.mockReturnValue(unclassifiable);

    const thrown = new Error();
    const classified = classifyGoogleError(thrown);

    // Identity check: the very same error object must come back.
    expect(classified).toBe(thrown);
  });
 });
--- a/packages/core/src/utils/googleQuotaErrors.ts
+++ b/packages/core/src/utils/googleQuotaErrors.ts
@@ -0,0 +1,192 @@
 /**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
 import type {
  ErrorInfo,
  GoogleApiError,
  QuotaFailure,
  RetryInfo,
 } from './googleErrors.js';
 import { parseGoogleApiError } from './googleErrors.js';
 /**
 * Quota error that must not be retried because a hard limit was reached
 * (for example a daily limit).
 */
 export class TerminalQuotaError extends Error {
  /** The Google API error passed in as the cause of this quota error. */
  override readonly cause: GoogleApiError;

  constructor(message: string, cause: GoogleApiError) {
    super(message);
    this.cause = cause;
    this.name = 'TerminalQuotaError';
  }
 }
 /**
 * Quota error for a temporary condition (for example a per-minute limit) that
 * may be retried after `retryDelayMs`.
 */
 export class RetryableQuotaError extends Error {
  /** How long to wait before retrying, in milliseconds. */
  retryDelayMs: number;
  /** The Google API error passed in as the cause of this quota error. */
  override readonly cause: GoogleApiError;

  constructor(
    message: string,
    cause: GoogleApiError,
    retryDelaySeconds: number,
  ) {
    super(message);
    this.cause = cause;
    this.name = 'RetryableQuotaError';
    // Callers supply seconds; the delay is stored in milliseconds.
    this.retryDelayMs = retryDelaySeconds * 1000;
  }
 }
 /**
 * Parses a duration string and returns the time in seconds.
 *
 * Two suffixes are understood:
 * - `s` for seconds, e.g. "34.074824224s" or "60s".
 * - `ms` for milliseconds, e.g. "539.477544ms".
 *
 * The `ms` suffix is checked first because it also ends in `s`; a seconds-only
 * check would read "500ms" as 500 seconds.
 *
 * @param duration The duration string to parse.
 * @returns The duration in seconds, or null if the suffix is missing or the
 *   numeric part is not a finite number.
 */
 function parseDurationInSeconds(duration: string): number | null {
  let numericPart: string;
  let unitsPerSecond: number;
  if (duration.endsWith('ms')) {
    numericPart = duration.slice(0, -2);
    unitsPerSecond = 1000;
  } else if (duration.endsWith('s')) {
    numericPart = duration.slice(0, -1);
    unitsPerSecond = 1;
  } else {
    return null;
  }
  const value = parseFloat(numericPart);
  // NaN (unparseable) and +/-Infinity are both treated as a parse failure.
  return Number.isFinite(value) ? value / unitsPerSecond : null;
 }
 /**
 * Classifies a caught error as a quota error when it parses as a Google API 429.
 *
 * The checks run in this order and the first match decides the result:
 * 1. A `QuotaFailure` violation whose quota id names a daily limit gives a
 *    `TerminalQuotaError`.
 * 2. An `ErrorInfo` from one of the Cloud Code domains: reason `RATE_LIMIT_EXCEEDED`
 *    gives a `RetryableQuotaError` (delay from `RetryInfo`, otherwise 10 seconds) and
 *    reason `QUOTA_EXHAUSTED` gives a `TerminalQuotaError`.
 * 3. An `ErrorInfo` whose `quota_limit` metadata names a daily limit gives a
 *    `TerminalQuotaError`.
 * 4. A `RetryInfo` delay of more than 2 minutes gives a `TerminalQuotaError`; a delay
 *    of 2 minutes or less gives a `RetryableQuotaError` with that delay.
 * 5. A per-minute limit in `QuotaFailure` or `ErrorInfo` gives a `RetryableQuotaError`
 *    with a 60 second delay.
 *
 * @param error The error to classify.
 * @returns A `TerminalQuotaError`, `RetryableQuotaError`, or the original `unknown` error.
 */
 export function classifyGoogleError(error: unknown): unknown {
  const googleApiError = parseGoogleApiError(error);
  if (!googleApiError || googleApiError.code !== 429) {
    return error; // Not a 429 error we can handle.
  }

  const { details } = googleApiError;
  const quotaFailure = details.find(
    (detail): detail is QuotaFailure =>
      detail['@type'] === 'type.googleapis.com/google.rpc.QuotaFailure',
  );
  const errorInfo = details.find(
    (detail): detail is ErrorInfo =>
      detail['@type'] === 'type.googleapis.com/google.rpc.ErrorInfo',
  );
  const retryInfo = details.find(
    (detail): detail is RetryInfo =>
      detail['@type'] === 'type.googleapis.com/google.rpc.RetryInfo',
  );

  // `quota_limit` from the ErrorInfo metadata; '' when there is no ErrorInfo or no such key.
  const quotaLimit = errorInfo?.metadata?.['quota_limit'] ?? '';
  const namesDailyLimit = (quotaName: string): boolean =>
    quotaName.includes('PerDay') || quotaName.includes('Daily');
  const namesPerMinuteLimit = (quotaName: string): boolean =>
    quotaName.includes('PerMinute');

  // 1. Daily limit reported through a QuotaFailure violation.
  if (
    quotaFailure?.violations.some((violation) =>
      namesDailyLimit(violation.quotaId ?? ''),
    )
  ) {
    return new TerminalQuotaError(
      `${googleApiError.message}\nExpected quota reset within 24h.`,
      googleApiError,
    );
  }

  // 2. New Cloud Code API quota handling, keyed on the ErrorInfo domain and reason.
  const cloudCodeDomains = [
    'cloudcode-pa.googleapis.com',
    'staging-cloudcode-pa.googleapis.com',
    'autopush-cloudcode-pa.googleapis.com',
  ];
  if (errorInfo?.domain && cloudCodeDomains.includes(errorInfo.domain)) {
    switch (errorInfo.reason) {
      case 'RATE_LIMIT_EXCEEDED': {
        const parsedDelay = retryInfo?.retryDelay
          ? parseDurationInSeconds(retryInfo.retryDelay)
          : null;
        return new RetryableQuotaError(
          `${googleApiError.message}`,
          googleApiError,
          parsedDelay || 10, // Default retry of 10s when no usable delay was parsed.
        );
      }
      case 'QUOTA_EXHAUSTED':
        return new TerminalQuotaError(
          `${googleApiError.message}`,
          googleApiError,
        );
      default:
        break;
    }
  }

  // 3. Existing Cloud Code API quota handling: daily limit named in ErrorInfo metadata.
  if (namesDailyLimit(quotaLimit)) {
    return new TerminalQuotaError(
      `${googleApiError.message}\nExpected quota reset within 24h.`,
      googleApiError,
    );
  }

  // 4. An explicit RetryInfo delay: long delays are terminal, short ones retryable.
  if (retryInfo?.retryDelay) {
    const suggestedSeconds = parseDurationInSeconds(retryInfo.retryDelay);
    if (suggestedSeconds) {
      const messageWithHint = `${googleApiError.message}\nSuggested retry after ${retryInfo.retryDelay}.`;
      return suggestedSeconds > 120
        ? new TerminalQuotaError(messageWithHint, googleApiError)
        : new RetryableQuotaError(
            messageWithHint,
            googleApiError,
            suggestedSeconds,
          );
    }
  }

  // 5. Short-term (per-minute) limits in QuotaFailure or ErrorInfo.
  if (
    quotaFailure?.violations.some((violation) =>
      namesPerMinuteLimit(violation.quotaId ?? ''),
    )
  ) {
    return new RetryableQuotaError(
      `${googleApiError.message}\nSuggested retry after 60s.`,
      googleApiError,
      60,
    );
  }
  if (errorInfo && namesPerMinuteLimit(quotaLimit)) {
    return new RetryableQuotaError(
      `${errorInfo.reason}\nSuggested retry after 60s.`,
      googleApiError,
      60,
    );
  }

  return error; // Fallback to original error if no specific classification fits.
 }
--- a/packages/core/src/utils/quotaErrorDetection.ts
+++ b/packages/core/src/utils/quotaErrorDetection.ts
@@ -33,68 +33,3 @@ export function isStructuredError(error: unknown): error is StructuredError {
    typeof (error as StructuredError).message === 'string'
  );
 }
 /**
 * Reports whether an error carries the Pro-model "quota exceeded" message.
 *
 * The message is looked for in a plain string, in a structured error's `message`,
 * in an API error's `error.message`, and in the `response.data` of a Gaxios-style
 * error (either a string or an object with `error.message`).
 *
 * @param error The value to inspect.
 * @returns True when a Pro quota message is found, false otherwise.
 */
 export function isProQuotaExceededError(error: unknown): boolean {
  // A Pro quota message contains both markers, for example:
  // - "Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'"
  // - "Quota exceeded for quota metric 'Gemini 2.5-preview Pro Requests'"
  // Substring checks are used instead of a regex to avoid ReDoS vulnerabilities.
  const isProQuotaText = (text: string): boolean =>
    ["Quota exceeded for quota metric 'Gemini", "Pro Requests'"].every(
      (marker) => text.includes(marker),
    );

  if (typeof error === 'string') {
    return isProQuotaText(error);
  }
  if (isStructuredError(error)) {
    return isProQuotaText(error.message);
  }
  if (isApiError(error)) {
    return isProQuotaText(error.error.message);
  }

  // Anything else only matches if it looks like a Gaxios error with response data.
  if (!error || typeof error !== 'object' || !('response' in error)) {
    return false;
  }
  const responseData = (error as { response?: { data?: unknown } }).response
    ?.data;
  if (!responseData) {
    return false;
  }
  if (typeof responseData === 'string') {
    return isProQuotaText(responseData);
  }
  if (
    typeof responseData === 'object' &&
    responseData !== null &&
    'error' in responseData
  ) {
    const payload = responseData as { error?: { message?: string } };
    return isProQuotaText(payload.error?.message || '');
  }
  return false;
 }
 /**
 * Reports whether an error carries the generic "Quota exceeded for quota metric"
 * message, looking at a plain string, a structured error's `message`, or an API
 * error's `error.message`.
 *
 * @param error The value to inspect.
 * @returns True when the message is found, false otherwise.
 */
 export function isGenericQuotaExceededError(error: unknown): boolean {
  const mentionsQuotaMetric = (text: string): boolean =>
    text.includes('Quota exceeded for quota metric');

  if (typeof error === 'string') {
    return mentionsQuotaMetric(error);
  }
  if (isStructuredError(error)) {
    return mentionsQuotaMetric(error.message);
  }
  return isApiError(error) ? mentionsQuotaMetric(error.error.message) : false;
 }
--- a/packages/core/src/utils/retry.test.ts
+++ b/packages/core/src/utils/retry.test.ts
@@ -7,10 +7,15 @@
 /* eslint-disable @typescript-eslint/no-explicit-any */
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import { ApiError } from '@google/genai';
 import { AuthType } from '../core/contentGenerator.js';
 import type { HttpError } from './retry.js';
 import { retryWithBackoff } from './retry.js';
 import { setSimulate429 } from './testUtils.js';
 import { debugLogger } from './debugLogger.js';
 import {
  TerminalQuotaError,
  RetryableQuotaError,
 } from './googleQuotaErrors.js';
 // Helper to create a mock function that fails a certain number of times
 const createFailingFunction = (
@@ -100,26 +105,26 @@ describe('retryWithBackoff', () => {
    // Expect it to fail with the error from the 3rd attempt.
    await Promise.all([
-      expect(promise).rejects.toThrow('Simulated error attempt 5'),
+      expect(promise).rejects.toThrow('Simulated error attempt 3'),
      vi.runAllTimersAsync(),
    ]);
-    expect(mockFn).toHaveBeenCalledTimes(5);
+    expect(mockFn).toHaveBeenCalledTimes(3);
  });
-  it('should default to 5 maxAttempts if options.maxAttempts is undefined', async () => {
+  it('should default to 3 maxAttempts if options.maxAttempts is undefined', async () => {
-    // This function will fail more than 5 times to ensure all retries are used.
+    // This function will fail more than 3 times to ensure all retries are used.
    const mockFn = createFailingFunction(10);
    const promise = retryWithBackoff(mockFn, { maxAttempts: undefined });
    // Expect it to fail with the error from the 3rd attempt.
    await Promise.all([
-      expect(promise).rejects.toThrow('Simulated error attempt 5'),
+      expect(promise).rejects.toThrow('Simulated error attempt 3'),
      vi.runAllTimersAsync(),
    ]);
-    expect(mockFn).toHaveBeenCalledTimes(5);
+    expect(mockFn).toHaveBeenCalledTimes(3);
  });
  it('should not retry if shouldRetry returns false', async () => {
@@ -336,15 +341,13 @@ describe('retryWithBackoff', () => {
  });
  describe('Flash model fallback for OAuth users', () => {
-    it('should trigger fallback for OAuth personal users after persistent 429 errors', async () => {
+    it('should trigger fallback for OAuth personal users on TerminalQuotaError', async () => {
      const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash');
      let fallbackOccurred = false;
      const mockFn = vi.fn().mockImplementation(async () => {
        if (!fallbackOccurred) {
-          const error: HttpError = new Error('Rate limit exceeded');
+          throw new TerminalQuotaError('Daily limit reached', {} as any);
          error.status = 429;
          throw error;
        }
        return 'success';
      });
@@ -352,154 +355,62 @@ describe('retryWithBackoff', () => {
      const promise = retryWithBackoff(mockFn, {
        maxAttempts: 3,
        initialDelayMs: 100,
-        onPersistent429: async (authType?: string) => {
+        onPersistent429: async (authType?: string, error?: unknown) => {
          fallbackOccurred = true;
-          return await fallbackCallback(authType);
+          return await fallbackCallback(authType, error);
        },
        authType: 'oauth-personal',
      });
      // Advance all timers to complete retries
      await vi.runAllTimersAsync();
      // Should succeed after fallback
      await expect(promise).resolves.toBe('success');
      // Verify callback was called with correct auth type
      expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
      // Should retry again after fallback
      expect(mockFn).toHaveBeenCalledTimes(3); // 2 initial attempts + 1 after fallback
    });
    it('should NOT trigger fallback for API key users', async () => {
      const onPersistent429 = vi.fn();
      // Every call is rejected with an HTTP 429.
      const alwaysRateLimited = vi.fn(async () => {
        const rateLimitError: HttpError = new Error('Rate limit exceeded');
        rateLimitError.status = 429;
        throw rateLimitError;
      });

      // Handle the promise properly to avoid unhandled rejections: the rejection
      // is converted into a resolved value before the timers are advanced.
      const settled = retryWithBackoff(alwaysRateLimited, {
        maxAttempts: 3,
        initialDelayMs: 100,
        onPersistent429,
        authType: 'gemini-api-key',
      }).catch((reason) => reason);
      await vi.runAllTimersAsync();
      const outcome = await settled;

      // Should fail after all retries without fallback
      expect(outcome).toBeInstanceOf(Error);
      expect(outcome.message).toBe('Rate limit exceeded');
      // Callback should not be called for API key users
      expect(onPersistent429).not.toHaveBeenCalled();
    });
    it('should reset attempt counter and continue after successful fallback', async () => {
      // Flipped by the fallback handler; the mocked request succeeds only afterwards.
      let switchedModel = false;
      const onPersistent429 = vi.fn().mockImplementation(async () => {
        switchedModel = true;
        return 'gemini-2.5-flash';
      });
      const request = vi.fn().mockImplementation(async () => {
        if (switchedModel) {
          return 'success';
        }
        const rateLimitError: HttpError = new Error('Rate limit exceeded');
        rateLimitError.status = 429;
        throw rateLimitError;
      });

      const pending = retryWithBackoff(request, {
        maxAttempts: 3,
        initialDelayMs: 100,
        onPersistent429,
        authType: 'oauth-personal',
      });
      await vi.runAllTimersAsync();

      await expect(pending).resolves.toBe('success');
      expect(onPersistent429).toHaveBeenCalledOnce();
    });
    it('should continue with original error if fallback is rejected', async () => {
      const fallbackCallback = vi.fn().mockResolvedValue(null); // User rejected fallback
      const mockFn = vi.fn(async () => {
        const error: HttpError = new Error('Rate limit exceeded');
        error.status = 429;
        throw error;
      });
      const promise = retryWithBackoff(mockFn, {
        maxAttempts: 3,
        initialDelayMs: 100,
        onPersistent429: fallbackCallback,
        authType: 'oauth-personal',
      });
      // Handle the promise properly to avoid unhandled rejections
      const resultPromise = promise.catch((error) => error);
      await vi.runAllTimersAsync();
      const result = await resultPromise;
      // Should fail with original error when fallback is rejected
      expect(result).toBeInstanceOf(Error);
      expect(result.message).toBe('Rate limit exceeded');
      expect(fallbackCallback).toHaveBeenCalledWith(
        'oauth-personal',
-        expect.any(Error),
+        expect.any(TerminalQuotaError),
      );
      expect(mockFn).toHaveBeenCalledTimes(2);
    });
-    it('should handle mixed error types (only count consecutive 429s)', async () => {
+    it('should use retryDelayMs from RetryableQuotaError', async () => {
-      const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash');
+      const setTimeoutSpy = vi.spyOn(global, 'setTimeout');
      let attempts = 0;
      let fallbackOccurred = false;
      const mockFn = vi.fn().mockImplementation(async () => {
-        attempts++;
+        throw new RetryableQuotaError('Per-minute limit', {} as any, 12.345);
        if (fallbackOccurred) {
          return 'success';
        }
        if (attempts === 1) {
          // First attempt: 500 error (resets consecutive count)
          const error: HttpError = new Error('Server error');
          error.status = 500;
          throw error;
        } else {
          // Remaining attempts: 429 errors
          const error: HttpError = new Error('Rate limit exceeded');
          error.status = 429;
          throw error;
        }
      });
      const promise = retryWithBackoff(mockFn, {
-        maxAttempts: 5,
+        maxAttempts: 2,
        initialDelayMs: 100,
        onPersistent429: async (authType?: string) => {
          fallbackOccurred = true;
          return await fallbackCallback(authType);
        },
        authType: 'oauth-personal',
      });
      // Attach the rejection expectation *before* running timers
      // eslint-disable-next-line vitest/valid-expect
      const assertionPromise = expect(promise).rejects.toThrow();
      await vi.runAllTimersAsync();
      await assertionPromise;
-      await expect(promise).resolves.toBe('success');
+      expect(setTimeoutSpy).toHaveBeenCalledWith(expect.any(Function), 12345);
      // Should trigger fallback after 2 consecutive 429s (attempts 2-3)
      expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
    });
    it.each([[AuthType.USE_GEMINI], [AuthType.USE_VERTEX_AI], [undefined]])(
      'should not trigger fallback for non-Google auth users (authType: %s) on TerminalQuotaError',
      async (authType) => {
        const onPersistent429 = vi.fn();
        // Every call fails with a terminal quota error.
        const alwaysTerminal = vi.fn().mockImplementation(async () => {
          throw new TerminalQuotaError('Daily limit reached', {} as any);
        });

        const pending = retryWithBackoff(alwaysTerminal, {
          maxAttempts: 3,
          onPersistent429,
          authType,
        });

        // The error surfaces after a single call and the fallback handler is untouched.
        await expect(pending).rejects.toThrow('Daily limit reached');
        expect(onPersistent429).not.toHaveBeenCalled();
        expect(alwaysTerminal).toHaveBeenCalledTimes(1);
      },
    );
  });
  it('should abort the retry loop when the signal is aborted', async () => {
    const abortController = new AbortController();
--- a/packages/core/src/utils/retry.ts
+++ b/packages/core/src/utils/retry.ts
@@ -8,9 +8,10 @@ import type { GenerateContentResponse } from '@google/genai';
 import { ApiError } from '@google/genai';
 import { AuthType } from '../core/contentGenerator.js';
 import {
-  isProQuotaExceededError,
+  classifyGoogleError,
-  isGenericQuotaExceededError,
+  RetryableQuotaError,
-} from './quotaErrorDetection.js';
+  TerminalQuotaError,
 } from './googleQuotaErrors.js';
 import { delay, createAbortError } from './delay.js';
 import { debugLogger } from './debugLogger.js';
@@ -37,7 +38,7 @@ export interface RetryOptions {
 }
 const DEFAULT_RETRY_OPTIONS: RetryOptions = {
-  maxAttempts: 5,
+  maxAttempts: 3,
  initialDelayMs: 5000,
  maxDelayMs: 30000, // 30 seconds
  shouldRetryOnError: defaultShouldRetry,
@@ -118,7 +119,6 @@ export async function retryWithBackoff<T>(
  let attempt = 0;
  let currentDelay = initialDelayMs;
  let consecutive429Count = 0;
  while (attempt < maxAttempts) {
    if (signal?.aborted) {
@@ -145,94 +145,54 @@ export async function retryWithBackoff<T>(
        throw error;
      }
-      const errorStatus = getErrorStatus(error);
+      const classifiedError = classifyGoogleError(error);
-      // Check for Pro quota exceeded error first - immediate fallback for OAuth users
+      if (classifiedError instanceof TerminalQuotaError) {
-      if (
+        if (onPersistent429 && authType === AuthType.LOGIN_WITH_GOOGLE) {
-        errorStatus === 429 &&
+          try {
-        authType === AuthType.LOGIN_WITH_GOOGLE &&
+            const fallbackModel = await onPersistent429(
-        isProQuotaExceededError(error) &&
+              authType,
-        onPersistent429
+              classifiedError,
-      ) {
+            );
-        try {
+            if (fallbackModel) {
-          const fallbackModel = await onPersistent429(authType, error);
+              attempt = 0; // Reset attempts and retry with the new model.
-          if (fallbackModel !== false && fallbackModel !== null) {
+              currentDelay = initialDelayMs;
-            // Reset attempt counter and try with new model
+              continue;
-            attempt = 0;
+            }
-            consecutive429Count = 0;
+          } catch (fallbackError) {
-            currentDelay = initialDelayMs;
+            debugLogger.warn('Fallback to Flash model failed:', fallbackError);
            // With the model updated, we continue to the next attempt
            continue;
          } else {
            // Fallback handler returned null/false, meaning don't continue - stop retry process
            throw error;
          }
        } catch (fallbackError) {
          // If fallback fails, continue with original error
          debugLogger.warn('Fallback to Flash model failed:', fallbackError);
        }
        throw classifiedError; // Throw if no fallback or fallback failed.
      }
-      // Check for generic quota exceeded error (but not Pro, which was handled above) - immediate fallback for OAuth users
+      if (classifiedError instanceof RetryableQuotaError) {
-      if (
+        if (attempt >= maxAttempts) {
-        errorStatus === 429 &&
+          if (onPersistent429 && authType === AuthType.LOGIN_WITH_GOOGLE) {
-        authType === AuthType.LOGIN_WITH_GOOGLE &&
+            try {
-        !isProQuotaExceededError(error) &&
+              const fallbackModel = await onPersistent429(
-        isGenericQuotaExceededError(error) &&
+                authType,
-        onPersistent429
+                classifiedError,
-      ) {
+              );
-        try {
+              if (fallbackModel) {
-          const fallbackModel = await onPersistent429(authType, error);
+                attempt = 0; // Reset attempts and retry with the new model.
-          if (fallbackModel !== false && fallbackModel !== null) {
+                currentDelay = initialDelayMs;
-            // Reset attempt counter and try with new model
+                continue;
-            attempt = 0;
+              }
-            consecutive429Count = 0;
+            } catch (fallbackError) {
-            currentDelay = initialDelayMs;
+              console.warn('Model fallback failed:', fallbackError);
-            // With the model updated, we continue to the next attempt
+            }
            continue;
          } else {
            // Fallback handler returned null/false, meaning don't continue - stop retry process
            throw error;
          }
-        } catch (fallbackError) {
+          throw classifiedError;
          // If fallback fails, continue with original error
          debugLogger.warn('Fallback to Flash model failed:', fallbackError);
        }
        console.warn(
          `Attempt ${attempt} failed: ${classifiedError.message}. Retrying after ${classifiedError.retryDelayMs}ms...`,
        );
        await delay(classifiedError.retryDelayMs, signal);
        continue;
      }
-      // Track consecutive 429 errors
+      // Generic retry logic for other errors
      if (errorStatus === 429) {
        consecutive429Count++;
      } else {
        consecutive429Count = 0;
      }
      // If we have persistent 429s and a fallback callback for OAuth
      if (
        consecutive429Count >= 2 &&
        onPersistent429 &&
        authType === AuthType.LOGIN_WITH_GOOGLE
      ) {
        try {
          const fallbackModel = await onPersistent429(authType, error);
          if (fallbackModel !== false && fallbackModel !== null) {
            // Reset attempt counter and try with new model
            attempt = 0;
            consecutive429Count = 0;
            currentDelay = initialDelayMs;
            // With the model updated, we continue to the next attempt
            continue;
          } else {
            // Fallback handler returned null/false, meaning don't continue - stop retry process
            throw error;
          }
        } catch (fallbackError) {
          // If fallback fails, continue with original error
          debugLogger.warn('Fallback to Flash model failed:', fallbackError);
        }
      }
      // Check if we've exhausted retries or shouldn't retry
      if (
        attempt >= maxAttempts ||
        !shouldRetryOnError(error as Error, retryFetchErrors)
@@ -240,31 +200,17 @@ export async function retryWithBackoff<T>(
        throw error;
      }
-      const { delayDurationMs, errorStatus: delayErrorStatus } =
+      const errorStatus = getErrorStatus(error);
-        getDelayDurationAndStatus(error);
+      logRetryAttempt(attempt, error, errorStatus);
-      if (delayDurationMs > 0) {
+      // Exponential backoff with jitter for non-quota errors
-        // Respect Retry-After header if present and parsed
+      const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1);
-        debugLogger.warn(
+      const delayWithJitter = Math.max(0, currentDelay + jitter);
-          `Attempt ${attempt} failed with status ${delayErrorStatus ?? 'unknown'}. Retrying after explicit delay of ${delayDurationMs}ms...`,
+      await delay(delayWithJitter, signal);
-          error,
+      currentDelay = Math.min(maxDelayMs, currentDelay * 2);
        );
        await delay(delayDurationMs, signal);
        // Reset currentDelay for next potential non-429 error, or if Retry-After is not present next time
        currentDelay = initialDelayMs;
      } else {
        // Fall back to exponential backoff with jitter
        logRetryAttempt(attempt, error, errorStatus);
        // Add jitter: +/- 30% of currentDelay
        const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1);
        const delayWithJitter = Math.max(0, currentDelay + jitter);
        await delay(delayWithJitter, signal);
        currentDelay = Math.min(maxDelayMs, currentDelay * 2);
      }
    }
  }
-  // This line should theoretically be unreachable due to the throw in the catch block.
+
  // Added for type safety and to satisfy the compiler that a promise is always returned.
  throw new Error('Retry attempts exhausted');
 }
@@ -295,62 +241,6 @@ export function getErrorStatus(error: unknown): number | undefined {
  return undefined;
 }
 /**
 * Reads the `retry-after` header from `error.response.headers` and converts it to
 * a delay.
 *
 * A value that parses as an integer is taken as seconds; otherwise the value is
 * tried as a date and the delay is the time remaining until it (never below 0).
 *
 * @param error The error object.
 * @returns The delay in milliseconds, or 0 if not found or invalid.
 */
 function getRetryAfterDelayMs(error: unknown): number {
  const isNonNullObject = (value: unknown): value is object =>
    typeof value === 'object' && value !== null;

  // Walk error -> response -> headers, bailing out as soon as a level is missing.
  if (!isNonNullObject(error) || !('response' in error)) {
    return 0;
  }
  const response = (error as { response?: unknown }).response;
  if (!isNonNullObject(response) || !('headers' in response)) {
    return 0;
  }
  const headers = (response as { headers?: unknown }).headers;
  if (!isNonNullObject(headers)) {
    return 0;
  }
  const headerValue = (headers as { 'retry-after'?: unknown })['retry-after'];
  if (typeof headerValue !== 'string') {
    return 0;
  }

  const delaySeconds = parseInt(headerValue, 10);
  if (!isNaN(delaySeconds)) {
    return delaySeconds * 1000;
  }
  // It might be an HTTP date
  const retryAtMs = new Date(headerValue).getTime();
  return isNaN(retryAtMs) ? 0 : Math.max(0, retryAtMs - Date.now());
 }
 /**
 * Pairs an error's status with the explicit delay to wait before retrying it.
 * The Retry-After header is only consulted for status 429; any other status gets
 * a delay of 0.
 * @param error The error object.
 * @returns An object containing the delay duration in milliseconds and the error status.
 */
 function getDelayDurationAndStatus(error: unknown): {
  delayDurationMs: number;
  errorStatus: number | undefined;
 } {
  const errorStatus = getErrorStatus(error);
  const delayDurationMs = errorStatus === 429 ? getRetryAfterDelayMs(error) : 0;
  return { delayDurationMs, errorStatus };
 }
 /**
 * Logs a message for a retry attempt when using exponential backoff.
 * @param attempt The current attempt number.