From 996c9f59552fc55fbd1552e93f44c56ddadd5f59 Mon Sep 17 00:00:00 2001 From: Gaurav <39389231+gsquared94@users.noreply.github.com> Date: Tue, 14 Oct 2025 16:30:59 -0700 Subject: [PATCH] Revert "fix: handle request retries and model fallback correctly" (#11164) --- .../src/ui/hooks/useQuotaAndFallback.test.ts | 67 ++++- .../cli/src/ui/hooks/useQuotaAndFallback.ts | 20 +- packages/core/index.ts | 2 - packages/core/src/utils/errorParsing.test.ts | 244 +++++++++++++++++ packages/core/src/utils/errorParsing.ts | 91 ++++++- packages/core/src/utils/flashFallback.test.ts | 76 +++--- packages/core/src/utils/googleErrors.test.ts | 250 ------------------ packages/core/src/utils/googleErrors.ts | 242 ----------------- .../core/src/utils/googleQuotaErrors.test.ts | 205 -------------- packages/core/src/utils/googleQuotaErrors.ts | 162 ------------ .../core/src/utils/quotaErrorDetection.ts | 65 +++++ packages/core/src/utils/retry.test.ts | 209 ++++++++++----- packages/core/src/utils/retry.ts | 201 +++++++++++--- 13 files changed, 821 insertions(+), 1013 deletions(-) delete mode 100644 packages/core/src/utils/googleErrors.test.ts delete mode 100644 packages/core/src/utils/googleErrors.ts delete mode 100644 packages/core/src/utils/googleQuotaErrors.test.ts delete mode 100644 packages/core/src/utils/googleQuotaErrors.ts diff --git a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts index 424bf962aa..6d7782694f 100644 --- a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts +++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts @@ -19,14 +19,25 @@ import { type FallbackModelHandler, UserTierId, AuthType, - TerminalQuotaError, + isGenericQuotaExceededError, + isProQuotaExceededError, makeFakeConfig, - type GoogleApiError, } from '@google/gemini-cli-core'; import { useQuotaAndFallback } from './useQuotaAndFallback.js'; import type { UseHistoryManagerReturn } from './useHistoryManager.js'; import { AuthState, MessageType } from '../types.js'; +// Mock the error checking functions from the core package to control test scenarios +vi.mock('@google/gemini-cli-core', async (importOriginal) => { + const original = + await importOriginal(); + return { + ...original, + isGenericQuotaExceededError: vi.fn(), + isProQuotaExceededError: vi.fn(), + }; +}); + // Use a type alias for SpyInstance as it's not directly exported type SpyInstance = ReturnType; @@ -36,15 +47,12 @@ describe('useQuotaAndFallback', () => { let mockSetAuthState: Mock; let mockSetModelSwitchedFromQuotaError: Mock; let setFallbackHandlerSpy: SpyInstance; - let mockGoogleApiError: GoogleApiError; + + const mockedIsGenericQuotaExceededError = isGenericQuotaExceededError as Mock; + const mockedIsProQuotaExceededError = isProQuotaExceededError as Mock; beforeEach(() => { mockConfig = makeFakeConfig(); - mockGoogleApiError = { - code: 429, - message: 'mock error', - details: [], - }; // Spy on the method that requires the private field and mock its return. // This is cleaner than modifying the config class for tests. @@ -64,6 +72,9 @@ describe('useQuotaAndFallback', () => { setFallbackHandlerSpy = vi.spyOn(mockConfig, 'setFallbackModelHandler'); vi.spyOn(mockConfig, 'setQuotaErrorOccurred'); + + mockedIsGenericQuotaExceededError.mockReturnValue(false); + mockedIsProQuotaExceededError.mockReturnValue(false); }); afterEach(() => { @@ -128,6 +139,22 @@ describe('useQuotaAndFallback', () => { describe('Automatic Fallback Scenarios', () => { const testCases = [ + { + errorType: 'generic', + tier: UserTierId.FREE, + expectedMessageSnippets: [ + 'Automatically switching from model-A to model-B', + 'upgrade to a Gemini Code Assist Standard or Enterprise plan', + ], + }, + { + errorType: 'generic', + tier: UserTierId.STANDARD, // Paid tier + expectedMessageSnippets: [ + 'Automatically switching from model-A to model-B', + 'switch to using a paid API key from AI Studio', + ], + }, { errorType: 'other', tier: UserTierId.FREE, @@ -148,11 +175,15 @@ describe('useQuotaAndFallback', () => { for (const { errorType, tier, expectedMessageSnippets } of testCases) { it(`should handle ${errorType} error for ${tier} tier correctly`, async () => { + mockedIsGenericQuotaExceededError.mockReturnValue( + errorType === 'generic', + ); + const handler = getRegisteredHandler(tier); const result = await handler( 'model-A', 'model-B', - new Error('some error'), + new Error('quota exceeded'), ); // Automatic fallbacks should return 'stop' @@ -176,6 +207,10 @@ describe('useQuotaAndFallback', () => { }); describe('Interactive Fallback (Pro Quota Error)', () => { + beforeEach(() => { + mockedIsProQuotaExceededError.mockReturnValue(true); + }); + it('should set an interactive request and wait for user choice', async () => { const { result } = renderHook(() => useQuotaAndFallback({ @@ -194,7 +229,7 @@ describe('useQuotaAndFallback', () => { const promise = handler( 'gemini-pro', 'gemini-flash', - new TerminalQuotaError('pro quota', mockGoogleApiError), + new Error('pro quota'), ); await act(async () => {}); @@ -233,7 +268,7 @@ describe('useQuotaAndFallback', () => { const promise1 = handler( 'gemini-pro', 'gemini-flash', - new TerminalQuotaError('pro quota 1', mockGoogleApiError), + new Error('pro quota 1'), ); await act(async () => {}); @@ -243,7 +278,7 @@ describe('useQuotaAndFallback', () => { const result2 = await handler( 'gemini-pro', 'gemini-flash', - new TerminalQuotaError('pro quota 2', mockGoogleApiError), + new Error('pro quota 2'), ); // The lock should have stopped the second request @@ -262,6 +297,10 @@ describe('useQuotaAndFallback', () => { }); describe('handleProQuotaChoice', () => { + beforeEach(() => { + mockedIsProQuotaExceededError.mockReturnValue(true); + }); + it('should do nothing if there is no pending pro quota request', () => { const { result } = renderHook(() => useQuotaAndFallback({ @@ -297,7 +336,7 @@ describe('useQuotaAndFallback', () => { const promise = handler( 'gemini-pro', 'gemini-flash', - new TerminalQuotaError('pro quota', mockGoogleApiError), + new Error('pro quota'), ); await act(async () => {}); // Allow state to update @@ -328,7 +367,7 @@ describe('useQuotaAndFallback', () => { const promise = handler( 'gemini-pro', 'gemini-flash', - new TerminalQuotaError('pro quota', mockGoogleApiError), + new Error('pro quota'), ); await act(async () => {}); // Allow state to update diff --git a/packages/cli/src/ui/hooks/useQuotaAndFallback.ts b/packages/cli/src/ui/hooks/useQuotaAndFallback.ts index b196676e3b..a7eb77659a 100644 --- a/packages/cli/src/ui/hooks/useQuotaAndFallback.ts +++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.ts @@ -9,7 +9,8 @@ import { type Config, type FallbackModelHandler, type FallbackIntent, - TerminalQuotaError, + isGenericQuotaExceededError, + isProQuotaExceededError, UserTierId, } from '@google/gemini-cli-core'; import { useCallback, useEffect, useRef, useState } from 'react'; @@ -62,7 +63,7 @@ export function useQuotaAndFallback({ let message: string; - if (error instanceof TerminalQuotaError) { + if (error && isProQuotaExceededError(error)) { // Pro Quota specific messages (Interactive) if (isPaidTier) { message = `⚡ You have reached your daily ${failedModel} quota limit. @@ -73,6 +74,19 @@ export function useQuotaAndFallback({ ⚡ You can choose to authenticate with a paid API key or continue with the fallback model. ⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist ⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key +⚡ You can switch authentication methods by typing /auth`; + } + } else if (error && isGenericQuotaExceededError(error)) { + // Generic Quota (Automatic fallback) + const actionMessage = `⚡ You have reached your daily quota limit.\n⚡ Automatically switching from ${failedModel} to ${fallbackModel} for the remainder of this session.`; + + if (isPaidTier) { + message = `${actionMessage} +⚡ To continue accessing the ${failedModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; + } else { + message = `${actionMessage} +⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist +⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key ⚡ You can switch authentication methods by typing /auth`; } } else { @@ -105,7 +119,7 @@ export function useQuotaAndFallback({ config.setQuotaErrorOccurred(true); // Interactive Fallback for Pro quota - if (error instanceof TerminalQuotaError) { + if (error && isProQuotaExceededError(error)) { if (isDialogPending.current) { return 'stop'; // A dialog is already active, so just stop this request. } diff --git a/packages/core/index.ts b/packages/core/index.ts index acc9743e61..729fcc8d48 100644 --- a/packages/core/index.ts +++ b/packages/core/index.ts @@ -44,5 +44,3 @@ export { makeFakeConfig } from './src/test-utils/config.js'; export * from './src/utils/pathReader.js'; export { ClearcutLogger } from './src/telemetry/clearcut-logger/clearcut-logger.js'; export { logModelSlashCommand } from './src/telemetry/loggers.js'; -export * from './src/utils/googleQuotaErrors.js'; -export type { GoogleApiError } from './src/utils/googleErrors.js'; diff --git a/packages/core/src/utils/errorParsing.test.ts b/packages/core/src/utils/errorParsing.test.ts index 291145d2e8..9c71f4d89b 100644 --- a/packages/core/src/utils/errorParsing.test.ts +++ b/packages/core/src/utils/errorParsing.test.ts @@ -6,7 +6,9 @@ import { describe, it, expect } from 'vitest'; import { parseAndFormatApiError } from './errorParsing.js'; +import { isProQuotaExceededError } from './quotaErrorDetection.js'; import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js'; +import { UserTierId } from '../code_assist/types.js'; import { AuthType } from '../core/contentGenerator.js'; import type { StructuredError } from '../core/turn.js'; @@ -38,6 +40,22 @@ describe('parseAndFormatApiError', () => { ); }); + it('should format a 429 API error with the personal message', () => { + const errorMessage = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}'; + const result = parseAndFormatApiError( + errorMessage, + AuthType.LOGIN_WITH_GOOGLE, + undefined, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + expect(result).toContain('[API Error: Rate limit exceeded'); + expect(result).toContain( + 'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model', + ); + }); + it('should format a 429 API error with the vertex message', () => { const errorMessage = 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}'; @@ -114,4 +132,230 @@ describe('parseAndFormatApiError', () => { const expected = '[API Error: An unknown error occurred.]'; expect(parseAndFormatApiError(error)).toBe(expected); }); + + it('should format a 429 API error with Pro quota exceeded message for Google auth (Free tier)', () => { + const errorMessage = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + const result = parseAndFormatApiError( + errorMessage, + AuthType.LOGIN_WITH_GOOGLE, + undefined, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + expect(result).toContain( + "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'", + ); + expect(result).toContain( + 'You have reached your daily gemini-2.5-pro quota limit', + ); + expect(result).toContain('upgrade to get higher limits'); + }); + + it('should format a regular 429 API error with standard message for Google auth', () => { + const errorMessage = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}'; + const result = parseAndFormatApiError( + errorMessage, + AuthType.LOGIN_WITH_GOOGLE, + undefined, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + expect(result).toContain('[API Error: Rate limit exceeded'); + expect(result).toContain( + 'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model', + ); + expect(result).not.toContain( + 'You have reached your daily gemini-2.5-pro quota limit', + ); + }); + + it('should format a 429 API error with generic quota exceeded message for Google auth', () => { + const errorMessage = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + const result = parseAndFormatApiError( + errorMessage, + AuthType.LOGIN_WITH_GOOGLE, + undefined, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + expect(result).toContain( + "[API Error: Quota exceeded for quota metric 'GenerationRequests'", + ); + expect(result).toContain('You have reached your daily quota limit'); + expect(result).not.toContain( + 'You have reached your daily Gemini 2.5 Pro quota limit', + ); + }); + + it('should prioritize Pro quota message over generic quota message for Google auth', () => { + const errorMessage = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + const result = parseAndFormatApiError( + errorMessage, + AuthType.LOGIN_WITH_GOOGLE, + undefined, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + expect(result).toContain( + "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'", + ); + expect(result).toContain( + 'You have reached your daily gemini-2.5-pro quota limit', + ); + expect(result).not.toContain('You have reached your daily quota limit'); + }); + + it('should format a 429 API error with Pro quota exceeded message for Google auth (Standard tier)', () => { + const errorMessage = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + const result = parseAndFormatApiError( + errorMessage, + AuthType.LOGIN_WITH_GOOGLE, + UserTierId.STANDARD, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + expect(result).toContain( + "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'", + ); + expect(result).toContain( + 'You have reached your daily gemini-2.5-pro quota limit', + ); + expect(result).toContain( + 'We appreciate you for choosing Gemini Code Assist and the Gemini CLI', + ); + expect(result).not.toContain('upgrade to get higher limits'); + }); + + it('should format a 429 API error with Pro quota exceeded message for Google auth (Legacy tier)', () => { + const errorMessage = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + const result = parseAndFormatApiError( + errorMessage, + AuthType.LOGIN_WITH_GOOGLE, + UserTierId.LEGACY, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + expect(result).toContain( + "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'", + ); + expect(result).toContain( + 'You have reached your daily gemini-2.5-pro quota limit', + ); + expect(result).toContain( + 'We appreciate you for choosing Gemini Code Assist and the Gemini CLI', + ); + expect(result).not.toContain('upgrade to get higher limits'); + }); + + it('should handle different Gemini 2.5 version strings in Pro quota exceeded errors', () => { + const errorMessage25 = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + const errorMessagePreview = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + + const result25 = parseAndFormatApiError( + errorMessage25, + AuthType.LOGIN_WITH_GOOGLE, + undefined, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + const resultPreview = parseAndFormatApiError( + errorMessagePreview, + AuthType.LOGIN_WITH_GOOGLE, + undefined, + 'gemini-2.5-preview-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + + expect(result25).toContain( + 'You have reached your daily gemini-2.5-pro quota limit', + ); + expect(resultPreview).toContain( + 'You have reached your daily gemini-2.5-preview-pro quota limit', + ); + expect(result25).toContain('upgrade to get higher limits'); + expect(resultPreview).toContain('upgrade to get higher limits'); + }); + + it('should not match non-Pro models with similar version strings', () => { + // Test that Flash models with similar version strings don't match + expect( + isProQuotaExceededError( + "Quota exceeded for quota metric 'Gemini 2.5 Flash Requests' and limit", + ), + ).toBe(false); + expect( + isProQuotaExceededError( + "Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit", + ), + ).toBe(false); + + // Test other model types + expect( + isProQuotaExceededError( + "Quota exceeded for quota metric 'Gemini 2.5 Ultra Requests' and limit", + ), + ).toBe(false); + expect( + isProQuotaExceededError( + "Quota exceeded for quota metric 'Gemini 2.5 Standard Requests' and limit", + ), + ).toBe(false); + + // Test generic quota messages + expect( + isProQuotaExceededError( + "Quota exceeded for quota metric 'GenerationRequests' and limit", + ), + ).toBe(false); + expect( + isProQuotaExceededError( + "Quota exceeded for quota metric 'EmbeddingRequests' and limit", + ), + ).toBe(false); + }); + + it('should format a generic quota exceeded message for Google auth (Standard tier)', () => { + const errorMessage = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + const result = parseAndFormatApiError( + errorMessage, + AuthType.LOGIN_WITH_GOOGLE, + UserTierId.STANDARD, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + expect(result).toContain( + "[API Error: Quota exceeded for quota metric 'GenerationRequests'", + ); + expect(result).toContain('You have reached your daily quota limit'); + expect(result).toContain( + 'We appreciate you for choosing Gemini Code Assist and the Gemini CLI', + ); + expect(result).not.toContain('upgrade to get higher limits'); + }); + + it('should format a regular 429 API error with standard message for Google auth (Standard tier)', () => { + const errorMessage = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}'; + const result = parseAndFormatApiError( + errorMessage, + AuthType.LOGIN_WITH_GOOGLE, + UserTierId.STANDARD, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + expect(result).toContain('[API Error: Rate limit exceeded'); + expect(result).toContain( + 'We appreciate you for choosing Gemini Code Assist and the Gemini CLI', + ); + expect(result).not.toContain('upgrade to get higher limits'); + }); }); diff --git a/packages/core/src/utils/errorParsing.ts b/packages/core/src/utils/errorParsing.ts index bad61ea9e2..ecfc237573 100644 --- a/packages/core/src/utils/errorParsing.ts +++ b/packages/core/src/utils/errorParsing.ts @@ -4,11 +4,50 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { isApiError, isStructuredError } from './quotaErrorDetection.js'; -import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js'; -import type { UserTierId } from '../code_assist/types.js'; +import { + isProQuotaExceededError, + isGenericQuotaExceededError, + isApiError, + isStructuredError, +} from './quotaErrorDetection.js'; +import { + DEFAULT_GEMINI_MODEL, + DEFAULT_GEMINI_FLASH_MODEL, +} from '../config/models.js'; +import { UserTierId } from '../code_assist/types.js'; import { AuthType } from '../core/contentGenerator.js'; +// Free Tier message functions +const getRateLimitErrorMessageGoogleFree = ( + fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, +) => + `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`; + +const getRateLimitErrorMessageGoogleProQuotaFree = ( + currentModel: string = DEFAULT_GEMINI_MODEL, + fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, +) => + `\nYou have reached your daily ${currentModel} quota limit. You will be switched to the ${fallbackModel} model for the rest of this session. To increase your limits, upgrade to get higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; + +const getRateLimitErrorMessageGoogleGenericQuotaFree = () => + `\nYou have reached your daily quota limit. To increase your limits, upgrade to get higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; + +// Legacy/Standard Tier message functions +const getRateLimitErrorMessageGooglePaid = ( + fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, +) => + `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`; + +const getRateLimitErrorMessageGoogleProQuotaPaid = ( + currentModel: string = DEFAULT_GEMINI_MODEL, + fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, +) => + `\nYou have reached your daily ${currentModel} quota limit. You will be switched to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI. To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; + +const getRateLimitErrorMessageGoogleGenericQuotaPaid = ( + currentModel: string = DEFAULT_GEMINI_MODEL, +) => + `\nYou have reached your daily quota limit. We appreciate you for choosing Gemini Code Assist and the Gemini CLI. To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; const RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI = '\nPlease wait and try again later. To increase your limits, request a quota increase through AI Studio, or switch to another /auth method'; const RATE_LIMIT_ERROR_MESSAGE_VERTEX = @@ -20,9 +59,39 @@ const getRateLimitErrorMessageDefault = ( function getRateLimitMessage( authType?: AuthType, + error?: unknown, + userTier?: UserTierId, + currentModel?: string, fallbackModel?: string, ): string { switch (authType) { + case AuthType.LOGIN_WITH_GOOGLE: { + // Determine if user is on a paid tier (Legacy or Standard) - default to FREE if not specified + const isPaidTier = + userTier === UserTierId.LEGACY || userTier === UserTierId.STANDARD; + + if (isProQuotaExceededError(error)) { + return isPaidTier + ? getRateLimitErrorMessageGoogleProQuotaPaid( + currentModel || DEFAULT_GEMINI_MODEL, + fallbackModel, + ) + : getRateLimitErrorMessageGoogleProQuotaFree( + currentModel || DEFAULT_GEMINI_MODEL, + fallbackModel, + ); + } else if (isGenericQuotaExceededError(error)) { + return isPaidTier + ? getRateLimitErrorMessageGoogleGenericQuotaPaid( + currentModel || DEFAULT_GEMINI_MODEL, + ) + : getRateLimitErrorMessageGoogleGenericQuotaFree(); + } else { + return isPaidTier + ? getRateLimitErrorMessageGooglePaid(fallbackModel) + : getRateLimitErrorMessageGoogleFree(fallbackModel); + } + } case AuthType.USE_GEMINI: return RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI; case AuthType.USE_VERTEX_AI: @@ -42,7 +111,13 @@ export function parseAndFormatApiError( if (isStructuredError(error)) { let text = `[API Error: ${error.message}]`; if (error.status === 429) { - text += getRateLimitMessage(authType, fallbackModel); + text += getRateLimitMessage( + authType, + error, + userTier, + currentModel, + fallbackModel, + ); } return text; } @@ -71,7 +146,13 @@ export function parseAndFormatApiError( } let text = `[API Error: ${finalMessage} (Status: ${parsedError.error.status})]`; if (parsedError.error.code === 429) { - text += getRateLimitMessage(authType, fallbackModel); + text += getRateLimitMessage( + authType, + parsedError, + userTier, + currentModel, + fallbackModel, + ); } return text; } diff --git a/packages/core/src/utils/flashFallback.test.ts b/packages/core/src/utils/flashFallback.test.ts index a3f08f5df6..8ef9665f42 100644 --- a/packages/core/src/utils/flashFallback.test.ts +++ b/packages/core/src/utils/flashFallback.test.ts @@ -11,6 +11,7 @@ import { setSimulate429, disableSimulationAfterFallback, shouldSimulate429, + createSimulated429Error, resetRequestCounter, } from './testUtils.js'; import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js'; @@ -18,15 +19,12 @@ import { retryWithBackoff } from './retry.js'; import { AuthType } from '../core/contentGenerator.js'; // Import the new types (Assuming this test file is in packages/core/src/utils/) import type { FallbackModelHandler } from '../fallback/types.js'; -import type { GoogleApiError } from './googleErrors.js'; -import { TerminalQuotaError } from './googleQuotaErrors.js'; vi.mock('node:fs'); // Update the description to reflect that this tests the retry utility's integration describe('Retry Utility Fallback Integration', () => { let config: Config; - let mockGoogleApiError: GoogleApiError; beforeEach(() => { vi.mocked(fs.existsSync).mockReturnValue(true); @@ -40,11 +38,6 @@ describe('Retry Utility Fallback Integration', () => { cwd: '/test', model: 'gemini-2.5-pro', }); - mockGoogleApiError = { - code: 429, - message: 'mock error', - details: [], - }; // Reset simulation state for each test setSimulate429(false); @@ -63,7 +56,6 @@ describe('Retry Utility Fallback Integration', () => { const result = await config.fallbackModelHandler!( 'gemini-2.5-pro', DEFAULT_GEMINI_FLASH_MODEL, - new Error('test'), ); // Verify it returns the correct intent @@ -71,61 +63,81 @@ describe('Retry Utility Fallback Integration', () => { }); // This test validates the retry utility's logic for triggering the callback. - it('should trigger onPersistent429 on TerminalQuotaError for OAuth users', async () => { + it('should trigger onPersistent429 after 2 consecutive 429 errors for OAuth users', async () => { let fallbackCalled = false; + // Removed fallbackModel variable as it's no longer relevant here. + // Mock function that simulates exactly 2 429 errors, then succeeds after fallback const mockApiCall = vi .fn() - .mockRejectedValueOnce( - new TerminalQuotaError('Daily limit', mockGoogleApiError), - ) - .mockRejectedValueOnce( - new TerminalQuotaError('Daily limit', mockGoogleApiError), - ) + .mockRejectedValueOnce(createSimulated429Error()) + .mockRejectedValueOnce(createSimulated429Error()) .mockResolvedValueOnce('success after fallback'); + // Mock the onPersistent429 callback (this is what client.ts/geminiChat.ts provides) const mockPersistent429Callback = vi.fn(async (_authType?: string) => { fallbackCalled = true; + // Return true to signal retryWithBackoff to reset attempts and continue. return true; }); + // Test with OAuth personal auth type, with maxAttempts = 2 to ensure fallback triggers const result = await retryWithBackoff(mockApiCall, { maxAttempts: 2, initialDelayMs: 1, maxDelayMs: 10, + shouldRetryOnError: (error: Error) => { + const status = (error as Error & { status?: number }).status; + return status === 429; + }, onPersistent429: mockPersistent429Callback, authType: AuthType.LOGIN_WITH_GOOGLE, }); + // Verify fallback mechanism was triggered expect(fallbackCalled).toBe(true); expect(mockPersistent429Callback).toHaveBeenCalledWith( AuthType.LOGIN_WITH_GOOGLE, - expect.any(TerminalQuotaError), + expect.any(Error), ); expect(result).toBe('success after fallback'); + // Should have: 2 failures, then fallback triggered, then 1 success after retry reset expect(mockApiCall).toHaveBeenCalledTimes(3); }); it('should not trigger onPersistent429 for API key users', async () => { - const fallbackCallback = vi.fn(); + let fallbackCalled = false; - const mockApiCall = vi - .fn() - .mockRejectedValueOnce( - new TerminalQuotaError('Daily limit', mockGoogleApiError), - ); + // Mock function that simulates 429 errors + const mockApiCall = vi.fn().mockRejectedValue(createSimulated429Error()); - const promise = retryWithBackoff(mockApiCall, { - maxAttempts: 2, - initialDelayMs: 1, - maxDelayMs: 10, - onPersistent429: fallbackCallback, - authType: AuthType.USE_GEMINI, // API key auth type + // Mock the callback + const mockPersistent429Callback = vi.fn(async () => { + fallbackCalled = true; + return true; }); - await expect(promise).rejects.toThrow('Daily limit'); - expect(fallbackCallback).not.toHaveBeenCalled(); - expect(mockApiCall).toHaveBeenCalledTimes(1); + // Test with API key auth type - should not trigger fallback + try { + await retryWithBackoff(mockApiCall, { + maxAttempts: 5, + initialDelayMs: 10, + maxDelayMs: 100, + shouldRetryOnError: (error: Error) => { + const status = (error as Error & { status?: number }).status; + return status === 429; + }, + onPersistent429: mockPersistent429Callback, + authType: AuthType.USE_GEMINI, // API key auth type + }); + } catch (error) { + // Expected to throw after max attempts + expect((error as Error).message).toContain('Rate limit exceeded'); + } + + // Verify fallback was NOT triggered for API key users + expect(fallbackCalled).toBe(false); + expect(mockPersistent429Callback).not.toHaveBeenCalled(); }); // This test validates the test utilities themselves. diff --git a/packages/core/src/utils/googleErrors.test.ts b/packages/core/src/utils/googleErrors.test.ts deleted file mode 100644 index bb6043b596..0000000000 --- a/packages/core/src/utils/googleErrors.test.ts +++ /dev/null @@ -1,250 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { describe, it, expect } from 'vitest'; -import { parseGoogleApiError } from './googleErrors.js'; -import type { QuotaFailure } from './googleErrors.js'; - -describe('parseGoogleApiError', () => { - it('should return null for non-gaxios errors', () => { - expect(parseGoogleApiError(new Error('vanilla error'))).toBeNull(); - expect(parseGoogleApiError(null)).toBeNull(); - expect(parseGoogleApiError({})).toBeNull(); - }); - - it('should parse a standard gaxios error', () => { - const mockError = { - response: { - status: 429, - data: { - error: { - code: 429, - message: 'Quota exceeded', - details: [ - { - '@type': 'type.googleapis.com/google.rpc.QuotaFailure', - violations: [{ subject: 'user', description: 'daily limit' }], - }, - ], - }, - }, - }, - }; - - const parsed = parseGoogleApiError(mockError); - expect(parsed).not.toBeNull(); - expect(parsed?.code).toBe(429); - expect(parsed?.message).toBe('Quota exceeded'); - expect(parsed?.details).toHaveLength(1); - const detail = parsed?.details[0] as QuotaFailure; - expect(detail['@type']).toBe('type.googleapis.com/google.rpc.QuotaFailure'); - expect(detail.violations[0].description).toBe('daily limit'); - }); - - it('should parse an error with details stringified in the message', () => { - const innerError = { - error: { - code: 429, - message: 'Inner quota message', - details: [ - { - '@type': 'type.googleapis.com/google.rpc.RetryInfo', - retryDelay: '10s', - }, - ], - }, - }; - - const mockError = { - response: { - status: 429, - data: { - error: { - code: 429, - message: JSON.stringify(innerError), - details: [], // Top-level details are empty - }, - }, - }, - }; - - const parsed = parseGoogleApiError(mockError); - expect(parsed).not.toBeNull(); - expect(parsed?.code).toBe(429); - expect(parsed?.message).toBe('Inner quota message'); - expect(parsed?.details).toHaveLength(1); - expect(parsed?.details[0]['@type']).toBe( - 'type.googleapis.com/google.rpc.RetryInfo', - ); - }); - - it('should return null if details are not in the expected format', () => { - const mockError = { - response: { - status: 400, - data: { - error: { - code: 400, - message: 'Bad Request', - details: 'just a string', // Invalid details format - }, - }, - }, - }; - expect(parseGoogleApiError(mockError)).toBeNull(); - }); - - it('should return null if there are no valid details', () => { - const mockError = { - response: { - status: 400, - data: { - error: { - code: 400, - message: 'Bad Request', - details: [ - { - // missing '@type' - reason: 'some reason', - }, - ], - }, - }, - }, - }; - expect(parseGoogleApiError(mockError)).toBeNull(); - }); - - it('should parse a doubly nested error in the message', () => { - const innerError = { - error: { - code: 429, - message: 'Innermost quota message', - details: [ - { - '@type': 'type.googleapis.com/google.rpc.RetryInfo', - retryDelay: '20s', - }, - ], - }, - }; - - const middleError = { - error: { - code: 429, - message: JSON.stringify(innerError), - details: [], - }, - }; - - const mockError = { - response: { - status: 429, - data: { - error: { - code: 429, - message: JSON.stringify(middleError), - details: [], - }, - }, - }, - }; - - const parsed = parseGoogleApiError(mockError); - expect(parsed).not.toBeNull(); - expect(parsed?.code).toBe(429); - expect(parsed?.message).toBe('Innermost quota message'); - expect(parsed?.details).toHaveLength(1); - expect(parsed?.details[0]['@type']).toBe( - 'type.googleapis.com/google.rpc.RetryInfo', - ); - }); - - it('should parse an error that is not in a response object', () => { - const innerError = { - error: { - code: 429, - message: 'Innermost quota message', - details: [ - { - '@type': 'type.googleapis.com/google.rpc.RetryInfo', - retryDelay: '20s', - }, - ], - }, - }; - - const mockError = { - error: { - code: 429, - message: JSON.stringify(innerError), - details: [], - }, - }; - - const parsed = parseGoogleApiError(mockError); - expect(parsed).not.toBeNull(); - expect(parsed?.code).toBe(429); - expect(parsed?.message).toBe('Innermost quota message'); - expect(parsed?.details).toHaveLength(1); - expect(parsed?.details[0]['@type']).toBe( - 'type.googleapis.com/google.rpc.RetryInfo', - ); - }); - - it('should parse an error that is a JSON string', () => { - const innerError = { - error: { - code: 429, - message: 'Innermost quota message', - details: [ - { - '@type': 'type.googleapis.com/google.rpc.RetryInfo', - retryDelay: '20s', - }, - ], - }, - }; - - const mockError = { - error: { - code: 429, - message: JSON.stringify(innerError), - details: [], - }, - }; - - const parsed = parseGoogleApiError(JSON.stringify(mockError)); - expect(parsed).not.toBeNull(); - expect(parsed?.code).toBe(429); - expect(parsed?.message).toBe('Innermost quota message'); - expect(parsed?.details).toHaveLength(1); - expect(parsed?.details[0]['@type']).toBe( - 'type.googleapis.com/google.rpc.RetryInfo', - ); - }); - - it('should parse the user-provided nested error string', () => { - const userErrorString = - '{"error":{"message":"{\\n \\"error\\": {\\n \\"code\\": 429,\\n \\"message\\": \\"You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 10000\\\\nPlease retry in 40.025771073s.\\",\\n \\"status\\": \\"RESOURCE_EXHAUSTED\\",\\n \\"details\\": [\\n {\\n \\"@type\\": \\"type.googleapis.com/google.rpc.DebugInfo\\",\\n \\"detail\\": \\"[ORIGINAL ERROR] generic::resource_exhausted: You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 10000\\\\nPlease retry in 40.025771073s. [google.rpc.error_details_ext] { message: \\\\\\"You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 10000\\\\\\\\nPlease retry in 40.025771073s.\\\\\\" }\\"\\n },\\n {\\n \\"@type\\": \\"type.googleapis.com/google.rpc.QuotaFailure\\",\\n \\"violations\\": [\\n {\\n \\"quotaMetric\\": \\"generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count\\",\\n \\"quotaId\\": \\"GenerateContentPaidTierInputTokensPerModelPerMinute\\",\\n \\"quotaDimensions\\": {\\n \\"location\\": \\"global\\",\\n \\"model\\": \\"gemini-2.5-pro\\"\\n },\\n \\"quotaValue\\": \\"10000\\"\\n }\\n ]\\n },\\n {\\n \\"@type\\": \\"type.googleapis.com/google.rpc.Help\\",\\n \\"links\\": [\\n {\\n \\"description\\": \\"Learn more about Gemini API quotas\\",\\n \\"url\\": \\"https://ai.google.dev/gemini-api/docs/rate-limits\\"\\n }\\n ]\\n },\\n {\\n \\"@type\\": \\"type.googleapis.com/google.rpc.RetryInfo\\",\\n \\"retryDelay\\": \\"40s\\"\\n }\\n ]\\n }\\n}\\n","code":429,"status":"Too Many Requests"}}'; - - const parsed = parseGoogleApiError(userErrorString); - expect(parsed).not.toBeNull(); - expect(parsed?.code).toBe(429); - expect(parsed?.message).toContain('You exceeded your current quota'); - expect(parsed?.details).toHaveLength(4); - expect( - parsed?.details.some( - (d) => d['@type'] === 'type.googleapis.com/google.rpc.QuotaFailure', - ), - ).toBe(true); - expect( - parsed?.details.some( - (d) => d['@type'] === 'type.googleapis.com/google.rpc.RetryInfo', - ), - ).toBe(true); - }); -}); diff --git a/packages/core/src/utils/googleErrors.ts b/packages/core/src/utils/googleErrors.ts deleted file mode 100644 index 52e58ec999..0000000000 --- a/packages/core/src/utils/googleErrors.ts +++ /dev/null @@ -1,242 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -/** - * @fileoverview - * This file contains types and functions for parsing structured Google API errors. - */ - -/** - * Based on google/rpc/error_details.proto - */ - -export interface ErrorInfo { - '@type': 'type.googleapis.com/google.rpc.ErrorInfo'; - reason: string; - domain: string; - metadata: { [key: string]: string }; -} - -export interface RetryInfo { - '@type': 'type.googleapis.com/google.rpc.RetryInfo'; - retryDelay: string; // e.g. "51820.638305887s" -} - -export interface DebugInfo { - '@type': 'type.googleapis.com/google.rpc.DebugInfo'; - stackEntries: string[]; - detail: string; -} - -export interface QuotaFailure { - '@type': 'type.googleapis.com/google.rpc.QuotaFailure'; - violations: Array<{ - subject: string; - description: string; - apiService?: string; - quotaMetric?: string; - quotaId?: string; - quotaDimensions?: { [key: string]: string }; - quotaValue?: number; - futureQuotaValue?: number; - }>; -} - -export interface PreconditionFailure { - '@type': 'type.googleapis.com/google.rpc.PreconditionFailure'; - violations: Array<{ - type: string; - subject: string; - description: string; - }>; -} - -export interface LocalizedMessage { - '@type': 'type.googleapis.com/google.rpc.LocalizedMessage'; - locale: string; - message: string; -} - -export interface BadRequest { - '@type': 'type.googleapis.com/google.rpc.BadRequest'; - fieldViolations: Array<{ - field: string; - description: string; - reason?: string; - localizedMessage?: LocalizedMessage; - }>; -} - -export interface RequestInfo { - '@type': 'type.googleapis.com/google.rpc.RequestInfo'; - requestId: string; - servingData: string; -} - -export interface ResourceInfo { - '@type': 'type.googleapis.com/google.rpc.ResourceInfo'; - resourceType: string; - resourceName: string; - owner: string; - description: string; -} - -export interface Help { - '@type': 'type.googleapis.com/google.rpc.Help'; - links: Array<{ - description: string; - url: string; - }>; -} - -export type GoogleApiErrorDetail = - | ErrorInfo - | RetryInfo - | DebugInfo - | QuotaFailure - | PreconditionFailure - | BadRequest - | RequestInfo - | ResourceInfo - | Help - | LocalizedMessage; - -export interface GoogleApiError { - code: number; - message: string; - details: GoogleApiErrorDetail[]; -} - -/** - * Parses an error object to check if it's a structured Google API error - * and extracts all details. - * - * This function can handle two formats: - * 1. Standard Google API errors where `details` is a top-level field. - * 2. Errors where the entire structured error object is stringified inside - * the `message` field of a wrapper error. - * - * @param error The error object to inspect. - * @returns A GoogleApiError object if the error matches, otherwise null. - */ -export function parseGoogleApiError(error: unknown): GoogleApiError | null { - if (!error) { - return null; - } - - let errorObj: unknown = error; - - // If error is a string, try to parse it. - if (typeof errorObj === 'string') { - try { - errorObj = JSON.parse(errorObj); - } catch (_) { - // Not a JSON string, can't parse. - return null; - } - } - - if (typeof errorObj !== 'object' || errorObj === null) { - return null; - } - - type ErrorShape = { - message?: string; - details?: unknown[]; - code?: number; - }; - - const gaxiosError = errorObj as { - response?: { - status?: number; - data?: - | { - error?: ErrorShape; - } - | string; - }; - error?: ErrorShape; - code?: number; - }; - - let outerError: ErrorShape | undefined; - if (gaxiosError.response?.data) { - if (typeof gaxiosError.response.data === 'string') { - try { - const parsedData = JSON.parse(gaxiosError.response.data); - if (parsedData.error) { - outerError = parsedData.error; - } - } catch (_) { - // Not a JSON string, or doesn't contain .error - } - } else if ( - typeof gaxiosError.response.data === 'object' && - gaxiosError.response.data !== null - ) { - outerError = ( - gaxiosError.response.data as { - error?: ErrorShape; - } - ).error; - } - } - const responseStatus = gaxiosError.response?.status; - - if (!outerError) { - // If the gaxios structure isn't there, check for a top-level `error` property. - if (gaxiosError.error) { - outerError = gaxiosError.error; - } else { - return null; - } - } - - let currentError = outerError; - let depth = 0; - const maxDepth = 10; - // Handle cases where the actual error object is stringified inside the message - // by drilling down until we find an error that doesn't have a stringified message. - while (typeof currentError.message === 'string' && depth < maxDepth) { - try { - const parsedMessage = JSON.parse(currentError.message); - if (parsedMessage.error) { - currentError = parsedMessage.error; - depth++; - } else { - // The message is a JSON string, but not a nested error object. - break; - } - } catch (_) { - // It wasn't a JSON string, so we've drilled down as far as we can. - break; - } - } - - const code = responseStatus ?? currentError.code ?? gaxiosError.code; - const message = currentError.message; - const errorDetails = currentError.details; - - if (Array.isArray(errorDetails) && code && message) { - const details: GoogleApiErrorDetail[] = []; - for (const detail of errorDetails) { - if (detail && typeof detail === 'object' && '@type' in detail) { - // We can just cast it; the consumer will have to switch on @type - details.push(detail as GoogleApiErrorDetail); - } - } - - if (details.length > 0) { - return { - code, - message, - details, - }; - } - } - - return null; -} diff --git a/packages/core/src/utils/googleQuotaErrors.test.ts b/packages/core/src/utils/googleQuotaErrors.test.ts deleted file mode 100644 index 7555add873..0000000000 --- a/packages/core/src/utils/googleQuotaErrors.test.ts +++ /dev/null @@ -1,205 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { describe, it, expect, vi, afterEach } from 'vitest'; -import { - classifyGoogleError, - RetryableQuotaError, - TerminalQuotaError, -} from './googleQuotaErrors.js'; -import * as errorParser from './googleErrors.js'; -import type { GoogleApiError } from './googleErrors.js'; - -describe('classifyGoogleError', () => { - afterEach(() => { - vi.restoreAllMocks(); - }); - - it('should return original error if not a Google API error', () => { - const regularError = new Error('Something went wrong'); - vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(null); - const result = classifyGoogleError(regularError); - expect(result).toBe(regularError); - }); - - it('should return original error if code is not 429', () => { - const apiError: GoogleApiError = { - code: 500, - message: 'Server error', - details: [], - }; - vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError); - const originalError = new Error(); - const result = classifyGoogleError(originalError); - expect(result).toBe(originalError); - expect(result).not.toBeInstanceOf(TerminalQuotaError); - expect(result).not.toBeInstanceOf(RetryableQuotaError); - }); - - it('should return TerminalQuotaError for daily quota violations in QuotaFailure', () => { - const apiError: GoogleApiError = { - code: 429, - message: 'Quota exceeded', - details: [ - { - '@type': 'type.googleapis.com/google.rpc.QuotaFailure', - violations: [ - { - subject: 'user', - description: 'daily limit', - quotaId: 'RequestsPerDay-limit', - }, - ], - }, - ], - }; - vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError); - const result = classifyGoogleError(new Error()); - expect(result).toBeInstanceOf(TerminalQuotaError); - expect((result as TerminalQuotaError).cause).toBe(apiError); - }); - - it('should return TerminalQuotaError for daily quota violations in ErrorInfo', () => { - const apiError: GoogleApiError = { - code: 429, - message: 'Quota exceeded', - details: [ - { - '@type': 'type.googleapis.com/google.rpc.ErrorInfo', - reason: 'QUOTA_EXCEEDED', - domain: 'googleapis.com', - metadata: { - quota_limit: 'RequestsPerDay_PerProject_PerUser', - }, - }, - ], - }; - vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError); - const result = classifyGoogleError(new Error()); - expect(result).toBeInstanceOf(TerminalQuotaError); - }); - - it('should return TerminalQuotaError for long retry delays', () => { - const apiError: GoogleApiError = { - code: 429, - message: 'Too many requests', - details: [ - { - '@type': 'type.googleapis.com/google.rpc.RetryInfo', - retryDelay: '301s', // > 5 minutes - }, - ], - }; - vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError); - const result = classifyGoogleError(new Error()); - expect(result).toBeInstanceOf(TerminalQuotaError); - }); - - it('should return RetryableQuotaError for short retry delays', () => { - const apiError: GoogleApiError = { - code: 429, - message: 'Too many requests', - details: [ - { - '@type': 'type.googleapis.com/google.rpc.RetryInfo', - retryDelay: '45.123s', - }, - ], - }; - vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError); - const result = classifyGoogleError(new Error()); - expect(result).toBeInstanceOf(RetryableQuotaError); - expect((result as RetryableQuotaError).retryDelayMs).toBe(45123); - }); - - it('should return RetryableQuotaError for per-minute quota violations in QuotaFailure', () => { - const apiError: GoogleApiError = { - code: 429, - message: 'Quota exceeded', - details: [ - { - '@type': 'type.googleapis.com/google.rpc.QuotaFailure', - violations: [ - { - subject: 'user', - description: 'per minute limit', - quotaId: 'RequestsPerMinute-limit', - }, - ], - }, - ], - }; - vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError); - const result = classifyGoogleError(new Error()); - expect(result).toBeInstanceOf(RetryableQuotaError); - expect((result as RetryableQuotaError).retryDelayMs).toBe(60000); - }); - - it('should return RetryableQuotaError for per-minute quota violations in ErrorInfo', () => { - const apiError: GoogleApiError = { - code: 429, - message: 'Quota exceeded', - details: [ - { - '@type': 'type.googleapis.com/google.rpc.ErrorInfo', - reason: 'QUOTA_EXCEEDED', - domain: 'googleapis.com', - metadata: { - quota_limit: 'RequestsPerMinute_PerProject_PerUser', - }, - }, - ], - }; - vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError); - const result = classifyGoogleError(new Error()); - expect(result).toBeInstanceOf(RetryableQuotaError); - expect((result as RetryableQuotaError).retryDelayMs).toBe(60000); - }); - - it('should prioritize daily limit over retry info', () => { - const apiError: GoogleApiError = { - code: 429, - message: 'Quota exceeded', - details: [ - { - '@type': 'type.googleapis.com/google.rpc.QuotaFailure', - violations: [ - { - subject: 'user', - description: 'daily limit', - quotaId: 'RequestsPerDay-limit', - }, - ], - }, - { - '@type': 'type.googleapis.com/google.rpc.RetryInfo', - retryDelay: '10s', - }, - ], - }; - vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError); - const result = classifyGoogleError(new Error()); - expect(result).toBeInstanceOf(TerminalQuotaError); - }); - - it('should return original error for 429 without specific details', () => { - const apiError: GoogleApiError = { - code: 429, - message: 'Too many requests', - details: [ - { - '@type': 'type.googleapis.com/google.rpc.DebugInfo', - detail: 'some debug info', - stackEntries: [], - }, - ], - }; - vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError); - const originalError = new Error(); - const result = classifyGoogleError(originalError); - expect(result).toBe(originalError); - }); -}); diff --git a/packages/core/src/utils/googleQuotaErrors.ts b/packages/core/src/utils/googleQuotaErrors.ts deleted file mode 100644 index 82def25618..0000000000 --- a/packages/core/src/utils/googleQuotaErrors.ts +++ /dev/null @@ -1,162 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import type { - ErrorInfo, - GoogleApiError, - QuotaFailure, - RetryInfo, -} from './googleErrors.js'; -import { parseGoogleApiError } from './googleErrors.js'; - -const FIVE_MINUTES_IN_SECONDS = 5 * 60; - -/** - * A non-retryable error indicating a hard quota limit has been reached (e.g., daily limit). - */ -export class TerminalQuotaError extends Error { - constructor( - message: string, - override readonly cause: GoogleApiError, - ) { - super(message); - this.name = 'TerminalQuotaError'; - } -} - -/** - * A retryable error indicating a temporary quota issue (e.g., per-minute limit). - */ -export class RetryableQuotaError extends Error { - retryDelayMs: number; - - constructor( - message: string, - override readonly cause: GoogleApiError, - retryDelaySeconds: number, - ) { - super(message); - this.name = 'RetryableQuotaError'; - this.retryDelayMs = retryDelaySeconds * 1000; - } -} - -/** - * Parses a duration string (e.g., "34.074824224s", "60s") and returns the time in seconds. - * @param duration The duration string to parse. - * @returns The duration in seconds, or null if parsing fails. - */ -function parseDurationInSeconds(duration: string): number | null { - if (!duration.endsWith('s')) { - return null; - } - const seconds = parseFloat(duration.slice(0, -1)); - return isNaN(seconds) ? null : seconds; -} - -/** - * Analyzes a caught error and classifies it as a specific quota-related error if applicable. - * - * It decides whether an error is a `TerminalQuotaError` or a `RetryableQuotaError` based on - * the following logic: - * - If the error indicates a daily limit, it's a `TerminalQuotaError`. - * - If the error suggests a retry delay of more than 5 minutes, it's a `TerminalQuotaError`. - * - If the error suggests a retry delay of 5 minutes or less, it's a `RetryableQuotaError`. - * - If the error indicates a per-minute limit, it's a `RetryableQuotaError`. - * - * @param error The error to classify. - * @returns A `TerminalQuotaError`, `RetryableQuotaError`, or the original `unknown` error. - */ -export function classifyGoogleError(error: unknown): unknown { - const googleApiError = parseGoogleApiError(error); - - if (!googleApiError || googleApiError.code !== 429) { - return error; // Not a 429 error we can handle. - } - - const quotaFailure = googleApiError.details.find( - (d): d is QuotaFailure => - d['@type'] === 'type.googleapis.com/google.rpc.QuotaFailure', - ); - - const errorInfo = googleApiError.details.find( - (d): d is ErrorInfo => - d['@type'] === 'type.googleapis.com/google.rpc.ErrorInfo', - ); - - const retryInfo = googleApiError.details.find( - (d): d is RetryInfo => - d['@type'] === 'type.googleapis.com/google.rpc.RetryInfo', - ); - - // 1. Check for long-term limits in QuotaFailure or ErrorInfo - if (quotaFailure) { - for (const violation of quotaFailure.violations) { - const quotaId = violation.quotaId ?? ''; - if (quotaId.includes('PerDay') || quotaId.includes('Daily')) { - return new TerminalQuotaError( - `Reached a daily quota limit: ${violation.description}`, - googleApiError, - ); - } - } - } - - if (errorInfo) { - const quotaLimit = errorInfo.metadata?.['quota_limit'] ?? ''; - if (quotaLimit.includes('PerDay') || quotaLimit.includes('Daily')) { - return new TerminalQuotaError( - `Reached a daily quota limit: ${errorInfo.reason}`, - googleApiError, - ); - } - } - - // 2. Check for long delays in RetryInfo - if (retryInfo?.retryDelay) { - const delaySeconds = parseDurationInSeconds(retryInfo.retryDelay); - if (delaySeconds !== null) { - if (delaySeconds > FIVE_MINUTES_IN_SECONDS) { - return new TerminalQuotaError( - `Quota limit requires a long delay of ${retryInfo.retryDelay}.`, - googleApiError, - ); - } - // This is a retryable error with a specific delay. - return new RetryableQuotaError( - `Quota limit hit. Retrying after ${retryInfo.retryDelay}.`, - googleApiError, - delaySeconds, - ); - } - } - - // 3. Check for short-term limits in QuotaFailure or ErrorInfo - if (quotaFailure) { - for (const violation of quotaFailure.violations) { - const quotaId = violation.quotaId ?? ''; - if (quotaId.includes('PerMinute')) { - return new RetryableQuotaError( - `Quota limit hit: ${violation.description}. Retrying after 60s.`, - googleApiError, - 60, - ); - } - } - } - - if (errorInfo) { - const quotaLimit = errorInfo.metadata?.['quota_limit'] ?? ''; - if (quotaLimit.includes('PerMinute')) { - return new RetryableQuotaError( - `Quota limit hit: ${errorInfo.reason}. Retrying after 60s.`, - googleApiError, - 60, - ); - } - } - return error; // Fallback to original error if no specific classification fits. -} diff --git a/packages/core/src/utils/quotaErrorDetection.ts b/packages/core/src/utils/quotaErrorDetection.ts index 893e48b0f2..6417e0db57 100644 --- a/packages/core/src/utils/quotaErrorDetection.ts +++ b/packages/core/src/utils/quotaErrorDetection.ts @@ -33,3 +33,68 @@ export function isStructuredError(error: unknown): error is StructuredError { typeof (error as StructuredError).message === 'string' ); } + +export function isProQuotaExceededError(error: unknown): boolean { + // Check for Pro quota exceeded errors by looking for the specific pattern + // This will match patterns like: + // - "Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'" + // - "Quota exceeded for quota metric 'Gemini 2.5-preview Pro Requests'" + // We use string methods instead of regex to avoid ReDoS vulnerabilities + + const checkMessage = (message: string): boolean => + message.includes("Quota exceeded for quota metric 'Gemini") && + message.includes("Pro Requests'"); + + if (typeof error === 'string') { + return checkMessage(error); + } + + if (isStructuredError(error)) { + return checkMessage(error.message); + } + + if (isApiError(error)) { + return checkMessage(error.error.message); + } + + // Check if it's a Gaxios error with response data + if (error && typeof error === 'object' && 'response' in error) { + const gaxiosError = error as { + response?: { + data?: unknown; + }; + }; + if (gaxiosError.response && gaxiosError.response.data) { + if (typeof gaxiosError.response.data === 'string') { + return checkMessage(gaxiosError.response.data); + } + if ( + typeof gaxiosError.response.data === 'object' && + gaxiosError.response.data !== null && + 'error' in gaxiosError.response.data + ) { + const errorData = gaxiosError.response.data as { + error?: { message?: string }; + }; + return checkMessage(errorData.error?.message || ''); + } + } + } + return false; +} + +export function isGenericQuotaExceededError(error: unknown): boolean { + if (typeof error === 'string') { + return error.includes('Quota exceeded for quota metric'); + } + + if (isStructuredError(error)) { + return error.message.includes('Quota exceeded for quota metric'); + } + + if (isApiError(error)) { + return error.error.message.includes('Quota exceeded for quota metric'); + } + + return false; +} diff --git a/packages/core/src/utils/retry.test.ts b/packages/core/src/utils/retry.test.ts index 6b2d4e4312..9461b39b69 100644 --- a/packages/core/src/utils/retry.test.ts +++ b/packages/core/src/utils/retry.test.ts @@ -7,14 +7,9 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import { ApiError } from '@google/genai'; -import { AuthType } from '../core/contentGenerator.js'; import type { HttpError } from './retry.js'; import { retryWithBackoff } from './retry.js'; import { setSimulate429 } from './testUtils.js'; -import { - TerminalQuotaError, - RetryableQuotaError, -} from './googleQuotaErrors.js'; // Helper to create a mock function that fails a certain number of times const createFailingFunction = ( @@ -104,26 +99,26 @@ describe('retryWithBackoff', () => { // Expect it to fail with the error from the 5th attempt. await Promise.all([ - expect(promise).rejects.toThrow('Simulated error attempt 3'), + expect(promise).rejects.toThrow('Simulated error attempt 5'), vi.runAllTimersAsync(), ]); - expect(mockFn).toHaveBeenCalledTimes(3); + expect(mockFn).toHaveBeenCalledTimes(5); }); - it('should default to 3 maxAttempts if options.maxAttempts is undefined', async () => { - // This function will fail more than 3 times to ensure all retries are used. + it('should default to 5 maxAttempts if options.maxAttempts is undefined', async () => { + // This function will fail more than 5 times to ensure all retries are used. const mockFn = createFailingFunction(10); const promise = retryWithBackoff(mockFn, { maxAttempts: undefined }); // Expect it to fail with the error from the 5th attempt. await Promise.all([ - expect(promise).rejects.toThrow('Simulated error attempt 3'), + expect(promise).rejects.toThrow('Simulated error attempt 5'), vi.runAllTimersAsync(), ]); - expect(mockFn).toHaveBeenCalledTimes(3); + expect(mockFn).toHaveBeenCalledTimes(5); }); it('should not retry if shouldRetry returns false', async () => { @@ -340,13 +335,15 @@ describe('retryWithBackoff', () => { }); describe('Flash model fallback for OAuth users', () => { - it('should trigger fallback for OAuth personal users on TerminalQuotaError', async () => { + it('should trigger fallback for OAuth personal users after persistent 429 errors', async () => { const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash'); let fallbackOccurred = false; const mockFn = vi.fn().mockImplementation(async () => { if (!fallbackOccurred) { - throw new TerminalQuotaError('Daily limit reached', {} as any); + const error: HttpError = new Error('Rate limit exceeded'); + error.status = 429; + throw error; } return 'success'; }); @@ -354,9 +351,143 @@ describe('retryWithBackoff', () => { const promise = retryWithBackoff(mockFn, { maxAttempts: 3, initialDelayMs: 100, - onPersistent429: async (authType?: string, error?: unknown) => { + onPersistent429: async (authType?: string) => { fallbackOccurred = true; - return await fallbackCallback(authType, error); + return await fallbackCallback(authType); + }, + authType: 'oauth-personal', + }); + + // Advance all timers to complete retries + await vi.runAllTimersAsync(); + + // Should succeed after fallback + await expect(promise).resolves.toBe('success'); + + // Verify callback was called with correct auth type + expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal'); + + // Should retry again after fallback + expect(mockFn).toHaveBeenCalledTimes(3); // 2 initial attempts + 1 after fallback + }); + + it('should NOT trigger fallback for API key users', async () => { + const fallbackCallback = vi.fn(); + + const mockFn = vi.fn(async () => { + const error: HttpError = new Error('Rate limit exceeded'); + error.status = 429; + throw error; + }); + + const promise = retryWithBackoff(mockFn, { + maxAttempts: 3, + initialDelayMs: 100, + onPersistent429: fallbackCallback, + authType: 'gemini-api-key', + }); + + // Handle the promise properly to avoid unhandled rejections + const resultPromise = promise.catch((error) => error); + await vi.runAllTimersAsync(); + const result = await resultPromise; + + // Should fail after all retries without fallback + expect(result).toBeInstanceOf(Error); + expect(result.message).toBe('Rate limit exceeded'); + + // Callback should not be called for API key users + expect(fallbackCallback).not.toHaveBeenCalled(); + }); + + it('should reset attempt counter and continue after successful fallback', async () => { + let fallbackCalled = false; + const fallbackCallback = vi.fn().mockImplementation(async () => { + fallbackCalled = true; + return 'gemini-2.5-flash'; + }); + + const mockFn = vi.fn().mockImplementation(async () => { + if (!fallbackCalled) { + const error: HttpError = new Error('Rate limit exceeded'); + error.status = 429; + throw error; + } + return 'success'; + }); + + const promise = retryWithBackoff(mockFn, { + maxAttempts: 3, + initialDelayMs: 100, + onPersistent429: fallbackCallback, + authType: 'oauth-personal', + }); + + await vi.runAllTimersAsync(); + + await expect(promise).resolves.toBe('success'); + expect(fallbackCallback).toHaveBeenCalledOnce(); + }); + + it('should continue with original error if fallback is rejected', async () => { + const fallbackCallback = vi.fn().mockResolvedValue(null); // User rejected fallback + + const mockFn = vi.fn(async () => { + const error: HttpError = new Error('Rate limit exceeded'); + error.status = 429; + throw error; + }); + + const promise = retryWithBackoff(mockFn, { + maxAttempts: 3, + initialDelayMs: 100, + onPersistent429: fallbackCallback, + authType: 'oauth-personal', + }); + + // Handle the promise properly to avoid unhandled rejections + const resultPromise = promise.catch((error) => error); + await vi.runAllTimersAsync(); + const result = await resultPromise; + + // Should fail with original error when fallback is rejected + expect(result).toBeInstanceOf(Error); + expect(result.message).toBe('Rate limit exceeded'); + expect(fallbackCallback).toHaveBeenCalledWith( + 'oauth-personal', + expect.any(Error), + ); + }); + + it('should handle mixed error types (only count consecutive 429s)', async () => { + const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash'); + let attempts = 0; + let fallbackOccurred = false; + + const mockFn = vi.fn().mockImplementation(async () => { + attempts++; + if (fallbackOccurred) { + return 'success'; + } + if (attempts === 1) { + // First attempt: 500 error (resets consecutive count) + const error: HttpError = new Error('Server error'); + error.status = 500; + throw error; + } else { + // Remaining attempts: 429 errors + const error: HttpError = new Error('Rate limit exceeded'); + error.status = 429; + throw error; + } + }); + + const promise = retryWithBackoff(mockFn, { + maxAttempts: 5, + initialDelayMs: 100, + onPersistent429: async (authType?: string) => { + fallbackOccurred = true; + return await fallbackCallback(authType); }, authType: 'oauth-personal', }); @@ -364,51 +495,9 @@ describe('retryWithBackoff', () => { await vi.runAllTimersAsync(); await expect(promise).resolves.toBe('success'); - expect(fallbackCallback).toHaveBeenCalledWith( - 'oauth-personal', - expect.any(TerminalQuotaError), - ); - expect(mockFn).toHaveBeenCalledTimes(2); + + // Should trigger fallback after 2 consecutive 429s (attempts 2-3) + expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal'); }); - - it('should use retryDelayMs from RetryableQuotaError', async () => { - const setTimeoutSpy = vi.spyOn(global, 'setTimeout'); - const mockFn = vi.fn().mockImplementation(async () => { - throw new RetryableQuotaError('Per-minute limit', {} as any, 12.345); - }); - - const promise = retryWithBackoff(mockFn, { - maxAttempts: 2, - initialDelayMs: 100, - }); - - // Attach the rejection expectation *before* running timers - // eslint-disable-next-line vitest/valid-expect - const assertionPromise = expect(promise).rejects.toThrow(); - await vi.runAllTimersAsync(); - await assertionPromise; - - expect(setTimeoutSpy).toHaveBeenCalledWith(expect.any(Function), 12345); - }); - - it.each([[AuthType.USE_GEMINI], [AuthType.USE_VERTEX_AI], [undefined]])( - 'should not trigger fallback for non-Google auth users (authType: %s) on TerminalQuotaError', - async (authType) => { - const fallbackCallback = vi.fn(); - const mockFn = vi.fn().mockImplementation(async () => { - throw new TerminalQuotaError('Daily limit reached', {} as any); - }); - - const promise = retryWithBackoff(mockFn, { - maxAttempts: 3, - onPersistent429: fallbackCallback, - authType, - }); - - await expect(promise).rejects.toThrow('Daily limit reached'); - expect(fallbackCallback).not.toHaveBeenCalled(); - expect(mockFn).toHaveBeenCalledTimes(1); - }, - ); }); }); diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts index 7de10eb8d1..007874f965 100644 --- a/packages/core/src/utils/retry.ts +++ b/packages/core/src/utils/retry.ts @@ -8,10 +8,9 @@ import type { GenerateContentResponse } from '@google/genai'; import { ApiError } from '@google/genai'; import { AuthType } from '../core/contentGenerator.js'; import { - classifyGoogleError, - RetryableQuotaError, - TerminalQuotaError, -} from './googleQuotaErrors.js'; + isProQuotaExceededError, + isGenericQuotaExceededError, +} from './quotaErrorDetection.js'; const FETCH_FAILED_MESSAGE = 'exception TypeError: fetch failed sending request'; @@ -35,7 +34,7 @@ export interface RetryOptions { } const DEFAULT_RETRY_OPTIONS: RetryOptions = { - maxAttempts: 3, + maxAttempts: 5, initialDelayMs: 5000, maxDelayMs: 30000, // 30 seconds shouldRetryOnError: defaultShouldRetry, @@ -120,6 +119,7 @@ export async function retryWithBackoff( let attempt = 0; let currentDelay = initialDelayMs; + let consecutive429Count = 0; while (attempt < maxAttempts) { attempt++; @@ -139,39 +139,94 @@ export async function retryWithBackoff( return result; } catch (error) { - const classifiedError = classifyGoogleError(error); + const errorStatus = getErrorStatus(error); - if (classifiedError instanceof TerminalQuotaError) { - if (onPersistent429 && authType === AuthType.LOGIN_WITH_GOOGLE) { - try { - const fallbackModel = await onPersistent429( - authType, - classifiedError, - ); - if (fallbackModel) { - attempt = 0; // Reset attempts and retry with the new model. - currentDelay = initialDelayMs; - continue; - } - } catch (fallbackError) { - console.warn('Model fallback failed:', fallbackError); + // Check for Pro quota exceeded error first - immediate fallback for OAuth users + if ( + errorStatus === 429 && + authType === AuthType.LOGIN_WITH_GOOGLE && + isProQuotaExceededError(error) && + onPersistent429 + ) { + try { + const fallbackModel = await onPersistent429(authType, error); + if (fallbackModel !== false && fallbackModel !== null) { + // Reset attempt counter and try with new model + attempt = 0; + consecutive429Count = 0; + currentDelay = initialDelayMs; + // With the model updated, we continue to the next attempt + continue; + } else { + // Fallback handler returned null/false, meaning don't continue - stop retry process + throw error; } + } catch (fallbackError) { + // If fallback fails, continue with original error + console.warn('Fallback to Flash model failed:', fallbackError); } - throw classifiedError; // Throw if no fallback or fallback failed. } - if (classifiedError instanceof RetryableQuotaError) { - if (attempt >= maxAttempts) { - throw classifiedError; + // Check for generic quota exceeded error (but not Pro, which was handled above) - immediate fallback for OAuth users + if ( + errorStatus === 429 && + authType === AuthType.LOGIN_WITH_GOOGLE && + !isProQuotaExceededError(error) && + isGenericQuotaExceededError(error) && + onPersistent429 + ) { + try { + const fallbackModel = await onPersistent429(authType, error); + if (fallbackModel !== false && fallbackModel !== null) { + // Reset attempt counter and try with new model + attempt = 0; + consecutive429Count = 0; + currentDelay = initialDelayMs; + // With the model updated, we continue to the next attempt + continue; + } else { + // Fallback handler returned null/false, meaning don't continue - stop retry process + throw error; + } + } catch (fallbackError) { + // If fallback fails, continue with original error + console.warn('Fallback to Flash model failed:', fallbackError); } - console.warn( - `Attempt ${attempt} failed: ${classifiedError.message}. Retrying after ${classifiedError.retryDelayMs}ms...`, - ); - await delay(classifiedError.retryDelayMs); - continue; } - // Generic retry logic for other errors + // Track consecutive 429 errors + if (errorStatus === 429) { + consecutive429Count++; + } else { + consecutive429Count = 0; + } + + // If we have persistent 429s and a fallback callback for OAuth + if ( + consecutive429Count >= 2 && + onPersistent429 && + authType === AuthType.LOGIN_WITH_GOOGLE + ) { + try { + const fallbackModel = await onPersistent429(authType, error); + if (fallbackModel !== false && fallbackModel !== null) { + // Reset attempt counter and try with new model + attempt = 0; + consecutive429Count = 0; + currentDelay = initialDelayMs; + // With the model updated, we continue to the next attempt + continue; + } else { + // Fallback handler returned null/false, meaning don't continue - stop retry process + throw error; + } + } catch (fallbackError) { + // If fallback fails, continue with original error + console.warn('Fallback to Flash model failed:', fallbackError); + } + } + + // Check if we've exhausted retries or shouldn't retry if ( attempt >= maxAttempts || !shouldRetryOnError(error as Error, retryFetchErrors) @@ -179,17 +234,31 @@ export async function retryWithBackoff( throw error; } - const errorStatus = getErrorStatus(error); - logRetryAttempt(attempt, error, errorStatus); + const { delayDurationMs, errorStatus: delayErrorStatus } = + getDelayDurationAndStatus(error); - // Exponential backoff with jitter for non-quota errors - const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1); - const delayWithJitter = Math.max(0, currentDelay + jitter); - await delay(delayWithJitter); - currentDelay = Math.min(maxDelayMs, currentDelay * 2); + if (delayDurationMs > 0) { + // Respect Retry-After header if present and parsed + console.warn( + `Attempt ${attempt} failed with status ${delayErrorStatus ?? 'unknown'}. Retrying after explicit delay of ${delayDurationMs}ms...`, + error, + ); + await delay(delayDurationMs); + // Reset currentDelay for next potential non-429 error, or if Retry-After is not present next time + currentDelay = initialDelayMs; + } else { + // Fall back to exponential backoff with jitter + logRetryAttempt(attempt, error, errorStatus); + // Add jitter: +/- 30% of currentDelay + const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1); + const delayWithJitter = Math.max(0, currentDelay + jitter); + await delay(delayWithJitter); + currentDelay = Math.min(maxDelayMs, currentDelay * 2); + } } } - + // This line should theoretically be unreachable due to the throw in the catch block. + // Added for type safety and to satisfy the compiler that a promise is always returned. throw new Error('Retry attempts exhausted'); } @@ -220,6 +289,62 @@ export function getErrorStatus(error: unknown): number | undefined { return undefined; } +/** + * Extracts the Retry-After delay from an error object's headers. + * @param error The error object. + * @returns The delay in milliseconds, or 0 if not found or invalid. + */ +function getRetryAfterDelayMs(error: unknown): number { + if (typeof error === 'object' && error !== null) { + // Check for error.response.headers (common in axios errors) + if ( + 'response' in error && + typeof (error as { response?: unknown }).response === 'object' && + (error as { response?: unknown }).response !== null + ) { + const response = (error as { response: { headers?: unknown } }).response; + if ( + 'headers' in response && + typeof response.headers === 'object' && + response.headers !== null + ) { + const headers = response.headers as { 'retry-after'?: unknown }; + const retryAfterHeader = headers['retry-after']; + if (typeof retryAfterHeader === 'string') { + const retryAfterSeconds = parseInt(retryAfterHeader, 10); + if (!isNaN(retryAfterSeconds)) { + return retryAfterSeconds * 1000; + } + // It might be an HTTP date + const retryAfterDate = new Date(retryAfterHeader); + if (!isNaN(retryAfterDate.getTime())) { + return Math.max(0, retryAfterDate.getTime() - Date.now()); + } + } + } + } + } + return 0; +} + +/** + * Determines the delay duration based on the error, prioritizing Retry-After header. + * @param error The error object. + * @returns An object containing the delay duration in milliseconds and the error status. + */ +function getDelayDurationAndStatus(error: unknown): { + delayDurationMs: number; + errorStatus: number | undefined; +} { + const errorStatus = getErrorStatus(error); + let delayDurationMs = 0; + + if (errorStatus === 429) { + delayDurationMs = getRetryAfterDelayMs(error); + } + return { delayDurationMs, errorStatus }; +} + /** * Logs a message for a retry attempt when using exponential backoff. * @param attempt The current attempt number.