From ee92db7533d33335f4146359a9338d451296105f Mon Sep 17 00:00:00 2001 From: Gaurav <39389231+gsquared94@users.noreply.github.com> Date: Fri, 24 Oct 2025 11:09:06 -0700 Subject: [PATCH] fix: handle request retries and model fallback correctly (#11624) --- .../src/ui/hooks/useQuotaAndFallback.test.ts | 99 +++-- .../cli/src/ui/hooks/useQuotaAndFallback.ts | 29 +- packages/core/index.ts | 2 + packages/core/src/index.ts | 1 + packages/core/src/utils/errorParsing.test.ts | 244 ------------ packages/core/src/utils/errorParsing.ts | 91 +---- packages/core/src/utils/flashFallback.test.ts | 76 ++-- packages/core/src/utils/googleErrors.test.ts | 356 ++++++++++++++++++ packages/core/src/utils/googleErrors.ts | 305 +++++++++++++++ .../core/src/utils/googleQuotaErrors.test.ts | 306 +++++++++++++++ packages/core/src/utils/googleQuotaErrors.ts | 192 ++++++++++ .../core/src/utils/quotaErrorDetection.ts | 65 ---- packages/core/src/utils/retry.test.ts | 181 +++------ packages/core/src/utils/retry.ts | 214 +++-------- 14 files changed, 1357 insertions(+), 804 deletions(-) create mode 100644 packages/core/src/utils/googleErrors.test.ts create mode 100644 packages/core/src/utils/googleErrors.ts create mode 100644 packages/core/src/utils/googleQuotaErrors.test.ts create mode 100644 packages/core/src/utils/googleQuotaErrors.ts diff --git a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts index 6d7782694f..0e94a1874d 100644 --- a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts +++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts @@ -19,25 +19,15 @@ import { type FallbackModelHandler, UserTierId, AuthType, - isGenericQuotaExceededError, - isProQuotaExceededError, + TerminalQuotaError, makeFakeConfig, + type GoogleApiError, + RetryableQuotaError, } from '@google/gemini-cli-core'; import { useQuotaAndFallback } from './useQuotaAndFallback.js'; import type { UseHistoryManagerReturn } from './useHistoryManager.js'; 
import { AuthState, MessageType } from '../types.js'; -// Mock the error checking functions from the core package to control test scenarios -vi.mock('@google/gemini-cli-core', async (importOriginal) => { - const original = - await importOriginal(); - return { - ...original, - isGenericQuotaExceededError: vi.fn(), - isProQuotaExceededError: vi.fn(), - }; -}); - // Use a type alias for SpyInstance as it's not directly exported type SpyInstance = ReturnType; @@ -47,12 +37,15 @@ describe('useQuotaAndFallback', () => { let mockSetAuthState: Mock; let mockSetModelSwitchedFromQuotaError: Mock; let setFallbackHandlerSpy: SpyInstance; - - const mockedIsGenericQuotaExceededError = isGenericQuotaExceededError as Mock; - const mockedIsProQuotaExceededError = isProQuotaExceededError as Mock; + let mockGoogleApiError: GoogleApiError; beforeEach(() => { mockConfig = makeFakeConfig(); + mockGoogleApiError = { + code: 429, + message: 'mock error', + details: [], + }; // Spy on the method that requires the private field and mock its return. // This is cleaner than modifying the config class for tests. 
@@ -72,9 +65,6 @@ describe('useQuotaAndFallback', () => { setFallbackHandlerSpy = vi.spyOn(mockConfig, 'setFallbackModelHandler'); vi.spyOn(mockConfig, 'setQuotaErrorOccurred'); - - mockedIsGenericQuotaExceededError.mockReturnValue(false); - mockedIsProQuotaExceededError.mockReturnValue(false); }); afterEach(() => { @@ -140,51 +130,62 @@ describe('useQuotaAndFallback', () => { describe('Automatic Fallback Scenarios', () => { const testCases = [ { - errorType: 'generic', + description: 'other error for FREE tier', tier: UserTierId.FREE, + error: new Error('some error'), expectedMessageSnippets: [ - 'Automatically switching from model-A to model-B', + 'Automatically switching from model-A to model-B for faster responses', 'upgrade to a Gemini Code Assist Standard or Enterprise plan', ], }, { - errorType: 'generic', - tier: UserTierId.STANDARD, // Paid tier + description: 'other error for LEGACY tier', + tier: UserTierId.LEGACY, // Paid tier + error: new Error('some error'), expectedMessageSnippets: [ - 'Automatically switching from model-A to model-B', + 'Automatically switching from model-A to model-B for faster responses', 'switch to using a paid API key from AI Studio', ], }, { - errorType: 'other', + description: 'retryable quota error for FREE tier', tier: UserTierId.FREE, + error: new RetryableQuotaError( + 'retryable quota', + mockGoogleApiError, + 5, + ), expectedMessageSnippets: [ - 'Automatically switching from model-A to model-B for faster responses', - 'upgrade to a Gemini Code Assist Standard or Enterprise plan', + 'Your requests are being throttled right now due to server being at capacity for model-A', + 'Automatically switching from model-A to model-B', + 'upgrading to a Gemini Code Assist Standard or Enterprise plan', ], }, { - errorType: 'other', + description: 'retryable quota error for LEGACY tier', tier: UserTierId.LEGACY, // Paid tier + error: new RetryableQuotaError( + 'retryable quota', + mockGoogleApiError, + 5, + ), expectedMessageSnippets: 
[ - 'Automatically switching from model-A to model-B for faster responses', + 'Your requests are being throttled right now due to server being at capacity for model-A', + 'Automatically switching from model-A to model-B', 'switch to using a paid API key from AI Studio', ], }, ]; - for (const { errorType, tier, expectedMessageSnippets } of testCases) { - it(`should handle ${errorType} error for ${tier} tier correctly`, async () => { - mockedIsGenericQuotaExceededError.mockReturnValue( - errorType === 'generic', - ); - + for (const { + description, + tier, + error, + expectedMessageSnippets, + } of testCases) { + it(`should handle ${description} correctly`, async () => { const handler = getRegisteredHandler(tier); - const result = await handler( - 'model-A', - 'model-B', - new Error('quota exceeded'), - ); + const result = await handler('model-A', 'model-B', error); // Automatic fallbacks should return 'stop' expect(result).toBe('stop'); @@ -207,10 +208,6 @@ describe('useQuotaAndFallback', () => { }); describe('Interactive Fallback (Pro Quota Error)', () => { - beforeEach(() => { - mockedIsProQuotaExceededError.mockReturnValue(true); - }); - it('should set an interactive request and wait for user choice', async () => { const { result } = renderHook(() => useQuotaAndFallback({ @@ -229,7 +226,7 @@ describe('useQuotaAndFallback', () => { const promise = handler( 'gemini-pro', 'gemini-flash', - new Error('pro quota'), + new TerminalQuotaError('pro quota', mockGoogleApiError), ); await act(async () => {}); @@ -268,7 +265,7 @@ describe('useQuotaAndFallback', () => { const promise1 = handler( 'gemini-pro', 'gemini-flash', - new Error('pro quota 1'), + new TerminalQuotaError('pro quota 1', mockGoogleApiError), ); await act(async () => {}); @@ -278,7 +275,7 @@ describe('useQuotaAndFallback', () => { const result2 = await handler( 'gemini-pro', 'gemini-flash', - new Error('pro quota 2'), + new TerminalQuotaError('pro quota 2', mockGoogleApiError), ); // The lock should have 
stopped the second request @@ -297,10 +294,6 @@ describe('useQuotaAndFallback', () => { }); describe('handleProQuotaChoice', () => { - beforeEach(() => { - mockedIsProQuotaExceededError.mockReturnValue(true); - }); - it('should do nothing if there is no pending pro quota request', () => { const { result } = renderHook(() => useQuotaAndFallback({ @@ -336,7 +329,7 @@ describe('useQuotaAndFallback', () => { const promise = handler( 'gemini-pro', 'gemini-flash', - new Error('pro quota'), + new TerminalQuotaError('pro quota', mockGoogleApiError), ); await act(async () => {}); // Allow state to update @@ -367,7 +360,7 @@ describe('useQuotaAndFallback', () => { const promise = handler( 'gemini-pro', 'gemini-flash', - new Error('pro quota'), + new TerminalQuotaError('pro quota', mockGoogleApiError), ); await act(async () => {}); // Allow state to update diff --git a/packages/cli/src/ui/hooks/useQuotaAndFallback.ts b/packages/cli/src/ui/hooks/useQuotaAndFallback.ts index a7eb77659a..194f5f27fc 100644 --- a/packages/cli/src/ui/hooks/useQuotaAndFallback.ts +++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.ts @@ -9,9 +9,9 @@ import { type Config, type FallbackModelHandler, type FallbackIntent, - isGenericQuotaExceededError, - isProQuotaExceededError, + TerminalQuotaError, UserTierId, + RetryableQuotaError, } from '@google/gemini-cli-core'; import { useCallback, useEffect, useRef, useState } from 'react'; import { type UseHistoryManagerReturn } from './useHistoryManager.js'; @@ -63,7 +63,7 @@ export function useQuotaAndFallback({ let message: string; - if (error && isProQuotaExceededError(error)) { + if (error instanceof TerminalQuotaError) { // Pro Quota specific messages (Interactive) if (isPaidTier) { message = `⚡ You have reached your daily ${failedModel} quota limit. @@ -76,31 +76,30 @@ export function useQuotaAndFallback({ ⚡ Or you can utilize a Gemini API Key. 
See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key ⚡ You can switch authentication methods by typing /auth`; } - } else if (error && isGenericQuotaExceededError(error)) { - // Generic Quota (Automatic fallback) - const actionMessage = `⚡ You have reached your daily quota limit.\n⚡ Automatically switching from ${failedModel} to ${fallbackModel} for the remainder of this session.`; + } else if (error instanceof RetryableQuotaError) { + // Short term quota retries exhausted (Automatic fallback) + const actionMessage = `⚡ Your requests are being throttled right now due to server being at capacity for ${failedModel}.\n⚡ Automatically switching from ${failedModel} to ${fallbackModel} for the remainder of this session.`; if (isPaidTier) { message = `${actionMessage} -⚡ To continue accessing the ${failedModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; +⚡ To continue accessing the ${failedModel} model, retry your request after some time or consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; } else { message = `${actionMessage} -⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist -⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key +⚡ Retry your requests after some time. 
Otherwise consider upgrading to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist ⚡ You can switch authentication methods by typing /auth`; } } else { - // Consecutive 429s or other errors (Automatic fallback) + // Other errors (Automatic fallback) const actionMessage = `⚡ Automatically switching from ${failedModel} to ${fallbackModel} for faster responses for the remainder of this session.`; if (isPaidTier) { message = `${actionMessage} -⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${failedModel} quota limit -⚡ To continue accessing the ${failedModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; +⚡ Your requests are being throttled temporarily due to server being at capacity for ${failedModel} or there is a service outage. +⚡ To continue accessing the ${failedModel} model, you can retry your request after some time or consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; } else { message = `${actionMessage} -⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${failedModel} quota limit -⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist +⚡ Your requests are being throttled temporarily due to server being at capacity for ${failedModel} or there is a service outage. +⚡ To avoid being throttled, you can retry your request after some time or upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist ⚡ Or you can utilize a Gemini API Key. 
See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key ⚡ You can switch authentication methods by typing /auth`; } @@ -119,7 +118,7 @@ export function useQuotaAndFallback({ config.setQuotaErrorOccurred(true); // Interactive Fallback for Pro quota - if (error && isProQuotaExceededError(error)) { + if (error instanceof TerminalQuotaError) { if (isDialogPending.current) { return 'stop'; // A dialog is already active, so just stop this request. } diff --git a/packages/core/index.ts b/packages/core/index.ts index 729fcc8d48..acc9743e61 100644 --- a/packages/core/index.ts +++ b/packages/core/index.ts @@ -44,3 +44,5 @@ export { makeFakeConfig } from './src/test-utils/config.js'; export * from './src/utils/pathReader.js'; export { ClearcutLogger } from './src/telemetry/clearcut-logger/clearcut-logger.js'; export { logModelSlashCommand } from './src/telemetry/loggers.js'; +export * from './src/utils/googleQuotaErrors.js'; +export type { GoogleApiError } from './src/utils/googleErrors.js'; diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 42ced4457f..bc2eab2147 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -48,6 +48,7 @@ export * from './utils/gitIgnoreParser.js'; export * from './utils/gitUtils.js'; export * from './utils/editor.js'; export * from './utils/quotaErrorDetection.js'; +export * from './utils/googleQuotaErrors.js'; export * from './utils/fileUtils.js'; export * from './utils/retry.js'; export * from './utils/shell-utils.js'; diff --git a/packages/core/src/utils/errorParsing.test.ts b/packages/core/src/utils/errorParsing.test.ts index 9c71f4d89b..291145d2e8 100644 --- a/packages/core/src/utils/errorParsing.test.ts +++ b/packages/core/src/utils/errorParsing.test.ts @@ -6,9 +6,7 @@ import { describe, it, expect } from 'vitest'; import { parseAndFormatApiError } from './errorParsing.js'; -import { isProQuotaExceededError } from './quotaErrorDetection.js'; import { DEFAULT_GEMINI_FLASH_MODEL } from 
'../config/models.js'; -import { UserTierId } from '../code_assist/types.js'; import { AuthType } from '../core/contentGenerator.js'; import type { StructuredError } from '../core/turn.js'; @@ -40,22 +38,6 @@ describe('parseAndFormatApiError', () => { ); }); - it('should format a 429 API error with the personal message', () => { - const errorMessage = - 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}'; - const result = parseAndFormatApiError( - errorMessage, - AuthType.LOGIN_WITH_GOOGLE, - undefined, - 'gemini-2.5-pro', - DEFAULT_GEMINI_FLASH_MODEL, - ); - expect(result).toContain('[API Error: Rate limit exceeded'); - expect(result).toContain( - 'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model', - ); - }); - it('should format a 429 API error with the vertex message', () => { const errorMessage = 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}'; @@ -132,230 +114,4 @@ describe('parseAndFormatApiError', () => { const expected = '[API Error: An unknown error occurred.]'; expect(parseAndFormatApiError(error)).toBe(expected); }); - - it('should format a 429 API error with Pro quota exceeded message for Google auth (Free tier)', () => { - const errorMessage = - 'got status: 429 Too Many Requests. 
{"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; - const result = parseAndFormatApiError( - errorMessage, - AuthType.LOGIN_WITH_GOOGLE, - undefined, - 'gemini-2.5-pro', - DEFAULT_GEMINI_FLASH_MODEL, - ); - expect(result).toContain( - "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'", - ); - expect(result).toContain( - 'You have reached your daily gemini-2.5-pro quota limit', - ); - expect(result).toContain('upgrade to get higher limits'); - }); - - it('should format a regular 429 API error with standard message for Google auth', () => { - const errorMessage = - 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}'; - const result = parseAndFormatApiError( - errorMessage, - AuthType.LOGIN_WITH_GOOGLE, - undefined, - 'gemini-2.5-pro', - DEFAULT_GEMINI_FLASH_MODEL, - ); - expect(result).toContain('[API Error: Rate limit exceeded'); - expect(result).toContain( - 'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model', - ); - expect(result).not.toContain( - 'You have reached your daily gemini-2.5-pro quota limit', - ); - }); - - it('should format a 429 API error with generic quota exceeded message for Google auth', () => { - const errorMessage = - 'got status: 429 Too Many Requests. 
{"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; - const result = parseAndFormatApiError( - errorMessage, - AuthType.LOGIN_WITH_GOOGLE, - undefined, - 'gemini-2.5-pro', - DEFAULT_GEMINI_FLASH_MODEL, - ); - expect(result).toContain( - "[API Error: Quota exceeded for quota metric 'GenerationRequests'", - ); - expect(result).toContain('You have reached your daily quota limit'); - expect(result).not.toContain( - 'You have reached your daily Gemini 2.5 Pro quota limit', - ); - }); - - it('should prioritize Pro quota message over generic quota message for Google auth', () => { - const errorMessage = - 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; - const result = parseAndFormatApiError( - errorMessage, - AuthType.LOGIN_WITH_GOOGLE, - undefined, - 'gemini-2.5-pro', - DEFAULT_GEMINI_FLASH_MODEL, - ); - expect(result).toContain( - "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'", - ); - expect(result).toContain( - 'You have reached your daily gemini-2.5-pro quota limit', - ); - expect(result).not.toContain('You have reached your daily quota limit'); - }); - - it('should format a 429 API error with Pro quota exceeded message for Google auth (Standard tier)', () => { - const errorMessage = - 'got status: 429 Too Many Requests. 
{"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; - const result = parseAndFormatApiError( - errorMessage, - AuthType.LOGIN_WITH_GOOGLE, - UserTierId.STANDARD, - 'gemini-2.5-pro', - DEFAULT_GEMINI_FLASH_MODEL, - ); - expect(result).toContain( - "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'", - ); - expect(result).toContain( - 'You have reached your daily gemini-2.5-pro quota limit', - ); - expect(result).toContain( - 'We appreciate you for choosing Gemini Code Assist and the Gemini CLI', - ); - expect(result).not.toContain('upgrade to get higher limits'); - }); - - it('should format a 429 API error with Pro quota exceeded message for Google auth (Legacy tier)', () => { - const errorMessage = - 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; - const result = parseAndFormatApiError( - errorMessage, - AuthType.LOGIN_WITH_GOOGLE, - UserTierId.LEGACY, - 'gemini-2.5-pro', - DEFAULT_GEMINI_FLASH_MODEL, - ); - expect(result).toContain( - "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'", - ); - expect(result).toContain( - 'You have reached your daily gemini-2.5-pro quota limit', - ); - expect(result).toContain( - 'We appreciate you for choosing Gemini Code Assist and the Gemini CLI', - ); - expect(result).not.toContain('upgrade to get higher limits'); - }); - - it('should handle different Gemini 2.5 version strings in Pro quota exceeded errors', () => { - const errorMessage25 = - 'got status: 429 Too Many Requests. 
{"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; - const errorMessagePreview = - 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; - - const result25 = parseAndFormatApiError( - errorMessage25, - AuthType.LOGIN_WITH_GOOGLE, - undefined, - 'gemini-2.5-pro', - DEFAULT_GEMINI_FLASH_MODEL, - ); - const resultPreview = parseAndFormatApiError( - errorMessagePreview, - AuthType.LOGIN_WITH_GOOGLE, - undefined, - 'gemini-2.5-preview-pro', - DEFAULT_GEMINI_FLASH_MODEL, - ); - - expect(result25).toContain( - 'You have reached your daily gemini-2.5-pro quota limit', - ); - expect(resultPreview).toContain( - 'You have reached your daily gemini-2.5-preview-pro quota limit', - ); - expect(result25).toContain('upgrade to get higher limits'); - expect(resultPreview).toContain('upgrade to get higher limits'); - }); - - it('should not match non-Pro models with similar version strings', () => { - // Test that Flash models with similar version strings don't match - expect( - isProQuotaExceededError( - "Quota exceeded for quota metric 'Gemini 2.5 Flash Requests' and limit", - ), - ).toBe(false); - expect( - isProQuotaExceededError( - "Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit", - ), - ).toBe(false); - - // Test other model types - expect( - isProQuotaExceededError( - "Quota exceeded for quota metric 'Gemini 2.5 Ultra Requests' and limit", - ), - ).toBe(false); - expect( - isProQuotaExceededError( - "Quota exceeded for quota metric 'Gemini 2.5 Standard Requests' and limit", - ), - ).toBe(false); - - // Test generic 
quota messages - expect( - isProQuotaExceededError( - "Quota exceeded for quota metric 'GenerationRequests' and limit", - ), - ).toBe(false); - expect( - isProQuotaExceededError( - "Quota exceeded for quota metric 'EmbeddingRequests' and limit", - ), - ).toBe(false); - }); - - it('should format a generic quota exceeded message for Google auth (Standard tier)', () => { - const errorMessage = - 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; - const result = parseAndFormatApiError( - errorMessage, - AuthType.LOGIN_WITH_GOOGLE, - UserTierId.STANDARD, - 'gemini-2.5-pro', - DEFAULT_GEMINI_FLASH_MODEL, - ); - expect(result).toContain( - "[API Error: Quota exceeded for quota metric 'GenerationRequests'", - ); - expect(result).toContain('You have reached your daily quota limit'); - expect(result).toContain( - 'We appreciate you for choosing Gemini Code Assist and the Gemini CLI', - ); - expect(result).not.toContain('upgrade to get higher limits'); - }); - - it('should format a regular 429 API error with standard message for Google auth (Standard tier)', () => { - const errorMessage = - 'got status: 429 Too Many Requests. 
{"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}'; - const result = parseAndFormatApiError( - errorMessage, - AuthType.LOGIN_WITH_GOOGLE, - UserTierId.STANDARD, - 'gemini-2.5-pro', - DEFAULT_GEMINI_FLASH_MODEL, - ); - expect(result).toContain('[API Error: Rate limit exceeded'); - expect(result).toContain( - 'We appreciate you for choosing Gemini Code Assist and the Gemini CLI', - ); - expect(result).not.toContain('upgrade to get higher limits'); - }); }); diff --git a/packages/core/src/utils/errorParsing.ts b/packages/core/src/utils/errorParsing.ts index ecfc237573..bad61ea9e2 100644 --- a/packages/core/src/utils/errorParsing.ts +++ b/packages/core/src/utils/errorParsing.ts @@ -4,50 +4,11 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { - isProQuotaExceededError, - isGenericQuotaExceededError, - isApiError, - isStructuredError, -} from './quotaErrorDetection.js'; -import { - DEFAULT_GEMINI_MODEL, - DEFAULT_GEMINI_FLASH_MODEL, -} from '../config/models.js'; -import { UserTierId } from '../code_assist/types.js'; +import { isApiError, isStructuredError } from './quotaErrorDetection.js'; +import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js'; +import type { UserTierId } from '../code_assist/types.js'; import { AuthType } from '../core/contentGenerator.js'; -// Free Tier message functions -const getRateLimitErrorMessageGoogleFree = ( - fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, -) => - `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`; - -const getRateLimitErrorMessageGoogleProQuotaFree = ( - currentModel: string = DEFAULT_GEMINI_MODEL, - fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, -) => - `\nYou have reached your daily ${currentModel} quota limit. You will be switched to the ${fallbackModel} model for the rest of this session. 
To increase your limits, upgrade to get higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; - -const getRateLimitErrorMessageGoogleGenericQuotaFree = () => - `\nYou have reached your daily quota limit. To increase your limits, upgrade to get higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; - -// Legacy/Standard Tier message functions -const getRateLimitErrorMessageGooglePaid = ( - fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, -) => - `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`; - -const getRateLimitErrorMessageGoogleProQuotaPaid = ( - currentModel: string = DEFAULT_GEMINI_MODEL, - fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, -) => - `\nYou have reached your daily ${currentModel} quota limit. You will be switched to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI. To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; - -const getRateLimitErrorMessageGoogleGenericQuotaPaid = ( - currentModel: string = DEFAULT_GEMINI_MODEL, -) => - `\nYou have reached your daily quota limit. We appreciate you for choosing Gemini Code Assist and the Gemini CLI. To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; const RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI = '\nPlease wait and try again later. 
To increase your limits, request a quota increase through AI Studio, or switch to another /auth method'; const RATE_LIMIT_ERROR_MESSAGE_VERTEX = @@ -59,39 +20,9 @@ const getRateLimitErrorMessageDefault = ( function getRateLimitMessage( authType?: AuthType, - error?: unknown, - userTier?: UserTierId, - currentModel?: string, fallbackModel?: string, ): string { switch (authType) { - case AuthType.LOGIN_WITH_GOOGLE: { - // Determine if user is on a paid tier (Legacy or Standard) - default to FREE if not specified - const isPaidTier = - userTier === UserTierId.LEGACY || userTier === UserTierId.STANDARD; - - if (isProQuotaExceededError(error)) { - return isPaidTier - ? getRateLimitErrorMessageGoogleProQuotaPaid( - currentModel || DEFAULT_GEMINI_MODEL, - fallbackModel, - ) - : getRateLimitErrorMessageGoogleProQuotaFree( - currentModel || DEFAULT_GEMINI_MODEL, - fallbackModel, - ); - } else if (isGenericQuotaExceededError(error)) { - return isPaidTier - ? getRateLimitErrorMessageGoogleGenericQuotaPaid( - currentModel || DEFAULT_GEMINI_MODEL, - ) - : getRateLimitErrorMessageGoogleGenericQuotaFree(); - } else { - return isPaidTier - ? 
getRateLimitErrorMessageGooglePaid(fallbackModel) - : getRateLimitErrorMessageGoogleFree(fallbackModel); - } - } case AuthType.USE_GEMINI: return RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI; case AuthType.USE_VERTEX_AI: @@ -111,13 +42,7 @@ export function parseAndFormatApiError( if (isStructuredError(error)) { let text = `[API Error: ${error.message}]`; if (error.status === 429) { - text += getRateLimitMessage( - authType, - error, - userTier, - currentModel, - fallbackModel, - ); + text += getRateLimitMessage(authType, fallbackModel); } return text; } @@ -146,13 +71,7 @@ export function parseAndFormatApiError( } let text = `[API Error: ${finalMessage} (Status: ${parsedError.error.status})]`; if (parsedError.error.code === 429) { - text += getRateLimitMessage( - authType, - parsedError, - userTier, - currentModel, - fallbackModel, - ); + text += getRateLimitMessage(authType, fallbackModel); } return text; } diff --git a/packages/core/src/utils/flashFallback.test.ts b/packages/core/src/utils/flashFallback.test.ts index 8ef9665f42..a3f08f5df6 100644 --- a/packages/core/src/utils/flashFallback.test.ts +++ b/packages/core/src/utils/flashFallback.test.ts @@ -11,7 +11,6 @@ import { setSimulate429, disableSimulationAfterFallback, shouldSimulate429, - createSimulated429Error, resetRequestCounter, } from './testUtils.js'; import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js'; @@ -19,12 +18,15 @@ import { retryWithBackoff } from './retry.js'; import { AuthType } from '../core/contentGenerator.js'; // Import the new types (Assuming this test file is in packages/core/src/utils/) import type { FallbackModelHandler } from '../fallback/types.js'; +import type { GoogleApiError } from './googleErrors.js'; +import { TerminalQuotaError } from './googleQuotaErrors.js'; vi.mock('node:fs'); // Update the description to reflect that this tests the retry utility's integration describe('Retry Utility Fallback Integration', () => { let config: Config; + let mockGoogleApiError: 
GoogleApiError; beforeEach(() => { vi.mocked(fs.existsSync).mockReturnValue(true); @@ -38,6 +40,11 @@ describe('Retry Utility Fallback Integration', () => { cwd: '/test', model: 'gemini-2.5-pro', }); + mockGoogleApiError = { + code: 429, + message: 'mock error', + details: [], + }; // Reset simulation state for each test setSimulate429(false); @@ -56,6 +63,7 @@ describe('Retry Utility Fallback Integration', () => { const result = await config.fallbackModelHandler!( 'gemini-2.5-pro', DEFAULT_GEMINI_FLASH_MODEL, + new Error('test'), ); // Verify it returns the correct intent @@ -63,81 +71,61 @@ describe('Retry Utility Fallback Integration', () => { }); // This test validates the retry utility's logic for triggering the callback. - it('should trigger onPersistent429 after 2 consecutive 429 errors for OAuth users', async () => { + it('should trigger onPersistent429 on TerminalQuotaError for OAuth users', async () => { let fallbackCalled = false; - // Removed fallbackModel variable as it's no longer relevant here. - // Mock function that simulates exactly 2 429 errors, then succeeds after fallback const mockApiCall = vi .fn() - .mockRejectedValueOnce(createSimulated429Error()) - .mockRejectedValueOnce(createSimulated429Error()) + .mockRejectedValueOnce( + new TerminalQuotaError('Daily limit', mockGoogleApiError), + ) + .mockRejectedValueOnce( + new TerminalQuotaError('Daily limit', mockGoogleApiError), + ) .mockResolvedValueOnce('success after fallback'); - // Mock the onPersistent429 callback (this is what client.ts/geminiChat.ts provides) const mockPersistent429Callback = vi.fn(async (_authType?: string) => { fallbackCalled = true; - // Return true to signal retryWithBackoff to reset attempts and continue. 
return true; }); - // Test with OAuth personal auth type, with maxAttempts = 2 to ensure fallback triggers const result = await retryWithBackoff(mockApiCall, { maxAttempts: 2, initialDelayMs: 1, maxDelayMs: 10, - shouldRetryOnError: (error: Error) => { - const status = (error as Error & { status?: number }).status; - return status === 429; - }, onPersistent429: mockPersistent429Callback, authType: AuthType.LOGIN_WITH_GOOGLE, }); - // Verify fallback mechanism was triggered expect(fallbackCalled).toBe(true); expect(mockPersistent429Callback).toHaveBeenCalledWith( AuthType.LOGIN_WITH_GOOGLE, - expect.any(Error), + expect.any(TerminalQuotaError), ); expect(result).toBe('success after fallback'); - // Should have: 2 failures, then fallback triggered, then 1 success after retry reset expect(mockApiCall).toHaveBeenCalledTimes(3); }); it('should not trigger onPersistent429 for API key users', async () => { - let fallbackCalled = false; + const fallbackCallback = vi.fn(); - // Mock function that simulates 429 errors - const mockApiCall = vi.fn().mockRejectedValue(createSimulated429Error()); + const mockApiCall = vi + .fn() + .mockRejectedValueOnce( + new TerminalQuotaError('Daily limit', mockGoogleApiError), + ); - // Mock the callback - const mockPersistent429Callback = vi.fn(async () => { - fallbackCalled = true; - return true; + const promise = retryWithBackoff(mockApiCall, { + maxAttempts: 2, + initialDelayMs: 1, + maxDelayMs: 10, + onPersistent429: fallbackCallback, + authType: AuthType.USE_GEMINI, // API key auth type }); - // Test with API key auth type - should not trigger fallback - try { - await retryWithBackoff(mockApiCall, { - maxAttempts: 5, - initialDelayMs: 10, - maxDelayMs: 100, - shouldRetryOnError: (error: Error) => { - const status = (error as Error & { status?: number }).status; - return status === 429; - }, - onPersistent429: mockPersistent429Callback, - authType: AuthType.USE_GEMINI, // API key auth type - }); - } catch (error) { - // Expected to 
throw after max attempts - expect((error as Error).message).toContain('Rate limit exceeded'); - } - - // Verify fallback was NOT triggered for API key users - expect(fallbackCalled).toBe(false); - expect(mockPersistent429Callback).not.toHaveBeenCalled(); + await expect(promise).rejects.toThrow('Daily limit'); + expect(fallbackCallback).not.toHaveBeenCalled(); + expect(mockApiCall).toHaveBeenCalledTimes(1); }); // This test validates the test utilities themselves. diff --git a/packages/core/src/utils/googleErrors.test.ts b/packages/core/src/utils/googleErrors.test.ts new file mode 100644 index 0000000000..c051fb0310 --- /dev/null +++ b/packages/core/src/utils/googleErrors.test.ts @@ -0,0 +1,356 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { parseGoogleApiError } from './googleErrors.js'; +import type { QuotaFailure } from './googleErrors.js'; + +describe('parseGoogleApiError', () => { + it('should return null for non-gaxios errors', () => { + expect(parseGoogleApiError(new Error('vanilla error'))).toBeNull(); + expect(parseGoogleApiError(null)).toBeNull(); + expect(parseGoogleApiError({})).toBeNull(); + }); + + it('should parse a standard gaxios error', () => { + const mockError = { + response: { + status: 429, + data: { + error: { + code: 429, + message: 'Quota exceeded', + details: [ + { + '@type': 'type.googleapis.com/google.rpc.QuotaFailure', + violations: [{ subject: 'user', description: 'daily limit' }], + }, + ], + }, + }, + }, + }; + + const parsed = parseGoogleApiError(mockError); + expect(parsed).not.toBeNull(); + expect(parsed?.code).toBe(429); + expect(parsed?.message).toBe('Quota exceeded'); + expect(parsed?.details).toHaveLength(1); + const detail = parsed?.details[0] as QuotaFailure; + expect(detail['@type']).toBe('type.googleapis.com/google.rpc.QuotaFailure'); + expect(detail.violations[0].description).toBe('daily limit'); + }); + + it('should 
parse an error with details stringified in the message', () => { + const innerError = { + error: { + code: 429, + message: 'Inner quota message', + details: [ + { + '@type': 'type.googleapis.com/google.rpc.RetryInfo', + retryDelay: '10s', + }, + ], + }, + }; + + const mockError = { + response: { + status: 429, + data: { + error: { + code: 429, + message: JSON.stringify(innerError), + details: [], // Top-level details are empty + }, + }, + }, + }; + + const parsed = parseGoogleApiError(mockError); + expect(parsed).not.toBeNull(); + expect(parsed?.code).toBe(429); + expect(parsed?.message).toBe('Inner quota message'); + expect(parsed?.details).toHaveLength(1); + expect(parsed?.details[0]['@type']).toBe( + 'type.googleapis.com/google.rpc.RetryInfo', + ); + }); + + it('should return null if details are not in the expected format', () => { + const mockError = { + response: { + status: 400, + data: { + error: { + code: 400, + message: 'Bad Request', + details: 'just a string', // Invalid details format + }, + }, + }, + }; + expect(parseGoogleApiError(mockError)).toBeNull(); + }); + + it('should return null if there are no valid details', () => { + const mockError = { + response: { + status: 400, + data: { + error: { + code: 400, + message: 'Bad Request', + details: [ + { + // missing '@type' + reason: 'some reason', + }, + ], + }, + }, + }, + }; + expect(parseGoogleApiError(mockError)).toBeNull(); + }); + + it('should parse a doubly nested error in the message', () => { + const innerError = { + error: { + code: 429, + message: 'Innermost quota message', + details: [ + { + '@type': 'type.googleapis.com/google.rpc.RetryInfo', + retryDelay: '20s', + }, + ], + }, + }; + + const middleError = { + error: { + code: 429, + message: JSON.stringify(innerError), + details: [], + }, + }; + + const mockError = { + response: { + status: 429, + data: { + error: { + code: 429, + message: JSON.stringify(middleError), + details: [], + }, + }, + }, + }; + + const parsed = 
parseGoogleApiError(mockError); + expect(parsed).not.toBeNull(); + expect(parsed?.code).toBe(429); + expect(parsed?.message).toBe('Innermost quota message'); + expect(parsed?.details).toHaveLength(1); + expect(parsed?.details[0]['@type']).toBe( + 'type.googleapis.com/google.rpc.RetryInfo', + ); + }); + + it('should parse an error that is not in a response object', () => { + const innerError = { + error: { + code: 429, + message: 'Innermost quota message', + details: [ + { + '@type': 'type.googleapis.com/google.rpc.RetryInfo', + retryDelay: '20s', + }, + ], + }, + }; + + const mockError = { + error: { + code: 429, + message: JSON.stringify(innerError), + details: [], + }, + }; + + const parsed = parseGoogleApiError(mockError); + expect(parsed).not.toBeNull(); + expect(parsed?.code).toBe(429); + expect(parsed?.message).toBe('Innermost quota message'); + expect(parsed?.details).toHaveLength(1); + expect(parsed?.details[0]['@type']).toBe( + 'type.googleapis.com/google.rpc.RetryInfo', + ); + }); + + it('should parse an error that is a JSON string', () => { + const innerError = { + error: { + code: 429, + message: 'Innermost quota message', + details: [ + { + '@type': 'type.googleapis.com/google.rpc.RetryInfo', + retryDelay: '20s', + }, + ], + }, + }; + + const mockError = { + error: { + code: 429, + message: JSON.stringify(innerError), + details: [], + }, + }; + + const parsed = parseGoogleApiError(JSON.stringify(mockError)); + expect(parsed).not.toBeNull(); + expect(parsed?.code).toBe(429); + expect(parsed?.message).toBe('Innermost quota message'); + expect(parsed?.details).toHaveLength(1); + expect(parsed?.details[0]['@type']).toBe( + 'type.googleapis.com/google.rpc.RetryInfo', + ); + }); + + it('should parse the user-provided nested error string', () => { + const userErrorString = + '{"error":{"message":"{\\n \\"error\\": {\\n \\"code\\": 429,\\n \\"message\\": \\"You exceeded your current quota, please check your plan and billing details. 
For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 10000\\\\nPlease retry in 40.025771073s.\\",\\n \\"status\\": \\"RESOURCE_EXHAUSTED\\",\\n \\"details\\": [\\n {\\n \\"@type\\": \\"type.googleapis.com/google.rpc.DebugInfo\\",\\n \\"detail\\": \\"[ORIGINAL ERROR] generic::resource_exhausted: You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 10000\\\\nPlease retry in 40.025771073s. [google.rpc.error_details_ext] { message: \\\\\\"You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 10000\\\\\\\\nPlease retry in 40.025771073s.\\\\\\" }\\"\\n },\\n {\\n \\"@type\\": \\"type.googleapis.com/google.rpc.QuotaFailure\\",\\n \\"violations\\": [\\n {\\n \\"quotaMetric\\": \\"generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count\\",\\n \\"quotaId\\": \\"GenerateContentPaidTierInputTokensPerModelPerMinute\\",\\n \\"quotaDimensions\\": {\\n \\"location\\": \\"global\\",\\n \\"model\\": \\"gemini-2.5-pro\\"\\n },\\n \\"quotaValue\\": \\"10000\\"\\n }\\n ]\\n },\\n {\\n \\"@type\\": \\"type.googleapis.com/google.rpc.Help\\",\\n \\"links\\": [\\n {\\n \\"description\\": \\"Learn more about Gemini API quotas\\",\\n \\"url\\": \\"https://ai.google.dev/gemini-api/docs/rate-limits\\"\\n }\\n ]\\n },\\n {\\n \\"@type\\": \\"type.googleapis.com/google.rpc.RetryInfo\\",\\n \\"retryDelay\\": \\"40s\\"\\n }\\n ]\\n 
}\\n}\\n","code":429,"status":"Too Many Requests"}}'; + + const parsed = parseGoogleApiError(userErrorString); + expect(parsed).not.toBeNull(); + expect(parsed?.code).toBe(429); + expect(parsed?.message).toContain('You exceeded your current quota'); + expect(parsed?.details).toHaveLength(4); + expect( + parsed?.details.some( + (d) => d['@type'] === 'type.googleapis.com/google.rpc.QuotaFailure', + ), + ).toBe(true); + expect( + parsed?.details.some( + (d) => d['@type'] === 'type.googleapis.com/google.rpc.RetryInfo', + ), + ).toBe(true); + }); + + it('should parse an error that is an array', () => { + const mockError = [ + { + error: { + code: 429, + message: 'Quota exceeded', + details: [ + { + '@type': 'type.googleapis.com/google.rpc.QuotaFailure', + violations: [{ subject: 'user', description: 'daily limit' }], + }, + ], + }, + }, + ]; + + const parsed = parseGoogleApiError(mockError); + expect(parsed).not.toBeNull(); + expect(parsed?.code).toBe(429); + expect(parsed?.message).toBe('Quota exceeded'); + }); + + it('should parse a gaxios error where data is an array', () => { + const mockError = { + response: { + status: 429, + data: [ + { + error: { + code: 429, + message: 'Quota exceeded', + details: [ + { + '@type': 'type.googleapis.com/google.rpc.QuotaFailure', + violations: [{ subject: 'user', description: 'daily limit' }], + }, + ], + }, + }, + ], + }, + }; + + const parsed = parseGoogleApiError(mockError); + expect(parsed).not.toBeNull(); + expect(parsed?.code).toBe(429); + expect(parsed?.message).toBe('Quota exceeded'); + }); + + it('should parse a gaxios error where data is a stringified array', () => { + const mockError = { + response: { + status: 429, + data: JSON.stringify([ + { + error: { + code: 429, + message: 'Quota exceeded', + details: [ + { + '@type': 'type.googleapis.com/google.rpc.QuotaFailure', + violations: [{ subject: 'user', description: 'daily limit' }], + }, + ], + }, + }, + ]), + }, + }; + + const parsed = parseGoogleApiError(mockError); 
+ expect(parsed).not.toBeNull(); + expect(parsed?.code).toBe(429); + expect(parsed?.message).toBe('Quota exceeded'); + }); + + it('should parse an error with a malformed @type key (returned by Gemini API)', () => { + const malformedError = { + name: 'API Error', + message: { + error: { + message: + '{\n "error": {\n "code": 429,\n "message": "You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2\nPlease retry in 54.887755558s.",\n "status": "RESOURCE_EXHAUSTED",\n "details": [\n {\n " @type": "type.googleapis.com/google.rpc.DebugInfo",\n "detail": "[ORIGINAL ERROR] generic::resource_exhausted: You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2\\nPlease retry in 54.887755558s. [google.rpc.error_details_ext] { message: \\"You exceeded your current quota, please check your plan and billing details. 
For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2\\\\nPlease retry in 54.887755558s.\\" }"\n },\n {\n" @type": "type.googleapis.com/google.rpc.QuotaFailure",\n "violations": [\n {\n "quotaMetric": "generativelanguage.googleapis.com/generate_content_free_tier_requests",\n "quotaId": "GenerateRequestsPerMinutePerProjectPerModel-FreeTier",\n "quotaDimensions": {\n "location": "global",\n"model": "gemini-2.5-pro"\n },\n "quotaValue": "2"\n }\n ]\n },\n {\n" @type": "type.googleapis.com/google.rpc.Help",\n "links": [\n {\n "description": "Learn more about Gemini API quotas",\n "url": "https://ai.google.dev/gemini-api/docs/rate-limits"\n }\n ]\n },\n {\n" @type": "type.googleapis.com/google.rpc.RetryInfo",\n "retryDelay": "54s"\n }\n ]\n }\n}\n', + code: 429, + status: 'Too Many Requests', + }, + }, + }; + + const parsed = parseGoogleApiError(malformedError); + expect(parsed).not.toBeNull(); + expect(parsed?.code).toBe(429); + expect(parsed?.message).toContain('You exceeded your current quota'); + expect(parsed?.details).toHaveLength(4); + expect( + parsed?.details.some( + (d) => d['@type'] === 'type.googleapis.com/google.rpc.QuotaFailure', + ), + ).toBe(true); + expect( + parsed?.details.some( + (d) => d['@type'] === 'type.googleapis.com/google.rpc.RetryInfo', + ), + ).toBe(true); + }); +}); diff --git a/packages/core/src/utils/googleErrors.ts b/packages/core/src/utils/googleErrors.ts new file mode 100644 index 0000000000..d7c15ac0b6 --- /dev/null +++ b/packages/core/src/utils/googleErrors.ts @@ -0,0 +1,305 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview + * This file contains types and functions for parsing structured Google API errors. 
/**
 * Based on google/rpc/error_details.proto
 */

export interface ErrorInfo {
  '@type': 'type.googleapis.com/google.rpc.ErrorInfo';
  reason: string;
  domain: string;
  metadata: { [key: string]: string };
}

export interface RetryInfo {
  '@type': 'type.googleapis.com/google.rpc.RetryInfo';
  retryDelay: string; // e.g. "51820.638305887s"
}

export interface DebugInfo {
  '@type': 'type.googleapis.com/google.rpc.DebugInfo';
  stackEntries: string[];
  detail: string;
}

export interface QuotaFailure {
  '@type': 'type.googleapis.com/google.rpc.QuotaFailure';
  violations: Array<{
    subject?: string;
    description?: string;
    apiService?: string;
    quotaMetric?: string;
    quotaId?: string;
    quotaDimensions?: { [key: string]: string };
    quotaValue?: string | number;
    futureQuotaValue?: number;
  }>;
}

export interface PreconditionFailure {
  '@type': 'type.googleapis.com/google.rpc.PreconditionFailure';
  violations: Array<{
    type: string;
    subject: string;
    description: string;
  }>;
}

export interface LocalizedMessage {
  '@type': 'type.googleapis.com/google.rpc.LocalizedMessage';
  locale: string;
  message: string;
}

export interface BadRequest {
  '@type': 'type.googleapis.com/google.rpc.BadRequest';
  fieldViolations: Array<{
    field: string;
    description: string;
    reason?: string;
    localizedMessage?: LocalizedMessage;
  }>;
}

export interface RequestInfo {
  '@type': 'type.googleapis.com/google.rpc.RequestInfo';
  requestId: string;
  servingData: string;
}

export interface ResourceInfo {
  '@type': 'type.googleapis.com/google.rpc.ResourceInfo';
  resourceType: string;
  resourceName: string;
  owner: string;
  description: string;
}

export interface Help {
  '@type': 'type.googleapis.com/google.rpc.Help';
  links: Array<{
    description: string;
    url: string;
  }>;
}

/** Any of the structured detail payloads that may appear in `error.details`. */
export type GoogleApiErrorDetail =
  | ErrorInfo
  | RetryInfo
  | DebugInfo
  | QuotaFailure
  | PreconditionFailure
  | BadRequest
  | RequestInfo
  | ResourceInfo
  | Help
  | LocalizedMessage;

/** A parsed Google API error: numeric code, message and at least one typed detail. */
export interface GoogleApiError {
  code: number;
  message: string;
  details: GoogleApiErrorDetail[];
}

/** Loose shape of an `error` payload before it has been validated. */
type ErrorShape = {
  message?: string;
  details?: unknown[];
  code?: number;
};

/**
 * Parses an error value to check if it's a structured Google API error
 * and extracts all details.
 *
 * Accepted inputs:
 * 1. A gaxios-style error (`response.data.error`), where `data` may be an
 *    object, an array (first element is used) or a JSON string of either.
 * 2. An object with a top-level `error` property.
 * 3. An object whose `message` is (or is a JSON string of) `{ error: ... }`.
 * 4. A JSON string, or an array, wrapping any of the above.
 *
 * In every case, if the resulting error's `message` is itself a stringified
 * `{ error: ... }` object, the function drills into it (up to 10 levels).
 *
 * Detail entries whose type key carries stray whitespace (e.g. `" @type"`)
 * are normalised to `@type` on a copy; the caller's objects are not modified.
 *
 * @param error The error value to inspect.
 * @returns A GoogleApiError if a truthy `code`, a truthy `message` and at
 *   least one detail with an `@type` key were found, otherwise null.
 */
export function parseGoogleApiError(error: unknown): GoogleApiError | null {
  if (!error) {
    return null;
  }

  let errorObj: unknown = error;

  // If error is a string, try to parse it.
  if (typeof errorObj === 'string') {
    try {
      errorObj = JSON.parse(errorObj);
    } catch (_) {
      // Not a JSON string, can't parse.
      return null;
    }
  }

  if (Array.isArray(errorObj) && errorObj.length > 0) {
    errorObj = errorObj[0];
  }

  if (typeof errorObj !== 'object' || errorObj === null) {
    return null;
  }

  let currentError: ErrorShape | undefined =
    fromGaxiosError(errorObj) ?? fromApiError(errorObj);

  let depth = 0;
  const maxDepth = 10;
  // Handle cases where the actual error object is stringified inside the message
  // by drilling down until we find an error that doesn't have a stringified message.
  while (
    currentError &&
    typeof currentError.message === 'string' &&
    depth < maxDepth
  ) {
    try {
      // Strip non-breaking spaces and turn raw newlines into spaces before
      // parsing; raw newlines inside string values are not valid JSON.
      const parsedMessage: unknown = JSON.parse(
        currentError.message.replace(/\u00A0/g, '').replace(/\n/g, ' '),
      );
      const nested =
        typeof parsedMessage === 'object' && parsedMessage !== null
          ? (parsedMessage as { error?: unknown }).error
          : undefined;
      if (!nested) {
        // The message is JSON, but not a nested error object.
        break;
      }
      currentError = nested as ErrorShape;
      depth++;
    } catch (_error) {
      // It wasn't a JSON string, so we've drilled down as far as we can.
      break;
    }
  }

  if (!currentError) {
    return null;
  }

  const code = currentError.code;
  const message = currentError.message;
  const errorDetails = currentError.details;

  if (Array.isArray(errorDetails) && code && message) {
    const details: GoogleApiErrorDetail[] = [];
    for (const detail of errorDetails) {
      if (!detail || typeof detail !== 'object') {
        continue;
      }
      const detailObj = detail as Record<string, unknown>;
      const typeKey = Object.keys(detailObj).find(
        (key) => key.trim() === '@type',
      );
      if (!typeKey) {
        continue;
      }
      if (typeKey === '@type') {
        // The payload is unvalidated external data, so it is cast as-is;
        // the consumer has to switch on @type.
        details.push(detailObj as unknown as GoogleApiErrorDetail);
      } else {
        // Normalise the malformed key on a shallow copy so the caller's
        // error object is left untouched.
        const normalized: Record<string, unknown> = {
          ...detailObj,
          '@type': detailObj[typeKey],
        };
        delete normalized[typeKey];
        details.push(normalized as unknown as GoogleApiErrorDetail);
      }
    }

    if (details.length > 0) {
      return {
        code,
        message,
        details,
      };
    }
  }

  return null;
}

/**
 * Extracts the `error` member from a payload that may be an object, an array
 * (first element is used) or a JSON string of either.
 *
 * @returns The `error` value, or undefined when the payload has no such member.
 */
function unwrapErrorPayload(payload: unknown): ErrorShape | undefined {
  let data = payload;

  if (typeof data === 'string') {
    try {
      data = JSON.parse(data);
    } catch (_) {
      // Not a JSON string, can't parse.
    }
  }

  if (Array.isArray(data) && data.length > 0) {
    data = data[0];
  }

  if (typeof data === 'object' && data !== null && 'error' in data) {
    return (data as { error: ErrorShape }).error;
  }
  return undefined;
}

/**
 * Reads the error payload from a gaxios-style error (`response.data.error`),
 * falling back to a top-level `error` property when that is missing or falsy.
 */
function fromGaxiosError(errorObj: object): ErrorShape | undefined {
  const gaxiosError = errorObj as {
    response?: { status?: number; data?: unknown };
    error?: ErrorShape;
  };

  const responseData = gaxiosError.response?.data;
  const fromResponse = responseData
    ? unwrapErrorPayload(responseData)
    : undefined;

  // `||` (not `??`) is deliberate: any falsy payload falls through to the
  // top-level `error` property, and a falsy result becomes undefined.
  return fromResponse || gaxiosError.error || undefined;
}

/**
 * Reads the error payload from an object whose `message` is, or is a JSON
 * string of, `{ error: ... }`.
 */
function fromApiError(errorObj: object): ErrorShape | undefined {
  const apiError = errorObj as { message?: unknown };
  return apiError.message ? unwrapErrorPayload(apiError.message) : undefined;
}
'parseGoogleApiError').mockReturnValue(apiError); + const originalError = new Error(); + const result = classifyGoogleError(originalError); + expect(result).toBe(originalError); + expect(result).not.toBeInstanceOf(TerminalQuotaError); + expect(result).not.toBeInstanceOf(RetryableQuotaError); + }); + + it('should return TerminalQuotaError for daily quota violations in QuotaFailure', () => { + const apiError: GoogleApiError = { + code: 429, + message: 'Quota exceeded', + details: [ + { + '@type': 'type.googleapis.com/google.rpc.QuotaFailure', + violations: [ + { + subject: 'user', + description: 'daily limit', + quotaId: 'RequestsPerDay-limit', + }, + ], + }, + ], + }; + vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError); + const result = classifyGoogleError(new Error()); + expect(result).toBeInstanceOf(TerminalQuotaError); + expect((result as TerminalQuotaError).cause).toBe(apiError); + }); + + it('should return TerminalQuotaError for daily quota violations in ErrorInfo', () => { + const apiError: GoogleApiError = { + code: 429, + message: 'Quota exceeded', + details: [ + { + '@type': 'type.googleapis.com/google.rpc.ErrorInfo', + reason: 'QUOTA_EXCEEDED', + domain: 'googleapis.com', + metadata: { + quota_limit: 'RequestsPerDay_PerProject_PerUser', + }, + }, + ], + }; + vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError); + const result = classifyGoogleError(new Error()); + expect(result).toBeInstanceOf(TerminalQuotaError); + }); + + it('should return TerminalQuotaError for long retry delays', () => { + const apiError: GoogleApiError = { + code: 429, + message: 'Too many requests', + details: [ + { + '@type': 'type.googleapis.com/google.rpc.RetryInfo', + retryDelay: '301s', // > 5 minutes + }, + ], + }; + vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError); + const result = classifyGoogleError(new Error()); + expect(result).toBeInstanceOf(TerminalQuotaError); + }); + + it('should return RetryableQuotaError for 
short retry delays', () => { + const apiError: GoogleApiError = { + code: 429, + message: 'Too many requests', + details: [ + { + '@type': 'type.googleapis.com/google.rpc.RetryInfo', + retryDelay: '45.123s', + }, + ], + }; + vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError); + const result = classifyGoogleError(new Error()); + expect(result).toBeInstanceOf(RetryableQuotaError); + expect((result as RetryableQuotaError).retryDelayMs).toBe(45123); + }); + + it('should return RetryableQuotaError for per-minute quota violations in QuotaFailure', () => { + const apiError: GoogleApiError = { + code: 429, + message: 'Quota exceeded', + details: [ + { + '@type': 'type.googleapis.com/google.rpc.QuotaFailure', + violations: [ + { + subject: 'user', + description: 'per minute limit', + quotaId: 'RequestsPerMinute-limit', + }, + ], + }, + ], + }; + vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError); + const result = classifyGoogleError(new Error()); + expect(result).toBeInstanceOf(RetryableQuotaError); + expect((result as RetryableQuotaError).retryDelayMs).toBe(60000); + }); + + it('should return RetryableQuotaError for per-minute quota violations in ErrorInfo', () => { + const apiError: GoogleApiError = { + code: 429, + message: 'Quota exceeded', + details: [ + { + '@type': 'type.googleapis.com/google.rpc.ErrorInfo', + reason: 'QUOTA_EXCEEDED', + domain: 'googleapis.com', + metadata: { + quota_limit: 'RequestsPerMinute_PerProject_PerUser', + }, + }, + ], + }; + vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError); + const result = classifyGoogleError(new Error()); + expect(result).toBeInstanceOf(RetryableQuotaError); + expect((result as RetryableQuotaError).retryDelayMs).toBe(60000); + }); + + it('should return RetryableQuotaError for another short retry delay', () => { + const apiError: GoogleApiError = { + code: 429, + message: + 'You exceeded your current quota, please check your plan and billing details. 
For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2\nPlease retry in 56.185908122s.', + details: [ + { + '@type': 'type.googleapis.com/google.rpc.QuotaFailure', + violations: [ + { + quotaMetric: + 'generativelanguage.googleapis.com/generate_content_free_tier_requests', + quotaId: 'GenerateRequestsPerMinutePerProjectPerModel-FreeTier', + quotaDimensions: { + location: 'global', + model: 'gemini-2.5-pro', + }, + quotaValue: '2', + }, + ], + }, + { + '@type': 'type.googleapis.com/google.rpc.Help', + links: [ + { + description: 'Learn more about Gemini API quotas', + url: 'https://ai.google.dev/gemini-api/docs/rate-limits', + }, + ], + }, + { + '@type': 'type.googleapis.com/google.rpc.RetryInfo', + retryDelay: '56s', + }, + ], + }; + vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError); + const result = classifyGoogleError(new Error()); + expect(result).toBeInstanceOf(RetryableQuotaError); + expect((result as RetryableQuotaError).retryDelayMs).toBe(56000); + }); + + it('should return RetryableQuotaError for Cloud Code RATE_LIMIT_EXCEEDED with retry delay', () => { + const apiError: GoogleApiError = { + code: 429, + message: + 'You have exhausted your capacity on this model. 
Your quota will reset after 0s.', + details: [ + { + '@type': 'type.googleapis.com/google.rpc.ErrorInfo', + reason: 'RATE_LIMIT_EXCEEDED', + domain: 'cloudcode-pa.googleapis.com', + metadata: { + uiMessage: 'true', + model: 'gemini-2.5-pro', + quotaResetDelay: '539.477544ms', + quotaResetTimeStamp: '2025-10-20T19:14:08Z', + }, + }, + { + '@type': 'type.googleapis.com/google.rpc.RetryInfo', + retryDelay: '0.539477544s', + }, + ], + }; + vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError); + const result = classifyGoogleError(new Error()); + expect(result).toBeInstanceOf(RetryableQuotaError); + expect((result as RetryableQuotaError).retryDelayMs).toBeCloseTo( + 539.477544, + ); + }); + + it('should return TerminalQuotaError for Cloud Code QUOTA_EXHAUSTED', () => { + const apiError: GoogleApiError = { + code: 429, + message: + 'You have exhausted your capacity on this model. Your quota will reset after 0s.', + details: [ + { + '@type': 'type.googleapis.com/google.rpc.ErrorInfo', + reason: 'QUOTA_EXHAUSTED', + domain: 'cloudcode-pa.googleapis.com', + metadata: { + uiMessage: 'true', + model: 'gemini-2.5-pro', + quotaResetDelay: '539.477544ms', + quotaResetTimeStamp: '2025-10-20T19:14:08Z', + }, + }, + { + '@type': 'type.googleapis.com/google.rpc.RetryInfo', + retryDelay: '0.539477544s', + }, + ], + }; + vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError); + const result = classifyGoogleError(new Error()); + expect(result).toBeInstanceOf(TerminalQuotaError); + }); + + it('should prioritize daily limit over retry info', () => { + const apiError: GoogleApiError = { + code: 429, + message: 'Quota exceeded', + details: [ + { + '@type': 'type.googleapis.com/google.rpc.QuotaFailure', + violations: [ + { + subject: 'user', + description: 'daily limit', + quotaId: 'RequestsPerDay-limit', + }, + ], + }, + { + '@type': 'type.googleapis.com/google.rpc.RetryInfo', + retryDelay: '10s', + }, + ], + }; + vi.spyOn(errorParser, 
/**
 * Non-retryable quota error: a hard limit (e.g. a daily limit) has been hit.
 * The parsed Google API error that triggered it is exposed as `cause`.
 */
export class TerminalQuotaError extends Error {
  override readonly cause: GoogleApiError;

  constructor(message: string, cause: GoogleApiError) {
    super(message);
    this.cause = cause;
    this.name = 'TerminalQuotaError';
  }
}

/**
 * Retryable quota error: a temporary limit (e.g. a per-minute limit) has been
 * hit. The parsed Google API error is exposed as `cause`, and `retryDelayMs`
 * holds the delay, converted from the seconds passed to the constructor.
 */
export class RetryableQuotaError extends Error {
  override readonly cause: GoogleApiError;
  retryDelayMs: number;

  constructor(
    message: string,
    cause: GoogleApiError,
    retryDelaySeconds: number,
  ) {
    super(message);
    this.cause = cause;
    this.name = 'RetryableQuotaError';
    // Callers supply seconds; the field is stored in milliseconds.
    this.retryDelayMs = retryDelaySeconds * 1000;
  }
}
+ * @param duration The duration string to parse. + * @returns The duration in seconds, or null if parsing fails. + */ +function parseDurationInSeconds(duration: string): number | null { + if (!duration.endsWith('s')) { + return null; + } + const seconds = parseFloat(duration.slice(0, -1)); + return isNaN(seconds) ? null : seconds; +} + +/** + * Analyzes a caught error and classifies it as a specific quota-related error if applicable. + * + * It decides whether an error is a `TerminalQuotaError` or a `RetryableQuotaError` based on + * the following logic: + * - If the error indicates a daily limit, it's a `TerminalQuotaError`. + * - If the error suggests a retry delay of more than 2 minutes, it's a `TerminalQuotaError`. + * - If the error suggests a retry delay of 2 minutes or less, it's a `RetryableQuotaError`. + * - If the error indicates a per-minute limit, it's a `RetryableQuotaError`. + * + * @param error The error to classify. + * @returns A `TerminalQuotaError`, `RetryableQuotaError`, or the original `unknown` error. + */ +export function classifyGoogleError(error: unknown): unknown { + const googleApiError = parseGoogleApiError(error); + + if (!googleApiError || googleApiError.code !== 429) { + return error; // Not a 429 error we can handle. + } + + const quotaFailure = googleApiError.details.find( + (d): d is QuotaFailure => + d['@type'] === 'type.googleapis.com/google.rpc.QuotaFailure', + ); + + const errorInfo = googleApiError.details.find( + (d): d is ErrorInfo => + d['@type'] === 'type.googleapis.com/google.rpc.ErrorInfo', + ); + + const retryInfo = googleApiError.details.find( + (d): d is RetryInfo => + d['@type'] === 'type.googleapis.com/google.rpc.RetryInfo', + ); + + // 1. Check for long-term limits in QuotaFailure or ErrorInfo + if (quotaFailure) { + for (const violation of quotaFailure.violations) { + const quotaId = violation.quotaId ?? 
''; + if (quotaId.includes('PerDay') || quotaId.includes('Daily')) { + return new TerminalQuotaError( + `${googleApiError.message}\nExpected quota reset within 24h.`, + googleApiError, + ); + } + } + } + + if (errorInfo) { + // New Cloud Code API quota handling + if (errorInfo.domain) { + const validDomains = [ + 'cloudcode-pa.googleapis.com', + 'staging-cloudcode-pa.googleapis.com', + 'autopush-cloudcode-pa.googleapis.com', + ]; + if (validDomains.includes(errorInfo.domain)) { + if (errorInfo.reason === 'RATE_LIMIT_EXCEEDED') { + let delaySeconds = 10; // Default retry of 10s + if (retryInfo?.retryDelay) { + const parsedDelay = parseDurationInSeconds(retryInfo.retryDelay); + if (parsedDelay) { + delaySeconds = parsedDelay; + } + } + return new RetryableQuotaError( + `${googleApiError.message}`, + googleApiError, + delaySeconds, + ); + } + if (errorInfo.reason === 'QUOTA_EXHAUSTED') { + return new TerminalQuotaError( + `${googleApiError.message}`, + googleApiError, + ); + } + } + } + + // Existing Cloud Code API quota handling + const quotaLimit = errorInfo.metadata?.['quota_limit'] ?? ''; + if (quotaLimit.includes('PerDay') || quotaLimit.includes('Daily')) { + return new TerminalQuotaError( + `${googleApiError.message}\nExpected quota reset within 24h.`, + googleApiError, + ); + } + } + + // 2. Check for long delays in RetryInfo + if (retryInfo?.retryDelay) { + const delaySeconds = parseDurationInSeconds(retryInfo.retryDelay); + if (delaySeconds) { + if (delaySeconds > 120) { + return new TerminalQuotaError( + `${googleApiError.message}\nSuggested retry after ${retryInfo.retryDelay}.`, + googleApiError, + ); + } + // This is a retryable error with a specific delay. + return new RetryableQuotaError( + `${googleApiError.message}\nSuggested retry after ${retryInfo.retryDelay}.`, + googleApiError, + delaySeconds, + ); + } + } + + // 3. 
Check for short-term limits in QuotaFailure or ErrorInfo + if (quotaFailure) { + for (const violation of quotaFailure.violations) { + const quotaId = violation.quotaId ?? ''; + if (quotaId.includes('PerMinute')) { + return new RetryableQuotaError( + `${googleApiError.message}\nSuggested retry after 60s.`, + googleApiError, + 60, + ); + } + } + } + + if (errorInfo) { + const quotaLimit = errorInfo.metadata?.['quota_limit'] ?? ''; + if (quotaLimit.includes('PerMinute')) { + return new RetryableQuotaError( + `${errorInfo.reason}\nSuggested retry after 60s.`, + googleApiError, + 60, + ); + } + } + return error; // Fallback to original error if no specific classification fits. +} diff --git a/packages/core/src/utils/quotaErrorDetection.ts b/packages/core/src/utils/quotaErrorDetection.ts index 6417e0db57..893e48b0f2 100644 --- a/packages/core/src/utils/quotaErrorDetection.ts +++ b/packages/core/src/utils/quotaErrorDetection.ts @@ -33,68 +33,3 @@ export function isStructuredError(error: unknown): error is StructuredError { typeof (error as StructuredError).message === 'string' ); } - -export function isProQuotaExceededError(error: unknown): boolean { - // Check for Pro quota exceeded errors by looking for the specific pattern - // This will match patterns like: - // - "Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'" - // - "Quota exceeded for quota metric 'Gemini 2.5-preview Pro Requests'" - // We use string methods instead of regex to avoid ReDoS vulnerabilities - - const checkMessage = (message: string): boolean => - message.includes("Quota exceeded for quota metric 'Gemini") && - message.includes("Pro Requests'"); - - if (typeof error === 'string') { - return checkMessage(error); - } - - if (isStructuredError(error)) { - return checkMessage(error.message); - } - - if (isApiError(error)) { - return checkMessage(error.error.message); - } - - // Check if it's a Gaxios error with response data - if (error && typeof error === 'object' && 'response' in error) { - 
const gaxiosError = error as { - response?: { - data?: unknown; - }; - }; - if (gaxiosError.response && gaxiosError.response.data) { - if (typeof gaxiosError.response.data === 'string') { - return checkMessage(gaxiosError.response.data); - } - if ( - typeof gaxiosError.response.data === 'object' && - gaxiosError.response.data !== null && - 'error' in gaxiosError.response.data - ) { - const errorData = gaxiosError.response.data as { - error?: { message?: string }; - }; - return checkMessage(errorData.error?.message || ''); - } - } - } - return false; -} - -export function isGenericQuotaExceededError(error: unknown): boolean { - if (typeof error === 'string') { - return error.includes('Quota exceeded for quota metric'); - } - - if (isStructuredError(error)) { - return error.message.includes('Quota exceeded for quota metric'); - } - - if (isApiError(error)) { - return error.error.message.includes('Quota exceeded for quota metric'); - } - - return false; -} diff --git a/packages/core/src/utils/retry.test.ts b/packages/core/src/utils/retry.test.ts index 13af50b475..e0297e8903 100644 --- a/packages/core/src/utils/retry.test.ts +++ b/packages/core/src/utils/retry.test.ts @@ -7,10 +7,15 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import { ApiError } from '@google/genai'; +import { AuthType } from '../core/contentGenerator.js'; import type { HttpError } from './retry.js'; import { retryWithBackoff } from './retry.js'; import { setSimulate429 } from './testUtils.js'; import { debugLogger } from './debugLogger.js'; +import { + TerminalQuotaError, + RetryableQuotaError, +} from './googleQuotaErrors.js'; // Helper to create a mock function that fails a certain number of times const createFailingFunction = ( @@ -100,26 +105,26 @@ describe('retryWithBackoff', () => { // Expect it to fail with the error from the 5th attempt. 
await Promise.all([ - expect(promise).rejects.toThrow('Simulated error attempt 5'), + expect(promise).rejects.toThrow('Simulated error attempt 3'), vi.runAllTimersAsync(), ]); - expect(mockFn).toHaveBeenCalledTimes(5); + expect(mockFn).toHaveBeenCalledTimes(3); }); - it('should default to 5 maxAttempts if options.maxAttempts is undefined', async () => { - // This function will fail more than 5 times to ensure all retries are used. + it('should default to 3 maxAttempts if options.maxAttempts is undefined', async () => { + // This function will fail more than 3 times to ensure all retries are used. const mockFn = createFailingFunction(10); const promise = retryWithBackoff(mockFn, { maxAttempts: undefined }); // Expect it to fail with the error from the 5th attempt. await Promise.all([ - expect(promise).rejects.toThrow('Simulated error attempt 5'), + expect(promise).rejects.toThrow('Simulated error attempt 3'), vi.runAllTimersAsync(), ]); - expect(mockFn).toHaveBeenCalledTimes(5); + expect(mockFn).toHaveBeenCalledTimes(3); }); it('should not retry if shouldRetry returns false', async () => { @@ -336,15 +341,13 @@ describe('retryWithBackoff', () => { }); describe('Flash model fallback for OAuth users', () => { - it('should trigger fallback for OAuth personal users after persistent 429 errors', async () => { + it('should trigger fallback for OAuth personal users on TerminalQuotaError', async () => { const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash'); let fallbackOccurred = false; const mockFn = vi.fn().mockImplementation(async () => { if (!fallbackOccurred) { - const error: HttpError = new Error('Rate limit exceeded'); - error.status = 429; - throw error; + throw new TerminalQuotaError('Daily limit reached', {} as any); } return 'success'; }); @@ -352,154 +355,62 @@ describe('retryWithBackoff', () => { const promise = retryWithBackoff(mockFn, { maxAttempts: 3, initialDelayMs: 100, - onPersistent429: async (authType?: string) => { + onPersistent429: 
async (authType?: string, error?: unknown) => { fallbackOccurred = true; - return await fallbackCallback(authType); + return await fallbackCallback(authType, error); }, authType: 'oauth-personal', }); - // Advance all timers to complete retries - await vi.runAllTimersAsync(); - - // Should succeed after fallback - await expect(promise).resolves.toBe('success'); - - // Verify callback was called with correct auth type - expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal'); - - // Should retry again after fallback - expect(mockFn).toHaveBeenCalledTimes(3); // 2 initial attempts + 1 after fallback - }); - - it('should NOT trigger fallback for API key users', async () => { - const fallbackCallback = vi.fn(); - - const mockFn = vi.fn(async () => { - const error: HttpError = new Error('Rate limit exceeded'); - error.status = 429; - throw error; - }); - - const promise = retryWithBackoff(mockFn, { - maxAttempts: 3, - initialDelayMs: 100, - onPersistent429: fallbackCallback, - authType: 'gemini-api-key', - }); - - // Handle the promise properly to avoid unhandled rejections - const resultPromise = promise.catch((error) => error); - await vi.runAllTimersAsync(); - const result = await resultPromise; - - // Should fail after all retries without fallback - expect(result).toBeInstanceOf(Error); - expect(result.message).toBe('Rate limit exceeded'); - - // Callback should not be called for API key users - expect(fallbackCallback).not.toHaveBeenCalled(); - }); - - it('should reset attempt counter and continue after successful fallback', async () => { - let fallbackCalled = false; - const fallbackCallback = vi.fn().mockImplementation(async () => { - fallbackCalled = true; - return 'gemini-2.5-flash'; - }); - - const mockFn = vi.fn().mockImplementation(async () => { - if (!fallbackCalled) { - const error: HttpError = new Error('Rate limit exceeded'); - error.status = 429; - throw error; - } - return 'success'; - }); - - const promise = retryWithBackoff(mockFn, { - 
maxAttempts: 3, - initialDelayMs: 100, - onPersistent429: fallbackCallback, - authType: 'oauth-personal', - }); - await vi.runAllTimersAsync(); await expect(promise).resolves.toBe('success'); - expect(fallbackCallback).toHaveBeenCalledOnce(); - }); - - it('should continue with original error if fallback is rejected', async () => { - const fallbackCallback = vi.fn().mockResolvedValue(null); // User rejected fallback - - const mockFn = vi.fn(async () => { - const error: HttpError = new Error('Rate limit exceeded'); - error.status = 429; - throw error; - }); - - const promise = retryWithBackoff(mockFn, { - maxAttempts: 3, - initialDelayMs: 100, - onPersistent429: fallbackCallback, - authType: 'oauth-personal', - }); - - // Handle the promise properly to avoid unhandled rejections - const resultPromise = promise.catch((error) => error); - await vi.runAllTimersAsync(); - const result = await resultPromise; - - // Should fail with original error when fallback is rejected - expect(result).toBeInstanceOf(Error); - expect(result.message).toBe('Rate limit exceeded'); expect(fallbackCallback).toHaveBeenCalledWith( 'oauth-personal', - expect.any(Error), + expect.any(TerminalQuotaError), ); + expect(mockFn).toHaveBeenCalledTimes(2); }); - it('should handle mixed error types (only count consecutive 429s)', async () => { - const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash'); - let attempts = 0; - let fallbackOccurred = false; - + it('should use retryDelayMs from RetryableQuotaError', async () => { + const setTimeoutSpy = vi.spyOn(global, 'setTimeout'); const mockFn = vi.fn().mockImplementation(async () => { - attempts++; - if (fallbackOccurred) { - return 'success'; - } - if (attempts === 1) { - // First attempt: 500 error (resets consecutive count) - const error: HttpError = new Error('Server error'); - error.status = 500; - throw error; - } else { - // Remaining attempts: 429 errors - const error: HttpError = new Error('Rate limit exceeded'); - error.status = 
429; - throw error; - } + throw new RetryableQuotaError('Per-minute limit', {} as any, 12.345); }); const promise = retryWithBackoff(mockFn, { - maxAttempts: 5, + maxAttempts: 2, initialDelayMs: 100, - onPersistent429: async (authType?: string) => { - fallbackOccurred = true; - return await fallbackCallback(authType); - }, - authType: 'oauth-personal', }); + // Attach the rejection expectation *before* running timers + // eslint-disable-next-line vitest/valid-expect + const assertionPromise = expect(promise).rejects.toThrow(); await vi.runAllTimersAsync(); + await assertionPromise; - await expect(promise).resolves.toBe('success'); - - // Should trigger fallback after 2 consecutive 429s (attempts 2-3) - expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal'); + expect(setTimeoutSpy).toHaveBeenCalledWith(expect.any(Function), 12345); }); + + it.each([[AuthType.USE_GEMINI], [AuthType.USE_VERTEX_AI], [undefined]])( + 'should not trigger fallback for non-Google auth users (authType: %s) on TerminalQuotaError', + async (authType) => { + const fallbackCallback = vi.fn(); + const mockFn = vi.fn().mockImplementation(async () => { + throw new TerminalQuotaError('Daily limit reached', {} as any); + }); + + const promise = retryWithBackoff(mockFn, { + maxAttempts: 3, + onPersistent429: fallbackCallback, + authType, + }); + + await expect(promise).rejects.toThrow('Daily limit reached'); + expect(fallbackCallback).not.toHaveBeenCalled(); + expect(mockFn).toHaveBeenCalledTimes(1); + }, + ); }); it('should abort the retry loop when the signal is aborted', async () => { const abortController = new AbortController(); diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts index 70afe42f5d..edb8f9bb85 100644 --- a/packages/core/src/utils/retry.ts +++ b/packages/core/src/utils/retry.ts @@ -8,9 +8,10 @@ import type { GenerateContentResponse } from '@google/genai'; import { ApiError } from '@google/genai'; import { AuthType } from 
'../core/contentGenerator.js'; import { - isProQuotaExceededError, - isGenericQuotaExceededError, -} from './quotaErrorDetection.js'; + classifyGoogleError, + RetryableQuotaError, + TerminalQuotaError, +} from './googleQuotaErrors.js'; import { delay, createAbortError } from './delay.js'; import { debugLogger } from './debugLogger.js'; @@ -37,7 +38,7 @@ export interface RetryOptions { } const DEFAULT_RETRY_OPTIONS: RetryOptions = { - maxAttempts: 5, + maxAttempts: 3, initialDelayMs: 5000, maxDelayMs: 30000, // 30 seconds shouldRetryOnError: defaultShouldRetry, @@ -118,7 +119,6 @@ export async function retryWithBackoff( let attempt = 0; let currentDelay = initialDelayMs; - let consecutive429Count = 0; while (attempt < maxAttempts) { if (signal?.aborted) { @@ -145,94 +145,54 @@ export async function retryWithBackoff( throw error; } - const errorStatus = getErrorStatus(error); + const classifiedError = classifyGoogleError(error); - // Check for Pro quota exceeded error first - immediate fallback for OAuth users - if ( - errorStatus === 429 && - authType === AuthType.LOGIN_WITH_GOOGLE && - isProQuotaExceededError(error) && - onPersistent429 - ) { - try { - const fallbackModel = await onPersistent429(authType, error); - if (fallbackModel !== false && fallbackModel !== null) { - // Reset attempt counter and try with new model - attempt = 0; - consecutive429Count = 0; - currentDelay = initialDelayMs; - // With the model updated, we continue to the next attempt - continue; - } else { - // Fallback handler returned null/false, meaning don't continue - stop retry process - throw error; + if (classifiedError instanceof TerminalQuotaError) { + if (onPersistent429 && authType === AuthType.LOGIN_WITH_GOOGLE) { + try { + const fallbackModel = await onPersistent429( + authType, + classifiedError, + ); + if (fallbackModel) { + attempt = 0; // Reset attempts and retry with the new model. 
+ currentDelay = initialDelayMs; + continue; + } + } catch (fallbackError) { + debugLogger.warn('Fallback to Flash model failed:', fallbackError); } - } catch (fallbackError) { - // If fallback fails, continue with original error - debugLogger.warn('Fallback to Flash model failed:', fallbackError); } + throw classifiedError; // Throw if no fallback or fallback failed. } - // Check for generic quota exceeded error (but not Pro, which was handled above) - immediate fallback for OAuth users - if ( - errorStatus === 429 && - authType === AuthType.LOGIN_WITH_GOOGLE && - !isProQuotaExceededError(error) && - isGenericQuotaExceededError(error) && - onPersistent429 - ) { - try { - const fallbackModel = await onPersistent429(authType, error); - if (fallbackModel !== false && fallbackModel !== null) { - // Reset attempt counter and try with new model - attempt = 0; - consecutive429Count = 0; - currentDelay = initialDelayMs; - // With the model updated, we continue to the next attempt - continue; - } else { - // Fallback handler returned null/false, meaning don't continue - stop retry process - throw error; + if (classifiedError instanceof RetryableQuotaError) { + if (attempt >= maxAttempts) { + if (onPersistent429 && authType === AuthType.LOGIN_WITH_GOOGLE) { + try { + const fallbackModel = await onPersistent429( + authType, + classifiedError, + ); + if (fallbackModel) { + attempt = 0; // Reset attempts and retry with the new model. + currentDelay = initialDelayMs; + continue; + } + } catch (fallbackError) { + console.warn('Model fallback failed:', fallbackError); + } } - } catch (fallbackError) { - // If fallback fails, continue with original error - debugLogger.warn('Fallback to Flash model failed:', fallbackError); + throw classifiedError; } + console.warn( + `Attempt ${attempt} failed: ${classifiedError.message}. 
Retrying after ${classifiedError.retryDelayMs}ms...`, + ); + await delay(classifiedError.retryDelayMs, signal); + continue; } - // Track consecutive 429 errors - if (errorStatus === 429) { - consecutive429Count++; - } else { - consecutive429Count = 0; - } - - // If we have persistent 429s and a fallback callback for OAuth - if ( - consecutive429Count >= 2 && - onPersistent429 && - authType === AuthType.LOGIN_WITH_GOOGLE - ) { - try { - const fallbackModel = await onPersistent429(authType, error); - if (fallbackModel !== false && fallbackModel !== null) { - // Reset attempt counter and try with new model - attempt = 0; - consecutive429Count = 0; - currentDelay = initialDelayMs; - // With the model updated, we continue to the next attempt - continue; - } else { - // Fallback handler returned null/false, meaning don't continue - stop retry process - throw error; - } - } catch (fallbackError) { - // If fallback fails, continue with original error - debugLogger.warn('Fallback to Flash model failed:', fallbackError); - } - } - - // Check if we've exhausted retries or shouldn't retry + // Generic retry logic for other errors if ( attempt >= maxAttempts || !shouldRetryOnError(error as Error, retryFetchErrors) @@ -240,31 +200,17 @@ export async function retryWithBackoff( throw error; } - const { delayDurationMs, errorStatus: delayErrorStatus } = - getDelayDurationAndStatus(error); + const errorStatus = getErrorStatus(error); + logRetryAttempt(attempt, error, errorStatus); - if (delayDurationMs > 0) { - // Respect Retry-After header if present and parsed - debugLogger.warn( - `Attempt ${attempt} failed with status ${delayErrorStatus ?? 'unknown'}. 
Retrying after explicit delay of ${delayDurationMs}ms...`, - error, - ); - await delay(delayDurationMs, signal); - // Reset currentDelay for next potential non-429 error, or if Retry-After is not present next time - currentDelay = initialDelayMs; - } else { - // Fall back to exponential backoff with jitter - logRetryAttempt(attempt, error, errorStatus); - // Add jitter: +/- 30% of currentDelay - const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1); - const delayWithJitter = Math.max(0, currentDelay + jitter); - await delay(delayWithJitter, signal); - currentDelay = Math.min(maxDelayMs, currentDelay * 2); - } + // Exponential backoff with jitter for non-quota errors + const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1); + const delayWithJitter = Math.max(0, currentDelay + jitter); + await delay(delayWithJitter, signal); + currentDelay = Math.min(maxDelayMs, currentDelay * 2); } } - // This line should theoretically be unreachable due to the throw in the catch block. - // Added for type safety and to satisfy the compiler that a promise is always returned. + throw new Error('Retry attempts exhausted'); } @@ -295,62 +241,6 @@ export function getErrorStatus(error: unknown): number | undefined { return undefined; } -/** - * Extracts the Retry-After delay from an error object's headers. - * @param error The error object. - * @returns The delay in milliseconds, or 0 if not found or invalid. 
- */ -function getRetryAfterDelayMs(error: unknown): number { - if (typeof error === 'object' && error !== null) { - // Check for error.response.headers (common in axios errors) - if ( - 'response' in error && - typeof (error as { response?: unknown }).response === 'object' && - (error as { response?: unknown }).response !== null - ) { - const response = (error as { response: { headers?: unknown } }).response; - if ( - 'headers' in response && - typeof response.headers === 'object' && - response.headers !== null - ) { - const headers = response.headers as { 'retry-after'?: unknown }; - const retryAfterHeader = headers['retry-after']; - if (typeof retryAfterHeader === 'string') { - const retryAfterSeconds = parseInt(retryAfterHeader, 10); - if (!isNaN(retryAfterSeconds)) { - return retryAfterSeconds * 1000; - } - // It might be an HTTP date - const retryAfterDate = new Date(retryAfterHeader); - if (!isNaN(retryAfterDate.getTime())) { - return Math.max(0, retryAfterDate.getTime() - Date.now()); - } - } - } - } - } - return 0; -} - -/** - * Determines the delay duration based on the error, prioritizing Retry-After header. - * @param error The error object. - * @returns An object containing the delay duration in milliseconds and the error status. - */ -function getDelayDurationAndStatus(error: unknown): { - delayDurationMs: number; - errorStatus: number | undefined; -} { - const errorStatus = getErrorStatus(error); - let delayDurationMs = 0; - - if (errorStatus === 429) { - delayDurationMs = getRetryAfterDelayMs(error); - } - return { delayDurationMs, errorStatus }; -} - /** * Logs a message for a retry attempt when using exponential backoff. * @param attempt The current attempt number.