diff --git a/packages/cli/src/ui/components/ProQuotaDialog.tsx b/packages/cli/src/ui/components/ProQuotaDialog.tsx index 34de2747c6..cf7ae2a518 100644 --- a/packages/cli/src/ui/components/ProQuotaDialog.tsx +++ b/packages/cli/src/ui/components/ProQuotaDialog.tsx @@ -12,7 +12,7 @@ import { theme } from '../semantic-colors.js'; import { DEFAULT_GEMINI_FLASH_LITE_MODEL, DEFAULT_GEMINI_FLASH_MODEL, - PREVIEW_GEMINI_MODEL, + DEFAULT_GEMINI_MODEL, UserTierId, } from '@google/gemini-cli-core'; @@ -127,7 +127,7 @@ export function ProQuotaDialog({ - {failedModel === PREVIEW_GEMINI_MODEL && !isModelNotFoundError + {fallbackModel === DEFAULT_GEMINI_MODEL && !isModelNotFoundError ? 'Note: We will periodically retry Preview Model to see if congestion has cleared.' : 'Note: You can always use /model to select a different option.'} diff --git a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts index 13c0fd838b..dae2044af7 100644 --- a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts +++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts @@ -27,6 +27,8 @@ import { RetryableQuotaError, PREVIEW_GEMINI_MODEL, ModelNotFoundError, + DEFAULT_GEMINI_MODEL, + DEFAULT_GEMINI_FLASH_MODEL, } from '@google/gemini-cli-core'; import { useQuotaAndFallback } from './useQuotaAndFallback.js'; import type { UseHistoryManagerReturn } from './useHistoryManager.js'; @@ -432,7 +434,7 @@ To disable Gemini 3, disable "Preview features" in /settings.`, await act(() => { promise = handler( PREVIEW_GEMINI_MODEL, - 'gemini-flash', + DEFAULT_GEMINI_MODEL, new Error('preview model failed'), ); }); @@ -447,7 +449,42 @@ To disable Gemini 3, disable "Preview features" in /settings.`, const lastCall = (mockHistoryManager.addItem as Mock).mock.calls[0][0]; expect(lastCall.type).toBe(MessageType.INFO); expect(lastCall.text).toContain( - `Switched to fallback model gemini-flash. 
We will periodically check if ${PREVIEW_GEMINI_MODEL} is available again.`, + `Switched to fallback model gemini-2.5-pro. We will periodically check if ${PREVIEW_GEMINI_MODEL} is available again.`, + ); + }); + + it('should show a special message when falling back from the preview model, but do not show periodical check message for flash model fallback', async () => { + const { result } = renderHook(() => + useQuotaAndFallback({ + config: mockConfig, + historyManager: mockHistoryManager, + userTier: UserTierId.FREE, + setModelSwitchedFromQuotaError: mockSetModelSwitchedFromQuotaError, + }), + ); + + const handler = setFallbackHandlerSpy.mock + .calls[0][0] as FallbackModelHandler; + let promise: Promise; + await act(() => { + promise = handler( + PREVIEW_GEMINI_MODEL, + DEFAULT_GEMINI_FLASH_MODEL, + new Error('preview model failed'), + ); + }); + + await act(() => { + result.current.handleProQuotaChoice('retry_always'); + }); + + await promise!; + + expect(mockHistoryManager.addItem).toHaveBeenCalledTimes(1); + const lastCall = (mockHistoryManager.addItem as Mock).mock.calls[0][0]; + expect(lastCall.type).toBe(MessageType.INFO); + expect(lastCall.text).toContain( + `Switched to fallback model gemini-2.5-flash.`, ); }); }); diff --git a/packages/cli/src/ui/hooks/useQuotaAndFallback.ts b/packages/cli/src/ui/hooks/useQuotaAndFallback.ts index 87f768d69c..084494421a 100644 --- a/packages/cli/src/ui/hooks/useQuotaAndFallback.ts +++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.ts @@ -13,6 +13,7 @@ import { ModelNotFoundError, type UserTierId, PREVIEW_GEMINI_MODEL, + DEFAULT_GEMINI_MODEL, } from '@google/gemini-cli-core'; import { useCallback, useEffect, useRef, useState } from 'react'; import { type UseHistoryManagerReturn } from './useHistoryManager.js'; @@ -55,11 +56,16 @@ export function useQuotaAndFallback({ let message: string; let isTerminalQuotaError = false; let isModelNotFoundError = false; + const usageLimitReachedModel = + failedModel === 
DEFAULT_GEMINI_MODEL || + failedModel === PREVIEW_GEMINI_MODEL + ? 'all Pro models' + : failedModel; if (error instanceof TerminalQuotaError) { isTerminalQuotaError = true; // Common part of the message for both tiers const messageLines = [ - `Usage limit reached for ${failedModel}.`, + `Usage limit reached for ${usageLimitReachedModel}.`, error.retryDelayMs ? getResetTimeMessage(error.retryDelayMs) : null, `/stats for usage details`, `/auth to switch to API key.`, @@ -116,10 +122,13 @@ export function useQuotaAndFallback({ if (choice === 'retry_always') { // If we were recovering from a Preview Model failure, show a specific message. if (proQuotaRequest.failedModel === PREVIEW_GEMINI_MODEL) { + const showPeriodicalCheckMessage = + !proQuotaRequest.isModelNotFoundError && + proQuotaRequest.fallbackModel === DEFAULT_GEMINI_MODEL; historyManager.addItem( { type: MessageType.INFO, - text: `Switched to fallback model ${proQuotaRequest.fallbackModel}. ${!proQuotaRequest.isModelNotFoundError ? `We will periodically check if ${PREVIEW_GEMINI_MODEL} is available again.` : ''}`, + text: `Switched to fallback model ${proQuotaRequest.fallbackModel}. ${showPeriodicalCheckMessage ? 
`We will periodically check if ${PREVIEW_GEMINI_MODEL} is available again.` : ''}`, }, Date.now(), ); diff --git a/packages/core/src/fallback/handler.test.ts b/packages/core/src/fallback/handler.test.ts index 6f8e7de99f..09a24a11ef 100644 --- a/packages/core/src/fallback/handler.test.ts +++ b/packages/core/src/fallback/handler.test.ts @@ -25,6 +25,10 @@ import { import { logFlashFallback } from '../telemetry/index.js'; import type { FallbackModelHandler } from './types.js'; import { ModelNotFoundError } from '../utils/httpErrors.js'; +import { + RetryableQuotaError, + TerminalQuotaError, +} from '../utils/googleQuotaErrors.js'; // Mock the telemetry logger and event class vi.mock('../telemetry/index.js', () => ({ @@ -104,7 +108,7 @@ describe('handleFallback', () => { expect(result).toBeNull(); }); - describe('when handler returns "retry"', () => { + describe('when handler returns "retry_always"', () => { it('should activate fallback mode, log telemetry, and return true', async () => { mockHandler.mockResolvedValue('retry_always'); @@ -212,65 +216,175 @@ describe('handleFallback', () => { describe('Preview Model Fallback Logic', () => { const previewModel = PREVIEW_GEMINI_MODEL; - it('should always set Preview Model bypass mode on failure', async () => { - await handleFallback(mockConfig, previewModel, AUTH_OAUTH); + it('should only set Preview Model bypass mode on retryable quota failure', async () => { + const mockGoogleApiError = { + code: 429, + message: 'mock error', + details: [], + }; + const retryableQuotaError = new RetryableQuotaError( + 'Capacity error', + mockGoogleApiError, + 5, + ); + await handleFallback( + mockConfig, + previewModel, + AUTH_OAUTH, + retryableQuotaError, + ); expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(true); }); - it('should silently retry if Preview Model fallback mode is already active', async () => { - vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(true); + it('should not set Preview Model 
bypass mode on non-retryable quota failure', async () => { + const mockGoogleApiError = { + code: 429, + message: 'mock error', + details: [], + }; + const terminalQuotaError = new TerminalQuotaError( + 'quota error', + mockGoogleApiError, + 5, + ); + await handleFallback( + mockConfig, + previewModel, + AUTH_OAUTH, + terminalQuotaError, + ); - const result = await handleFallback(mockConfig, previewModel, AUTH_OAUTH); + expect(mockConfig.setPreviewModelBypassMode).not.toHaveBeenCalled(); + }); + + it('should silently retry if Preview Model fallback mode is already active and error is retryable error', async () => { + vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(true); + const mockGoogleApiError = { + code: 429, + message: 'mock error', + details: [], + }; + const retryableQuotaError = new RetryableQuotaError( + 'Capacity error', + mockGoogleApiError, + 5, + ); + const result = await handleFallback( + mockConfig, + previewModel, + AUTH_OAUTH, + retryableQuotaError, + ); expect(result).toBe(true); expect(mockHandler).not.toHaveBeenCalled(); }); - it('should activate Preview Model fallback mode when handler returns "retry_always"', async () => { + it('should activate Preview Model fallback mode when handler returns "retry_always" and is RetryableQuotaError', async () => { mockHandler.mockResolvedValue('retry_always'); - - const result = await handleFallback(mockConfig, previewModel, AUTH_OAUTH); + const mockGoogleApiError = { + code: 429, + message: 'mock error', + details: [], + }; + const retryableQuotaError = new RetryableQuotaError( + 'Capacity error', + mockGoogleApiError, + 5, + ); + const result = await handleFallback( + mockConfig, + previewModel, + AUTH_OAUTH, + retryableQuotaError, + ); expect(result).toBe(true); expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(true); expect(mockConfig.setPreviewModelFallbackMode).toHaveBeenCalledWith(true); }); + it('should activate regular fallback when handler returns "retry_always" 
and is TerminalQuotaError', async () => { + mockHandler.mockResolvedValue('retry_always'); + const mockGoogleApiError = { + code: 503, + message: 'mock error', + details: [], + }; + const terminalError = new TerminalQuotaError( + 'Quota error', + mockGoogleApiError, + 5, + ); + const result = await handleFallback( + mockConfig, + previewModel, + AUTH_OAUTH, + terminalError, + ); + + expect(result).toBe(true); + expect(mockConfig.setPreviewModelFallbackMode).not.toBeCalled(); + expect(mockConfig.setFallbackMode).toHaveBeenCalledWith(true); + }); + it('should NOT set fallback mode if user chooses "retry_once"', async () => { + const mockGoogleApiError = { + code: 429, + message: 'mock error', + details: [], + }; + const terminalQuotaError = new TerminalQuotaError( + 'quota error', + mockGoogleApiError, + 5, + ); mockHandler.mockResolvedValue('retry_once'); const result = await handleFallback( mockConfig, PREVIEW_GEMINI_MODEL, AuthType.LOGIN_WITH_GOOGLE, - new Error('Capacity'), + terminalQuotaError, ); expect(result).toBe(true); - expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(true); + expect(mockConfig.setPreviewModelBypassMode).not.toHaveBeenCalled(); expect(mockConfig.setPreviewModelFallbackMode).not.toHaveBeenCalled(); + expect(mockConfig.setFallbackMode).not.toHaveBeenCalled(); }); - it('should set fallback mode if user chooses "retry_always"', async () => { - mockHandler.mockResolvedValue('retry_always'); - - const result = await handleFallback( - mockConfig, - PREVIEW_GEMINI_MODEL, - AuthType.LOGIN_WITH_GOOGLE, - new Error('Capacity'), - ); - - expect(result).toBe(true); - expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(true); - expect(mockConfig.setPreviewModelFallbackMode).toHaveBeenCalledWith(true); - }); - it('should pass DEFAULT_GEMINI_MODEL as fallback when Preview Model fails', async () => { + it('should pass DEFAULT_GEMINI_MODEL as fallback when Preview Model fails with Retryable Error', async () => { const 
mockFallbackHandler = vi.fn().mockResolvedValue('stop'); vi.mocked(mockConfig.fallbackModelHandler!).mockImplementation( mockFallbackHandler, ); + const mockGoogleApiError = { + code: 429, + message: 'mock error', + details: [], + }; + const retryableQuotaError = new RetryableQuotaError( + 'Capacity error', + mockGoogleApiError, + 5, + ); + await handleFallback( + mockConfig, + PREVIEW_GEMINI_MODEL, + AuthType.LOGIN_WITH_GOOGLE, + retryableQuotaError, + ); + + expect(mockConfig.fallbackModelHandler).toHaveBeenCalledWith( + PREVIEW_GEMINI_MODEL, + DEFAULT_GEMINI_MODEL, + retryableQuotaError, + ); + }); + + it('should pass DEFAULT_GEMINI_MODEL as fallback when Preview Model fails with other error', async () => { await handleFallback( mockConfig, PREVIEW_GEMINI_MODEL, @@ -283,6 +397,31 @@ describe('handleFallback', () => { undefined, ); }); + + it('should pass DEFAULT_GEMINI_FLASH_MODEL as fallback when Preview Model fails with other error', async () => { + const mockGoogleApiError = { + code: 429, + message: 'mock error', + details: [], + }; + const terminalQuotaError = new TerminalQuotaError( + 'quota error', + mockGoogleApiError, + 5, + ); + await handleFallback( + mockConfig, + PREVIEW_GEMINI_MODEL, + AuthType.LOGIN_WITH_GOOGLE, + terminalQuotaError, + ); + + expect(mockConfig.fallbackModelHandler).toHaveBeenCalledWith( + PREVIEW_GEMINI_MODEL, + DEFAULT_GEMINI_FLASH_MODEL, + terminalQuotaError, + ); + }); }); it('should return null if ModelNotFoundError occurs for a non-preview model', async () => { diff --git a/packages/core/src/fallback/handler.ts b/packages/core/src/fallback/handler.ts index 43e2a96f42..5974657ce6 100644 --- a/packages/core/src/fallback/handler.ts +++ b/packages/core/src/fallback/handler.ts @@ -17,6 +17,7 @@ import { openBrowserSecurely } from '../utils/secure-browser-launcher.js'; import { debugLogger } from '../utils/debugLogger.js'; import { getErrorMessage } from '../utils/errors.js'; import { ModelNotFoundError } from 
'../utils/httpErrors.js'; +import { TerminalQuotaError } from '../utils/googleQuotaErrors.js'; const UPGRADE_URL_PAGE = 'https://goo.gle/set-up-gemini-code-assist'; @@ -36,10 +37,12 @@ export async function handleFallback( ) { return null; } - + const shouldActivatePreviewFallback = + failedModel === PREVIEW_GEMINI_MODEL && + !(error instanceof TerminalQuotaError); // Preview Model Specific Logic - if (failedModel === PREVIEW_GEMINI_MODEL) { - // Always set bypass mode for the immediate retry. + if (shouldActivatePreviewFallback) { + // For any error other than TerminalQuotaError, set bypass mode for the immediate retry. // This ensures the next attempt uses 2.5 Pro. config.setPreviewModelBypassMode(true); @@ -50,10 +53,9 @@ export async function handleFallback( } } - const fallbackModel = - failedModel === PREVIEW_GEMINI_MODEL - ? DEFAULT_GEMINI_MODEL - : DEFAULT_GEMINI_FLASH_MODEL; + const fallbackModel = shouldActivatePreviewFallback + ? DEFAULT_GEMINI_MODEL + : DEFAULT_GEMINI_FLASH_MODEL; // Consult UI Handler for Intent const fallbackModelHandler = config.fallbackModelHandler; @@ -70,7 +72,9 @@ export async function handleFallback( // Process Intent and Update State switch (intent) { case 'retry_always': - if (failedModel === PREVIEW_GEMINI_MODEL) { + // If the error is non-retryable, e.g. TerminalQuotaError, trigger a regular fallback to flash. + // For all other errors, activate previewModel fallback. + if (shouldActivatePreviewFallback) { activatePreviewModelFallbackMode(config); } else { activateFallbackMode(config, authType);