diff --git a/packages/cli/src/ui/components/ProQuotaDialog.tsx b/packages/cli/src/ui/components/ProQuotaDialog.tsx
index 34de2747c6..cf7ae2a518 100644
--- a/packages/cli/src/ui/components/ProQuotaDialog.tsx
+++ b/packages/cli/src/ui/components/ProQuotaDialog.tsx
@@ -12,7 +12,7 @@ import { theme } from '../semantic-colors.js';
import {
DEFAULT_GEMINI_FLASH_LITE_MODEL,
DEFAULT_GEMINI_FLASH_MODEL,
- PREVIEW_GEMINI_MODEL,
+ DEFAULT_GEMINI_MODEL,
UserTierId,
} from '@google/gemini-cli-core';
@@ -127,7 +127,7 @@ export function ProQuotaDialog({
- {failedModel === PREVIEW_GEMINI_MODEL && !isModelNotFoundError
+ {fallbackModel === DEFAULT_GEMINI_MODEL && !isModelNotFoundError
? 'Note: We will periodically retry Preview Model to see if congestion has cleared.'
: 'Note: You can always use /model to select a different option.'}
diff --git a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts
index 13c0fd838b..dae2044af7 100644
--- a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts
+++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts
@@ -27,6 +27,8 @@ import {
RetryableQuotaError,
PREVIEW_GEMINI_MODEL,
ModelNotFoundError,
+ DEFAULT_GEMINI_MODEL,
+ DEFAULT_GEMINI_FLASH_MODEL,
} from '@google/gemini-cli-core';
import { useQuotaAndFallback } from './useQuotaAndFallback.js';
import type { UseHistoryManagerReturn } from './useHistoryManager.js';
@@ -432,7 +434,7 @@ To disable Gemini 3, disable "Preview features" in /settings.`,
await act(() => {
promise = handler(
PREVIEW_GEMINI_MODEL,
- 'gemini-flash',
+ DEFAULT_GEMINI_MODEL,
new Error('preview model failed'),
);
});
@@ -447,7 +449,42 @@ To disable Gemini 3, disable "Preview features" in /settings.`,
const lastCall = (mockHistoryManager.addItem as Mock).mock.calls[0][0];
expect(lastCall.type).toBe(MessageType.INFO);
expect(lastCall.text).toContain(
- `Switched to fallback model gemini-flash. We will periodically check if ${PREVIEW_GEMINI_MODEL} is available again.`,
+ `Switched to fallback model gemini-2.5-pro. We will periodically check if ${PREVIEW_GEMINI_MODEL} is available again.`,
+ );
+ });
+
+ it('should show a special message when falling back from the preview model, but do not show periodical check message for flash model fallback', async () => {
+ const { result } = renderHook(() =>
+ useQuotaAndFallback({
+ config: mockConfig,
+ historyManager: mockHistoryManager,
+ userTier: UserTierId.FREE,
+ setModelSwitchedFromQuotaError: mockSetModelSwitchedFromQuotaError,
+ }),
+ );
+
+ const handler = setFallbackHandlerSpy.mock
+ .calls[0][0] as FallbackModelHandler;
+ let promise: Promise<unknown>;
+ await act(() => {
+ promise = handler(
+ PREVIEW_GEMINI_MODEL,
+ DEFAULT_GEMINI_FLASH_MODEL,
+ new Error('preview model failed'),
+ );
+ });
+
+ await act(() => {
+ result.current.handleProQuotaChoice('retry_always');
+ });
+
+ await promise!;
+
+ expect(mockHistoryManager.addItem).toHaveBeenCalledTimes(1);
+ const lastCall = (mockHistoryManager.addItem as Mock).mock.calls[0][0];
+ expect(lastCall.type).toBe(MessageType.INFO);
+ expect(lastCall.text).toContain(
+ `Switched to fallback model gemini-2.5-flash.`,
);
});
});
diff --git a/packages/cli/src/ui/hooks/useQuotaAndFallback.ts b/packages/cli/src/ui/hooks/useQuotaAndFallback.ts
index 87f768d69c..084494421a 100644
--- a/packages/cli/src/ui/hooks/useQuotaAndFallback.ts
+++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.ts
@@ -13,6 +13,7 @@ import {
ModelNotFoundError,
type UserTierId,
PREVIEW_GEMINI_MODEL,
+ DEFAULT_GEMINI_MODEL,
} from '@google/gemini-cli-core';
import { useCallback, useEffect, useRef, useState } from 'react';
import { type UseHistoryManagerReturn } from './useHistoryManager.js';
@@ -55,11 +56,16 @@ export function useQuotaAndFallback({
let message: string;
let isTerminalQuotaError = false;
let isModelNotFoundError = false;
+ const usageLimitReachedModel =
+ failedModel === DEFAULT_GEMINI_MODEL ||
+ failedModel === PREVIEW_GEMINI_MODEL
+ ? 'all Pro models'
+ : failedModel;
if (error instanceof TerminalQuotaError) {
isTerminalQuotaError = true;
// Common part of the message for both tiers
const messageLines = [
- `Usage limit reached for ${failedModel}.`,
+ `Usage limit reached for ${usageLimitReachedModel}.`,
error.retryDelayMs ? getResetTimeMessage(error.retryDelayMs) : null,
`/stats for usage details`,
`/auth to switch to API key.`,
@@ -116,10 +122,13 @@ export function useQuotaAndFallback({
if (choice === 'retry_always') {
// If we were recovering from a Preview Model failure, show a specific message.
if (proQuotaRequest.failedModel === PREVIEW_GEMINI_MODEL) {
+ const showPeriodicalCheckMessage =
+ !proQuotaRequest.isModelNotFoundError &&
+ proQuotaRequest.fallbackModel === DEFAULT_GEMINI_MODEL;
historyManager.addItem(
{
type: MessageType.INFO,
- text: `Switched to fallback model ${proQuotaRequest.fallbackModel}. ${!proQuotaRequest.isModelNotFoundError ? `We will periodically check if ${PREVIEW_GEMINI_MODEL} is available again.` : ''}`,
+ text: `Switched to fallback model ${proQuotaRequest.fallbackModel}. ${showPeriodicalCheckMessage ? `We will periodically check if ${PREVIEW_GEMINI_MODEL} is available again.` : ''}`,
},
Date.now(),
);
diff --git a/packages/core/src/fallback/handler.test.ts b/packages/core/src/fallback/handler.test.ts
index 6f8e7de99f..09a24a11ef 100644
--- a/packages/core/src/fallback/handler.test.ts
+++ b/packages/core/src/fallback/handler.test.ts
@@ -25,6 +25,10 @@ import {
import { logFlashFallback } from '../telemetry/index.js';
import type { FallbackModelHandler } from './types.js';
import { ModelNotFoundError } from '../utils/httpErrors.js';
+import {
+ RetryableQuotaError,
+ TerminalQuotaError,
+} from '../utils/googleQuotaErrors.js';
// Mock the telemetry logger and event class
vi.mock('../telemetry/index.js', () => ({
@@ -104,7 +108,7 @@ describe('handleFallback', () => {
expect(result).toBeNull();
});
- describe('when handler returns "retry"', () => {
+ describe('when handler returns "retry_always"', () => {
it('should activate fallback mode, log telemetry, and return true', async () => {
mockHandler.mockResolvedValue('retry_always');
@@ -212,65 +216,175 @@ describe('handleFallback', () => {
describe('Preview Model Fallback Logic', () => {
const previewModel = PREVIEW_GEMINI_MODEL;
- it('should always set Preview Model bypass mode on failure', async () => {
- await handleFallback(mockConfig, previewModel, AUTH_OAUTH);
+ it('should only set Preview Model bypass mode on retryable quota failure', async () => {
+ const mockGoogleApiError = {
+ code: 429,
+ message: 'mock error',
+ details: [],
+ };
+ const retryableQuotaError = new RetryableQuotaError(
+ 'Capacity error',
+ mockGoogleApiError,
+ 5,
+ );
+ await handleFallback(
+ mockConfig,
+ previewModel,
+ AUTH_OAUTH,
+ retryableQuotaError,
+ );
expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(true);
});
- it('should silently retry if Preview Model fallback mode is already active', async () => {
- vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(true);
+ it('should not set Preview Model bypass mode on non-retryable quota failure', async () => {
+ const mockGoogleApiError = {
+ code: 429,
+ message: 'mock error',
+ details: [],
+ };
+ const terminalQuotaError = new TerminalQuotaError(
+ 'quota error',
+ mockGoogleApiError,
+ 5,
+ );
+ await handleFallback(
+ mockConfig,
+ previewModel,
+ AUTH_OAUTH,
+ terminalQuotaError,
+ );
- const result = await handleFallback(mockConfig, previewModel, AUTH_OAUTH);
+ expect(mockConfig.setPreviewModelBypassMode).not.toHaveBeenCalled();
+ });
+
+ it('should silently retry if Preview Model fallback mode is already active and error is retryable error', async () => {
+ vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(true);
+ const mockGoogleApiError = {
+ code: 429,
+ message: 'mock error',
+ details: [],
+ };
+ const retryableQuotaError = new RetryableQuotaError(
+ 'Capacity error',
+ mockGoogleApiError,
+ 5,
+ );
+ const result = await handleFallback(
+ mockConfig,
+ previewModel,
+ AUTH_OAUTH,
+ retryableQuotaError,
+ );
expect(result).toBe(true);
expect(mockHandler).not.toHaveBeenCalled();
});
- it('should activate Preview Model fallback mode when handler returns "retry_always"', async () => {
+ it('should activate Preview Model fallback mode when handler returns "retry_always" and is RetryableQuotaError', async () => {
mockHandler.mockResolvedValue('retry_always');
-
- const result = await handleFallback(mockConfig, previewModel, AUTH_OAUTH);
+ const mockGoogleApiError = {
+ code: 429,
+ message: 'mock error',
+ details: [],
+ };
+ const retryableQuotaError = new RetryableQuotaError(
+ 'Capacity error',
+ mockGoogleApiError,
+ 5,
+ );
+ const result = await handleFallback(
+ mockConfig,
+ previewModel,
+ AUTH_OAUTH,
+ retryableQuotaError,
+ );
expect(result).toBe(true);
expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(true);
expect(mockConfig.setPreviewModelFallbackMode).toHaveBeenCalledWith(true);
});
+ it('should activate regular fallback when handler returns "retry_always" and is TerminalQuotaError', async () => {
+ mockHandler.mockResolvedValue('retry_always');
+ const mockGoogleApiError = {
+ code: 503,
+ message: 'mock error',
+ details: [],
+ };
+ const terminalError = new TerminalQuotaError(
+ 'Quota error',
+ mockGoogleApiError,
+ 5,
+ );
+ const result = await handleFallback(
+ mockConfig,
+ previewModel,
+ AUTH_OAUTH,
+ terminalError,
+ );
+
+ expect(result).toBe(true);
+ expect(mockConfig.setPreviewModelFallbackMode).not.toBeCalled();
+ expect(mockConfig.setFallbackMode).toHaveBeenCalledWith(true);
+ });
+
it('should NOT set fallback mode if user chooses "retry_once"', async () => {
+ const mockGoogleApiError = {
+ code: 429,
+ message: 'mock error',
+ details: [],
+ };
+ const terminalQuotaError = new TerminalQuotaError(
+ 'quota error',
+ mockGoogleApiError,
+ 5,
+ );
mockHandler.mockResolvedValue('retry_once');
const result = await handleFallback(
mockConfig,
PREVIEW_GEMINI_MODEL,
AuthType.LOGIN_WITH_GOOGLE,
- new Error('Capacity'),
+ terminalQuotaError,
);
expect(result).toBe(true);
- expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(true);
+ expect(mockConfig.setPreviewModelBypassMode).not.toHaveBeenCalled();
expect(mockConfig.setPreviewModelFallbackMode).not.toHaveBeenCalled();
+ expect(mockConfig.setFallbackMode).not.toHaveBeenCalled();
});
- it('should set fallback mode if user chooses "retry_always"', async () => {
- mockHandler.mockResolvedValue('retry_always');
-
- const result = await handleFallback(
- mockConfig,
- PREVIEW_GEMINI_MODEL,
- AuthType.LOGIN_WITH_GOOGLE,
- new Error('Capacity'),
- );
-
- expect(result).toBe(true);
- expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(true);
- expect(mockConfig.setPreviewModelFallbackMode).toHaveBeenCalledWith(true);
- });
- it('should pass DEFAULT_GEMINI_MODEL as fallback when Preview Model fails', async () => {
+ it('should pass DEFAULT_GEMINI_MODEL as fallback when Preview Model fails with Retryable Error', async () => {
const mockFallbackHandler = vi.fn().mockResolvedValue('stop');
vi.mocked(mockConfig.fallbackModelHandler!).mockImplementation(
mockFallbackHandler,
);
+ const mockGoogleApiError = {
+ code: 429,
+ message: 'mock error',
+ details: [],
+ };
+ const retryableQuotaError = new RetryableQuotaError(
+ 'Capacity error',
+ mockGoogleApiError,
+ 5,
+ );
+ await handleFallback(
+ mockConfig,
+ PREVIEW_GEMINI_MODEL,
+ AuthType.LOGIN_WITH_GOOGLE,
+ retryableQuotaError,
+ );
+
+ expect(mockConfig.fallbackModelHandler).toHaveBeenCalledWith(
+ PREVIEW_GEMINI_MODEL,
+ DEFAULT_GEMINI_MODEL,
+ retryableQuotaError,
+ );
+ });
+
+ it('should pass DEFAULT_GEMINI_MODEL as fallback when Preview Model fails with other error', async () => {
await handleFallback(
mockConfig,
PREVIEW_GEMINI_MODEL,
@@ -283,6 +397,31 @@ describe('handleFallback', () => {
undefined,
);
});
+
+ it('should pass DEFAULT_GEMINI_FLASH_MODEL as fallback when Preview Model fails with TerminalQuotaError', async () => {
+ const mockGoogleApiError = {
+ code: 429,
+ message: 'mock error',
+ details: [],
+ };
+ const terminalQuotaError = new TerminalQuotaError(
+ 'quota error',
+ mockGoogleApiError,
+ 5,
+ );
+ await handleFallback(
+ mockConfig,
+ PREVIEW_GEMINI_MODEL,
+ AuthType.LOGIN_WITH_GOOGLE,
+ terminalQuotaError,
+ );
+
+ expect(mockConfig.fallbackModelHandler).toHaveBeenCalledWith(
+ PREVIEW_GEMINI_MODEL,
+ DEFAULT_GEMINI_FLASH_MODEL,
+ terminalQuotaError,
+ );
+ });
});
it('should return null if ModelNotFoundError occurs for a non-preview model', async () => {
diff --git a/packages/core/src/fallback/handler.ts b/packages/core/src/fallback/handler.ts
index 43e2a96f42..5974657ce6 100644
--- a/packages/core/src/fallback/handler.ts
+++ b/packages/core/src/fallback/handler.ts
@@ -17,6 +17,7 @@ import { openBrowserSecurely } from '../utils/secure-browser-launcher.js';
import { debugLogger } from '../utils/debugLogger.js';
import { getErrorMessage } from '../utils/errors.js';
import { ModelNotFoundError } from '../utils/httpErrors.js';
+import { TerminalQuotaError } from '../utils/googleQuotaErrors.js';
const UPGRADE_URL_PAGE = 'https://goo.gle/set-up-gemini-code-assist';
@@ -36,10 +37,12 @@ export async function handleFallback(
) {
return null;
}
-
+ const shouldActivatePreviewFallback =
+ failedModel === PREVIEW_GEMINI_MODEL &&
+ !(error instanceof TerminalQuotaError);
// Preview Model Specific Logic
- if (failedModel === PREVIEW_GEMINI_MODEL) {
- // Always set bypass mode for the immediate retry.
+ if (shouldActivatePreviewFallback) {
+ // Set bypass mode for the immediate retry, except when the failure is a TerminalQuotaError.
// This ensures the next attempt uses 2.5 Pro.
config.setPreviewModelBypassMode(true);
@@ -50,10 +53,9 @@ export async function handleFallback(
}
}
- const fallbackModel =
- failedModel === PREVIEW_GEMINI_MODEL
- ? DEFAULT_GEMINI_MODEL
- : DEFAULT_GEMINI_FLASH_MODEL;
+ const fallbackModel = shouldActivatePreviewFallback
+ ? DEFAULT_GEMINI_MODEL
+ : DEFAULT_GEMINI_FLASH_MODEL;
// Consult UI Handler for Intent
const fallbackModelHandler = config.fallbackModelHandler;
@@ -70,7 +72,9 @@ export async function handleFallback(
// Process Intent and Update State
switch (intent) {
case 'retry_always':
- if (failedModel === PREVIEW_GEMINI_MODEL) {
+ // If the error is non-retryable (e.g. a TerminalQuotaError), trigger a regular fallback to Flash.
+ // For all other Preview Model errors, activate Preview Model fallback mode.
+ if (shouldActivatePreviewFallback) {
activatePreviewModelFallbackMode(config);
} else {
activateFallbackMode(config, authType);