Let users know when falling back to flash, and update the error messa… (#12640)

This commit is contained in:
Sehoon Shon
2025-11-06 13:43:21 -05:00
committed by GitHub
parent c585470a71
commit 31b34b11ab
11 changed files with 174 additions and 225 deletions
@@ -11,18 +11,16 @@ import {
type FallbackIntent,
TerminalQuotaError,
UserTierId,
RetryableQuotaError,
} from '@google/gemini-cli-core';
import { useCallback, useEffect, useRef, useState } from 'react';
import { type UseHistoryManagerReturn } from './useHistoryManager.js';
import { AuthState, MessageType } from '../types.js';
import { MessageType } from '../types.js';
import { type ProQuotaDialogRequest } from '../contexts/UIStateContext.js';
interface UseQuotaAndFallbackArgs {
config: Config;
historyManager: UseHistoryManagerReturn;
userTier: UserTierId | undefined;
setAuthState: (state: AuthState) => void;
setModelSwitchedFromQuotaError: (value: boolean) => void;
}
@@ -30,7 +28,6 @@ export function useQuotaAndFallback({
config,
historyManager,
userTier,
setAuthState,
setModelSwitchedFromQuotaError,
}: UseQuotaAndFallbackArgs) {
const [proQuotaRequest, setProQuotaRequest] =
@@ -69,55 +66,24 @@ export function useQuotaAndFallback({
message = [
`⚡ You have reached your daily ${failedModel} quota limit.`,
`⚡ You can choose to authenticate with a paid API key or continue with the fallback model.`,
`To continue accessing the ${failedModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey}`,
`Increase your limits by using a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key`,
`⚡ You can switch authentication methods by typing /auth`,
].join('\n');
} else {
message = [
`⚡ You have reached your daily ${failedModel} quota limit.`,
`⚡ You can choose to authenticate with a paid API key or continue with the fallback model.`,
`To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist`,
`Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key`,
`You can switch authentication methods by typing /auth`,
].join('\n');
}
} else if (error instanceof RetryableQuotaError) {
// Short term quota retries exhausted (Automatic fallback)
const actionMessage = [
`⚡ Your requests are being throttled right now due to server being at capacity for ${failedModel}.`,
`⚡ Automatically switching from ${failedModel} to ${fallbackModel} for the remainder of this session.`,
].join('\n');
if (isPaidTier) {
message = [
actionMessage,
`⚡ To continue accessing the ${failedModel} model, retry your request after some time or consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey}`,
].join('\n');
} else {
message = [
actionMessage,
`⚡ Retry your requests after some time. Otherwise consider upgrading to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist`,
`Increase your limits by `,
`- signing up for a plan with higher limits at https://goo.gle/set-up-gemini-code-assist`,
`- or using a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key`,
`⚡ You can switch authentication methods by typing /auth`,
].join('\n');
}
} else {
// Other errors (Automatic fallback)
const actionMessage = `⚡ Automatically switching from ${failedModel} to ${fallbackModel} for faster responses for the remainder of this session.`;
if (isPaidTier) {
message = [
actionMessage,
`⚡ Your requests are being throttled temporarily due to server being at capacity for ${failedModel} or there is a service outage.`,
`⚡ To continue accessing the ${failedModel} model, you can retry your request after some time or consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey}`,
].join('\n');
} else {
message = [
actionMessage,
`⚡ Your requests are being throttled temporarily due to server being at capacity for ${failedModel} or there is a service outage.`,
`⚡ To avoid being throttled, you can retry your request after some time or upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist`,
`⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key`,
`⚡ You can switch authentication methods by typing /auth`,
].join('\n');
}
message = [
`🚦Pardon Our Congestion! It looks like ${failedModel} is very popular at the moment.`,
`Please retry again later.`,
].join('\n');
}
// Add message to UI history
@@ -132,44 +98,37 @@ export function useQuotaAndFallback({
setModelSwitchedFromQuotaError(true);
config.setQuotaErrorOccurred(true);
// Interactive Fallback for Pro quota
if (error instanceof TerminalQuotaError) {
if (isDialogPending.current) {
return 'stop'; // A dialog is already active, so just stop this request.
}
isDialogPending.current = true;
const intent: FallbackIntent = await new Promise<FallbackIntent>(
(resolve) => {
setProQuotaRequest({
failedModel,
fallbackModel,
resolve,
});
},
);
return intent;
if (isDialogPending.current) {
return 'stop'; // A dialog is already active, so just stop this request.
}
isDialogPending.current = true;
return 'stop';
const intent: FallbackIntent = await new Promise<FallbackIntent>(
(resolve) => {
setProQuotaRequest({
failedModel,
fallbackModel,
resolve,
});
},
);
return intent;
};
config.setFallbackModelHandler(fallbackHandler);
}, [config, historyManager, userTier, setModelSwitchedFromQuotaError]);
const handleProQuotaChoice = useCallback(
(choice: 'auth' | 'continue') => {
(choice: FallbackIntent) => {
if (!proQuotaRequest) return;
const intent: FallbackIntent = choice === 'auth' ? 'auth' : 'retry';
const intent: FallbackIntent = choice;
proQuotaRequest.resolve(intent);
setProQuotaRequest(null);
isDialogPending.current = false; // Reset the flag here
if (choice === 'auth') {
setAuthState(AuthState.Updating);
} else {
if (choice === 'retry') {
historyManager.addItem(
{
type: MessageType.INFO,
@@ -179,7 +138,7 @@ export function useQuotaAndFallback({
);
}
},
[proQuotaRequest, setAuthState, historyManager],
[proQuotaRequest, historyManager],
);
return {