fix: handle request retries and model fallback correctly (#11624)

This commit is contained in:
Gaurav
2025-10-24 11:09:06 -07:00
committed by GitHub
parent c2104a14fb
commit ee92db7533
14 changed files with 1357 additions and 804 deletions
@@ -9,9 +9,9 @@ import {
type Config,
type FallbackModelHandler,
type FallbackIntent,
isGenericQuotaExceededError,
isProQuotaExceededError,
TerminalQuotaError,
UserTierId,
RetryableQuotaError,
} from '@google/gemini-cli-core';
import { useCallback, useEffect, useRef, useState } from 'react';
import { type UseHistoryManagerReturn } from './useHistoryManager.js';
@@ -63,7 +63,7 @@ export function useQuotaAndFallback({
let message: string;
if (error && isProQuotaExceededError(error)) {
if (error instanceof TerminalQuotaError) {
// Pro Quota specific messages (Interactive)
if (isPaidTier) {
message = `⚡ You have reached your daily ${failedModel} quota limit.
@@ -76,31 +76,30 @@ export function useQuotaAndFallback({
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
⚡ You can switch authentication methods by typing /auth`;
}
} else if (error && isGenericQuotaExceededError(error)) {
// Generic Quota (Automatic fallback)
const actionMessage = `⚡ You have reached your daily quota limit.\n⚡ Automatically switching from ${failedModel} to ${fallbackModel} for the remainder of this session.`;
} else if (error instanceof RetryableQuotaError) {
// Short term quota retries exhausted (Automatic fallback)
const actionMessage = `⚡ Your requests are being throttled right now due to server being at capacity for ${failedModel}.\n⚡ Automatically switching from ${failedModel} to ${fallbackModel} for the remainder of this session.`;
if (isPaidTier) {
message = `${actionMessage}
⚡ To continue accessing the ${failedModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
⚡ To continue accessing the ${failedModel} model, retry your request after some time or consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
} else {
message = `${actionMessage}
To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
Retry your requests after some time. Otherwise consider upgrading to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
⚡ You can switch authentication methods by typing /auth`;
}
} else {
// Consecutive 429s or other errors (Automatic fallback)
// Other errors (Automatic fallback)
const actionMessage = `⚡ Automatically switching from ${failedModel} to ${fallbackModel} for faster responses for the remainder of this session.`;
if (isPaidTier) {
message = `${actionMessage}
Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${failedModel} quota limit
⚡ To continue accessing the ${failedModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
Your requests are being throttled temporarily due to server being at capacity for ${failedModel} or there is a service outage.
⚡ To continue accessing the ${failedModel} model, you can retry your request after some time or consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
} else {
message = `${actionMessage}
Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${failedModel} quota limit
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
Your requests are being throttled temporarily due to server being at capacity for ${failedModel} or there is a service outage.
⚡ To avoid being throttled, you can retry your request after some time or upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
⚡ You can switch authentication methods by typing /auth`;
}
@@ -119,7 +118,7 @@ export function useQuotaAndFallback({
config.setQuotaErrorOccurred(true);
// Interactive Fallback for Pro quota
if (error && isProQuotaExceededError(error)) {
if (error instanceof TerminalQuotaError) {
if (isDialogPending.current) {
return 'stop'; // A dialog is already active, so just stop this request.
}