refactor: Centralize and improve model fallback handling (#7634)

This commit is contained in:
Abhi
2025-09-08 16:19:52 -04:00
committed by GitHub
parent 9c71d3dd64
commit f6f2fff724
20 changed files with 1543 additions and 380 deletions
+33 -169
View File
@@ -24,18 +24,15 @@ import { MessageType, StreamingState } from './types.js';
import {
type EditorType,
type Config,
IdeClient,
type DetectedIde,
ideContext,
type IdeContext,
type UserTierId,
DEFAULT_GEMINI_FLASH_MODEL,
IdeClient,
ideContext,
getErrorMessage,
getAllGeminiMdFilenames,
UserTierId,
AuthType,
isProQuotaExceededError,
isGenericQuotaExceededError,
logFlashFallback,
FlashFallbackEvent,
clearCachedCredentialFile,
} from '@google/gemini-cli-core';
import { validateAuthMethod } from '../config/auth.js';
@@ -44,6 +41,7 @@ import process from 'node:process';
import { useHistory } from './hooks/useHistoryManager.js';
import { useThemeCommand } from './hooks/useThemeCommand.js';
import { useAuthCommand } from './auth/useAuth.js';
import { useQuotaAndFallback } from './hooks/useQuotaAndFallback.js';
import { useEditorSettings } from './hooks/useEditorSettings.js';
import { useSettingsCommand } from './hooks/useSettingsCommand.js';
import { useSlashCommandProcessor } from './hooks/slashCommandProcessor.js';
@@ -123,12 +121,18 @@ export const AppContainer = (props: AppContainerProps) => {
const [isTrustedFolder, setIsTrustedFolder] = useState<boolean | undefined>(
config.isTrustedFolder(),
);
const [currentModel, setCurrentModel] = useState(config.getModel());
// Helper to determine the effective model, considering the fallback state.
const getEffectiveModel = useCallback(() => {
if (config.isInFallbackMode()) {
return DEFAULT_GEMINI_FLASH_MODEL;
}
return config.getModel();
}, [config]);
const [currentModel, setCurrentModel] = useState(getEffectiveModel());
const [userTier, setUserTier] = useState<UserTierId | undefined>(undefined);
const [isProQuotaDialogOpen, setIsProQuotaDialogOpen] = useState(false);
const [proQuotaDialogResolver, setProQuotaDialogResolver] = useState<
((value: boolean) => void) | null
>(null);
// Auto-accept indicator
const showAutoAcceptIndicator = useAutoAcceptIndicator({
@@ -167,18 +171,17 @@ export const AppContainer = (props: AppContainerProps) => {
// Watch for model changes (e.g., from Flash fallback)
useEffect(() => {
const checkModelChange = () => {
const configModel = config.getModel();
if (configModel !== currentModel) {
setCurrentModel(configModel);
const effectiveModel = getEffectiveModel();
if (effectiveModel !== currentModel) {
setCurrentModel(effectiveModel);
}
};
// Check immediately and then periodically
checkModelChange();
const interval = setInterval(checkModelChange, 1000); // Check every second
return () => clearInterval(interval);
}, [config, currentModel]);
}, [config, currentModel, getEffectiveModel]);
const {
consoleMessages,
@@ -273,6 +276,14 @@ export const AppContainer = (props: AppContainerProps) => {
config,
);
const { proQuotaRequest, handleProQuotaChoice } = useQuotaAndFallback({
config,
historyManager,
userTier,
setAuthState,
setModelSwitchedFromQuotaError,
});
// Derive auth state variables for backward compatibility with UIStateContext
const isAuthDialogOpen = authState === AuthState.Updating;
const isAuthenticating = authState === AuthState.Unauthenticated;
@@ -477,132 +488,6 @@ Logging in with Google... Please restart Gemini CLI to continue.
}
}, [config, historyManager, settings.merged]);
// Set up Flash fallback handler
useEffect(() => {
const flashFallbackHandler = async (
currentModel: string,
fallbackModel: string,
error?: unknown,
): Promise<boolean> => {
// Check if we've already switched to the fallback model
if (config.isInFallbackMode()) {
// If we're already in fallback mode, don't show the dialog again
return false;
}
let message: string;
if (
config.getContentGeneratorConfig().authType ===
AuthType.LOGIN_WITH_GOOGLE
) {
// Use actual user tier if available; otherwise, default to FREE tier behavior (safe default)
const isPaidTier =
userTier === UserTierId.LEGACY || userTier === UserTierId.STANDARD;
// Check if this is a Pro quota exceeded error
if (error && isProQuotaExceededError(error)) {
if (isPaidTier) {
message = `⚡ You have reached your daily ${currentModel} quota limit.
⚡ You can choose to authenticate with a paid API key or continue with the fallback model.
⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
} else {
message = `⚡ You have reached your daily ${currentModel} quota limit.
⚡ You can choose to authenticate with a paid API key or continue with the fallback model.
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
⚡ You can switch authentication methods by typing /auth`;
}
} else if (error && isGenericQuotaExceededError(error)) {
if (isPaidTier) {
message = `⚡ You have reached your daily quota limit.
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
} else {
message = `⚡ You have reached your daily quota limit.
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
⚡ You can switch authentication methods by typing /auth`;
}
} else {
if (isPaidTier) {
// Default fallback message for other cases (like consecutive 429s)
message = `⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.
⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${currentModel} quota limit
⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
} else {
// Default fallback message for other cases (like consecutive 429s)
message = `⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.
⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${currentModel} quota limit
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
⚡ You can switch authentication methods by typing /auth`;
}
}
// Add message to UI history
historyManager.addItem(
{
type: MessageType.INFO,
text: message,
},
Date.now(),
);
// For Pro quota errors, show the dialog and wait for user's choice
if (error && isProQuotaExceededError(error)) {
// Set the flag to prevent tool continuation
setModelSwitchedFromQuotaError(true);
// Set global quota error flag to prevent Flash model calls
config.setQuotaErrorOccurred(true);
// Show the ProQuotaDialog and wait for user's choice
const shouldContinueWithFallback = await new Promise<boolean>(
(resolve) => {
setIsProQuotaDialogOpen(true);
setProQuotaDialogResolver(() => resolve);
},
);
// If user chose to continue with fallback, we don't need to stop the current prompt
if (shouldContinueWithFallback) {
// Switch to fallback model for future use
config.setModel(fallbackModel);
config.setFallbackMode(true);
logFlashFallback(
config,
new FlashFallbackEvent(
config.getContentGeneratorConfig().authType!,
),
);
return true; // Continue with current prompt using fallback model
}
// If user chose to authenticate, stop current prompt
return false;
}
// For other quota errors, automatically switch to fallback model
// Set the flag to prevent tool continuation
setModelSwitchedFromQuotaError(true);
// Set global quota error flag to prevent Flash model calls
config.setQuotaErrorOccurred(true);
}
// Switch model for future use but return false to stop current retry
config.setModel(fallbackModel);
config.setFallbackMode(true);
logFlashFallback(
config,
new FlashFallbackEvent(config.getContentGeneratorConfig().authType!),
);
return false; // Don't continue with current prompt
};
config.setFlashFallbackHandler(flashFallbackHandler);
}, [config, historyManager, userTier]);
const cancelHandlerRef = useRef<() => void>(() => {});
const {
@@ -681,22 +566,6 @@ Logging in with Google... Please restart Gemini CLI to continue.
refreshStatic();
}, [historyManager, clearConsoleMessagesState, refreshStatic]);
const handleProQuotaChoice = useCallback(
(choice: 'auth' | 'continue') => {
setIsProQuotaDialogOpen(false);
if (proQuotaDialogResolver) {
if (choice === 'auth') {
proQuotaDialogResolver(false); // Don't continue with fallback, show auth dialog
setAuthState(AuthState.Updating);
} else {
proQuotaDialogResolver(true); // Continue with fallback model
}
setProQuotaDialogResolver(null);
}
},
[proQuotaDialogResolver, setAuthState],
);
const { handleInput: vimHandleInput } = useVim(buffer, handleFinalSubmit);
/**
@@ -712,7 +581,7 @@ Logging in with Google... Please restart Gemini CLI to continue.
!isProcessing &&
(streamingState === StreamingState.Idle ||
streamingState === StreamingState.Responding) &&
!isProQuotaDialogOpen;
!proQuotaRequest;
// Compute available terminal height based on controls measurement
const availableTerminalHeight = useMemo(() => {
@@ -1029,7 +898,7 @@ Logging in with Google... Please restart Gemini CLI to continue.
isAuthDialogOpen ||
isEditorDialogOpen ||
showPrivacyNotice ||
isProQuotaDialogOpen,
!!proQuotaRequest,
[
showWorkspaceMigrationDialog,
shouldShowIdePrompt,
@@ -1042,7 +911,7 @@ Logging in with Google... Please restart Gemini CLI to continue.
isAuthDialogOpen,
isEditorDialogOpen,
showPrivacyNotice,
isProQuotaDialogOpen,
proQuotaRequest,
],
);
@@ -1101,11 +970,9 @@ Logging in with Google... Please restart Gemini CLI to continue.
showAutoAcceptIndicator,
showWorkspaceMigrationDialog,
workspaceExtensions,
// Use current state values instead of config.getModel()
currentModel,
userTier,
isProQuotaDialogOpen,
// New fields
proQuotaRequest,
contextFileNames,
errorCount,
availableTerminalHeight,
@@ -1174,10 +1041,8 @@ Logging in with Google... Please restart Gemini CLI to continue.
showAutoAcceptIndicator,
showWorkspaceMigrationDialog,
workspaceExtensions,
// Quota-related state dependencies
userTier,
isProQuotaDialogOpen,
// New fields dependencies
proQuotaRequest,
contextFileNames,
errorCount,
availableTerminalHeight,
@@ -1196,7 +1061,6 @@ Logging in with Google... Please restart Gemini CLI to continue.
updateInfo,
showIdeRestartPrompt,
isRestarting,
// Quota-related dependencies
currentModel,
],
);