Let users know when falling back to flash, and update the error messa… (#12640)

This commit is contained in:
Sehoon Shon
2025-11-06 13:43:21 -05:00
committed by GitHub
parent c585470a71
commit 31b34b11ab
11 changed files with 174 additions and 225 deletions

View File

@@ -671,9 +671,9 @@ describe('AppContainer State Management', () => {
// You can even verify that the plumbed function is callable
act(() => {
capturedUIActions.handleProQuotaChoice('auth');
capturedUIActions.handleProQuotaChoice('retry_later');
});
expect(mockHandler).toHaveBeenCalledWith('auth');
expect(mockHandler).toHaveBeenCalledWith('retry_later');
unmount();
});
});

View File

@@ -390,7 +390,6 @@ export const AppContainer = (props: AppContainerProps) => {
config,
historyManager,
userTier,
setAuthState,
setModelSwitchedFromQuotaError,
});

View File

@@ -53,7 +53,6 @@ export const DialogManager = ({
if (uiState.proQuotaRequest) {
return (
<ProQuotaDialog
failedModel={uiState.proQuotaRequest.failedModel}
fallbackModel={uiState.proQuotaRequest.fallbackModel}
onChoice={uiActions.handleProQuotaChoice}
/>

View File

@@ -22,29 +22,27 @@ describe('ProQuotaDialog', () => {
it('should render with correct title and options', () => {
const { lastFrame, unmount } = render(
<ProQuotaDialog
failedModel="gemini-2.5-pro"
fallbackModel="gemini-2.5-flash"
onChoice={() => {}}
/>,
<ProQuotaDialog fallbackModel="gemini-2.5-flash" onChoice={() => {}} />,
);
const output = lastFrame();
expect(output).toContain('Pro quota limit reached for gemini-2.5-pro.');
expect(output).toContain(
'Note: You can always use /model to select a different option.',
);
// Check that RadioButtonSelect was called with the correct items
expect(RadioButtonSelect).toHaveBeenCalledWith(
expect.objectContaining({
items: [
{
label: 'Change auth (executes the /auth command)',
value: 'auth',
key: 'auth',
label: 'Try again later',
value: 'retry_later' as const,
key: 'retry_later',
},
{
label: `Continue with gemini-2.5-flash`,
value: 'continue',
key: 'continue',
label: `Switch to gemini-2.5-flash for the rest of this session`,
value: 'retry' as const,
key: 'retry',
},
],
}),
@@ -57,7 +55,6 @@ describe('ProQuotaDialog', () => {
const mockOnChoice = vi.fn();
const { unmount } = render(
<ProQuotaDialog
failedModel="gemini-2.5-pro"
fallbackModel="gemini-2.5-flash"
onChoice={mockOnChoice}
/>,
@@ -79,7 +76,6 @@ describe('ProQuotaDialog', () => {
const mockOnChoice = vi.fn();
const { unmount } = render(
<ProQuotaDialog
failedModel="gemini-2.5-pro"
fallbackModel="gemini-2.5-flash"
onChoice={mockOnChoice}
/>,
@@ -90,10 +86,10 @@ describe('ProQuotaDialog', () => {
// Simulate the selection
act(() => {
onSelect('continue');
onSelect('retry');
});
expect(mockOnChoice).toHaveBeenCalledWith('continue');
expect(mockOnChoice).toHaveBeenCalledWith('retry');
unmount();
});
});

View File

@@ -10,45 +10,43 @@ import { RadioButtonSelect } from './shared/RadioButtonSelect.js';
import { theme } from '../semantic-colors.js';
interface ProQuotaDialogProps {
failedModel: string;
fallbackModel: string;
onChoice: (choice: 'auth' | 'continue') => void;
onChoice: (choice: 'retry_later' | 'retry') => void;
}
export function ProQuotaDialog({
failedModel,
fallbackModel,
onChoice,
}: ProQuotaDialogProps): React.JSX.Element {
const items = [
{
label: 'Change auth (executes the /auth command)',
value: 'auth' as const,
key: 'auth',
label: 'Try again later',
value: 'retry_later' as const,
key: 'retry_later',
},
{
label: `Continue with ${fallbackModel}`,
value: 'continue' as const,
key: 'continue',
label: `Switch to ${fallbackModel} for the rest of this session`,
value: 'retry' as const,
key: 'retry',
},
];
const handleSelect = (choice: 'auth' | 'continue') => {
const handleSelect = (choice: 'retry_later' | 'retry') => {
onChoice(choice);
};
return (
<Box borderStyle="round" flexDirection="column" paddingX={1}>
<Text bold color={theme.status.warning}>
Pro quota limit reached for {failedModel}.
</Text>
<Box marginTop={1}>
<Box marginTop={1} marginBottom={1}>
<RadioButtonSelect
items={items}
initialIndex={1}
onSelect={handleSelect}
/>
</Box>
<Text color={theme.text.primary}>
Note: You can always use /model to select a different option.
</Text>
</Box>
);
}

View File

@@ -40,7 +40,7 @@ export interface UIActions {
refreshStatic: () => void;
handleFinalSubmit: (value: string) => void;
handleClearScreen: () => void;
handleProQuotaChoice: (choice: 'auth' | 'continue') => void;
handleProQuotaChoice: (choice: 'retry_later' | 'retry') => void;
setQueueErrorMessage: (message: string | null) => void;
popAllMessages: (onPop: (messages: string | undefined) => void) => void;
handleApiKeySubmit: (apiKey: string) => Promise<void>;

View File

@@ -28,7 +28,7 @@ import {
} from '@google/gemini-cli-core';
import { useQuotaAndFallback } from './useQuotaAndFallback.js';
import type { UseHistoryManagerReturn } from './useHistoryManager.js';
import { AuthState, MessageType } from '../types.js';
import { MessageType } from '../types.js';
// Use a type alias for SpyInstance as it's not directly exported
type SpyInstance = ReturnType<typeof vi.spyOn>;
@@ -36,7 +36,6 @@ type SpyInstance = ReturnType<typeof vi.spyOn>;
describe('useQuotaAndFallback', () => {
let mockConfig: Config;
let mockHistoryManager: UseHistoryManagerReturn;
let mockSetAuthState: Mock;
let mockSetModelSwitchedFromQuotaError: Mock;
let setFallbackHandlerSpy: SpyInstance;
let mockGoogleApiError: GoogleApiError;
@@ -62,7 +61,6 @@ describe('useQuotaAndFallback', () => {
clearItems: vi.fn(),
loadHistory: vi.fn(),
};
mockSetAuthState = vi.fn();
mockSetModelSwitchedFromQuotaError = vi.fn();
setFallbackHandlerSpy = vi.spyOn(mockConfig, 'setFallbackModelHandler');
@@ -79,7 +77,6 @@ describe('useQuotaAndFallback', () => {
config: mockConfig,
historyManager: mockHistoryManager,
userTier: UserTierId.FREE,
setAuthState: mockSetAuthState,
setModelSwitchedFromQuotaError: mockSetModelSwitchedFromQuotaError,
}),
);
@@ -99,7 +96,6 @@ describe('useQuotaAndFallback', () => {
config: mockConfig,
historyManager: mockHistoryManager,
userTier: props.userTier,
setAuthState: mockSetAuthState,
setModelSwitchedFromQuotaError: mockSetModelSwitchedFromQuotaError,
}),
{ initialProps: { userTier } },
@@ -129,97 +125,14 @@ describe('useQuotaAndFallback', () => {
expect(mockHistoryManager.addItem).not.toHaveBeenCalled();
});
describe('Automatic Fallback Scenarios', () => {
const testCases = [
{
description: 'other error for FREE tier',
tier: UserTierId.FREE,
error: new Error('some error'),
expectedMessageSnippets: [
'Automatically switching from model-A to model-B for faster responses',
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
],
},
{
description: 'other error for LEGACY tier',
tier: UserTierId.LEGACY, // Paid tier
error: new Error('some error'),
expectedMessageSnippets: [
'Automatically switching from model-A to model-B for faster responses',
'switch to using a paid API key from AI Studio',
],
},
{
description: 'retryable quota error for FREE tier',
tier: UserTierId.FREE,
error: new RetryableQuotaError(
'retryable quota',
mockGoogleApiError,
5,
),
expectedMessageSnippets: [
'Your requests are being throttled right now due to server being at capacity for model-A',
'Automatically switching from model-A to model-B',
'upgrading to a Gemini Code Assist Standard or Enterprise plan',
],
},
{
description: 'retryable quota error for LEGACY tier',
tier: UserTierId.LEGACY, // Paid tier
error: new RetryableQuotaError(
'retryable quota',
mockGoogleApiError,
5,
),
expectedMessageSnippets: [
'Your requests are being throttled right now due to server being at capacity for model-A',
'Automatically switching from model-A to model-B',
'switch to using a paid API key from AI Studio',
],
},
];
for (const {
description,
tier,
error,
expectedMessageSnippets,
} of testCases) {
it(`should handle ${description} correctly`, async () => {
const handler = getRegisteredHandler(tier);
let result: FallbackIntent | null;
await act(async () => {
result = await handler('model-A', 'model-B', error);
});
// Automatic fallbacks should return 'stop'
expect(result!).toBe('stop');
expect(mockHistoryManager.addItem).toHaveBeenCalledWith(
expect.objectContaining({ type: MessageType.INFO }),
expect.any(Number),
);
const message = (mockHistoryManager.addItem as Mock).mock.calls[0][0]
.text;
for (const snippet of expectedMessageSnippets) {
expect(message).toContain(snippet);
}
expect(mockSetModelSwitchedFromQuotaError).toHaveBeenCalledWith(true);
expect(mockConfig.setQuotaErrorOccurred).toHaveBeenCalledWith(true);
});
}
});
describe('Interactive Fallback (Pro Quota Error)', () => {
describe('Interactive Fallback', () => {
// Pro Quota Errors
it('should set an interactive request and wait for user choice', async () => {
const { result } = renderHook(() =>
useQuotaAndFallback({
config: mockConfig,
historyManager: mockHistoryManager,
userTier: UserTierId.FREE,
setAuthState: mockSetAuthState,
setModelSwitchedFromQuotaError: mockSetModelSwitchedFromQuotaError,
}),
);
@@ -243,7 +156,7 @@ describe('useQuotaAndFallback', () => {
// Simulate the user choosing to continue with the fallback model
await act(() => {
result.current.handleProQuotaChoice('continue');
result.current.handleProQuotaChoice('retry');
});
// The original promise from the handler should now resolve
@@ -260,7 +173,6 @@ describe('useQuotaAndFallback', () => {
config: mockConfig,
historyManager: mockHistoryManager,
userTier: UserTierId.FREE,
setAuthState: mockSetAuthState,
setModelSwitchedFromQuotaError: mockSetModelSwitchedFromQuotaError,
}),
);
@@ -294,13 +206,120 @@ describe('useQuotaAndFallback', () => {
expect(result.current.proQuotaRequest).toBe(firstRequest);
await act(() => {
result.current.handleProQuotaChoice('continue');
result.current.handleProQuotaChoice('retry');
});
const intent1 = await promise1!;
expect(intent1).toBe('retry');
expect(result.current.proQuotaRequest).toBeNull();
});
// Non-Quota error test cases
const testCases = [
{
description: 'other error for FREE tier',
tier: UserTierId.FREE,
error: new Error('some error'),
expectedMessageSnippets: [
'🚦Pardon Our Congestion! It looks like model-A is very popular at the moment.',
'Please retry again later.',
],
},
{
description: 'other error for LEGACY tier',
tier: UserTierId.LEGACY, // Paid tier
error: new Error('some error'),
expectedMessageSnippets: [
'🚦Pardon Our Congestion! It looks like model-A is very popular at the moment.',
'Please retry again later.',
],
},
{
description: 'retryable quota error for FREE tier',
tier: UserTierId.FREE,
error: new RetryableQuotaError(
'retryable quota',
mockGoogleApiError,
5,
),
expectedMessageSnippets: [
'🚦Pardon Our Congestion! It looks like model-A is very popular at the moment.',
'Please retry again later.',
],
},
{
description: 'retryable quota error for LEGACY tier',
tier: UserTierId.LEGACY, // Paid tier
error: new RetryableQuotaError(
'retryable quota',
mockGoogleApiError,
5,
),
expectedMessageSnippets: [
'🚦Pardon Our Congestion! It looks like model-A is very popular at the moment.',
'Please retry again later.',
],
},
];
for (const {
description,
tier,
error,
expectedMessageSnippets,
} of testCases) {
it(`should handle ${description} correctly`, async () => {
const { result } = renderHook(
(props) =>
useQuotaAndFallback({
config: mockConfig,
historyManager: mockHistoryManager,
userTier: props.tier,
setModelSwitchedFromQuotaError:
mockSetModelSwitchedFromQuotaError,
}),
{ initialProps: { tier } },
);
const handler = setFallbackHandlerSpy.mock
.calls[0][0] as FallbackModelHandler;
// Call the handler but do not await it, to check the intermediate state
let promise: Promise<FallbackIntent | null>;
await act(() => {
promise = handler('model-A', 'model-B', error);
});
// The hook should now have a pending request for the UI to handle
expect(result.current.proQuotaRequest).not.toBeNull();
expect(result.current.proQuotaRequest?.failedModel).toBe('model-A');
// Check that the correct initial message was added
expect(mockHistoryManager.addItem).toHaveBeenCalledWith(
expect.objectContaining({ type: MessageType.INFO }),
expect.any(Number),
);
const message = (mockHistoryManager.addItem as Mock).mock.calls[0][0]
.text;
for (const snippet of expectedMessageSnippets) {
expect(message).toContain(snippet);
}
// Simulate the user choosing to continue with the fallback model
await act(() => {
result.current.handleProQuotaChoice('retry');
});
expect(mockSetModelSwitchedFromQuotaError).toHaveBeenCalledWith(true);
// The original promise from the handler should now resolve
const intent = await promise!;
expect(intent).toBe('retry');
// The pending request should be cleared from the state
expect(result.current.proQuotaRequest).toBeNull();
expect(mockConfig.setQuotaErrorOccurred).toHaveBeenCalledWith(true);
});
}
});
});
@@ -311,26 +330,23 @@ describe('useQuotaAndFallback', () => {
config: mockConfig,
historyManager: mockHistoryManager,
userTier: UserTierId.FREE,
setAuthState: mockSetAuthState,
setModelSwitchedFromQuotaError: mockSetModelSwitchedFromQuotaError,
}),
);
act(() => {
result.current.handleProQuotaChoice('auth');
result.current.handleProQuotaChoice('retry_later');
});
expect(mockSetAuthState).not.toHaveBeenCalled();
expect(mockHistoryManager.addItem).not.toHaveBeenCalled();
});
it('should resolve intent to "auth" and trigger auth state update', async () => {
it('should resolve intent to "retry_later"', async () => {
const { result } = renderHook(() =>
useQuotaAndFallback({
config: mockConfig,
historyManager: mockHistoryManager,
userTier: UserTierId.FREE,
setAuthState: mockSetAuthState,
setModelSwitchedFromQuotaError: mockSetModelSwitchedFromQuotaError,
}),
);
@@ -347,12 +363,11 @@ describe('useQuotaAndFallback', () => {
});
await act(() => {
result.current.handleProQuotaChoice('auth');
result.current.handleProQuotaChoice('retry_later');
});
const intent = await promise!;
expect(intent).toBe('auth');
expect(mockSetAuthState).toHaveBeenCalledWith(AuthState.Updating);
expect(intent).toBe('retry_later');
expect(result.current.proQuotaRequest).toBeNull();
});
@@ -362,7 +377,6 @@ describe('useQuotaAndFallback', () => {
config: mockConfig,
historyManager: mockHistoryManager,
userTier: UserTierId.FREE,
setAuthState: mockSetAuthState,
setModelSwitchedFromQuotaError: mockSetModelSwitchedFromQuotaError,
}),
);
@@ -380,7 +394,7 @@ describe('useQuotaAndFallback', () => {
});
await act(() => {
result.current.handleProQuotaChoice('continue');
result.current.handleProQuotaChoice('retry');
});
const intent = await promise!;

View File

@@ -11,18 +11,16 @@ import {
type FallbackIntent,
TerminalQuotaError,
UserTierId,
RetryableQuotaError,
} from '@google/gemini-cli-core';
import { useCallback, useEffect, useRef, useState } from 'react';
import { type UseHistoryManagerReturn } from './useHistoryManager.js';
import { AuthState, MessageType } from '../types.js';
import { MessageType } from '../types.js';
import { type ProQuotaDialogRequest } from '../contexts/UIStateContext.js';
interface UseQuotaAndFallbackArgs {
config: Config;
historyManager: UseHistoryManagerReturn;
userTier: UserTierId | undefined;
setAuthState: (state: AuthState) => void;
setModelSwitchedFromQuotaError: (value: boolean) => void;
}
@@ -30,7 +28,6 @@ export function useQuotaAndFallback({
config,
historyManager,
userTier,
setAuthState,
setModelSwitchedFromQuotaError,
}: UseQuotaAndFallbackArgs) {
const [proQuotaRequest, setProQuotaRequest] =
@@ -69,55 +66,24 @@ export function useQuotaAndFallback({
message = [
`⚡ You have reached your daily ${failedModel} quota limit.`,
`⚡ You can choose to authenticate with a paid API key or continue with the fallback model.`,
`To continue accessing the ${failedModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey}`,
`Increase your limits by using a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key`,
`⚡ You can switch authentication methods by typing /auth`,
].join('\n');
} else {
message = [
`⚡ You have reached your daily ${failedModel} quota limit.`,
`⚡ You can choose to authenticate with a paid API key or continue with the fallback model.`,
`To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist`,
`Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key`,
`You can switch authentication methods by typing /auth`,
].join('\n');
}
} else if (error instanceof RetryableQuotaError) {
// Short term quota retries exhausted (Automatic fallback)
const actionMessage = [
`⚡ Your requests are being throttled right now due to server being at capacity for ${failedModel}.`,
`⚡ Automatically switching from ${failedModel} to ${fallbackModel} for the remainder of this session.`,
].join('\n');
if (isPaidTier) {
message = [
actionMessage,
`⚡ To continue accessing the ${failedModel} model, retry your request after some time or consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey}`,
].join('\n');
} else {
message = [
actionMessage,
`⚡ Retry your requests after some time. Otherwise consider upgrading to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist`,
`Increase your limits by `,
`- signing up for a plan with higher limits at https://goo.gle/set-up-gemini-code-assist`,
`- or using a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key`,
`⚡ You can switch authentication methods by typing /auth`,
].join('\n');
}
} else {
// Other errors (Automatic fallback)
const actionMessage = `⚡ Automatically switching from ${failedModel} to ${fallbackModel} for faster responses for the remainder of this session.`;
if (isPaidTier) {
message = [
actionMessage,
`⚡ Your requests are being throttled temporarily due to server being at capacity for ${failedModel} or there is a service outage.`,
`⚡ To continue accessing the ${failedModel} model, you can retry your request after some time or consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey}`,
].join('\n');
} else {
message = [
actionMessage,
`⚡ Your requests are being throttled temporarily due to server being at capacity for ${failedModel} or there is a service outage.`,
`⚡ To avoid being throttled, you can retry your request after some time or upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist`,
`⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key`,
`⚡ You can switch authentication methods by typing /auth`,
].join('\n');
}
message = [
`🚦Pardon Our Congestion! It looks like ${failedModel} is very popular at the moment.`,
`Please retry again later.`,
].join('\n');
}
// Add message to UI history
@@ -132,44 +98,37 @@ export function useQuotaAndFallback({
setModelSwitchedFromQuotaError(true);
config.setQuotaErrorOccurred(true);
// Interactive Fallback for Pro quota
if (error instanceof TerminalQuotaError) {
if (isDialogPending.current) {
return 'stop'; // A dialog is already active, so just stop this request.
}
isDialogPending.current = true;
const intent: FallbackIntent = await new Promise<FallbackIntent>(
(resolve) => {
setProQuotaRequest({
failedModel,
fallbackModel,
resolve,
});
},
);
return intent;
if (isDialogPending.current) {
return 'stop'; // A dialog is already active, so just stop this request.
}
isDialogPending.current = true;
return 'stop';
const intent: FallbackIntent = await new Promise<FallbackIntent>(
(resolve) => {
setProQuotaRequest({
failedModel,
fallbackModel,
resolve,
});
},
);
return intent;
};
config.setFallbackModelHandler(fallbackHandler);
}, [config, historyManager, userTier, setModelSwitchedFromQuotaError]);
const handleProQuotaChoice = useCallback(
(choice: 'auth' | 'continue') => {
(choice: FallbackIntent) => {
if (!proQuotaRequest) return;
const intent: FallbackIntent = choice === 'auth' ? 'auth' : 'retry';
const intent: FallbackIntent = choice;
proQuotaRequest.resolve(intent);
setProQuotaRequest(null);
isDialogPending.current = false; // Reset the flag here
if (choice === 'auth') {
setAuthState(AuthState.Updating);
} else {
if (choice === 'retry') {
historyManager.addItem(
{
type: MessageType.INFO,
@@ -179,7 +138,7 @@ export function useQuotaAndFallback({
);
}
},
[proQuotaRequest, setAuthState, historyManager],
[proQuotaRequest, historyManager],
);
return {

View File

@@ -127,22 +127,6 @@ describe('handleFallback', () => {
});
});
describe('when handler returns "auth"', () => {
it('should NOT activate fallback mode and return false', async () => {
mockHandler.mockResolvedValue('auth');
const result = await handleFallback(
mockConfig,
MOCK_PRO_MODEL,
AUTH_OAUTH,
);
expect(result).toBe(false);
expect(mockConfig.setFallbackMode).not.toHaveBeenCalled();
expect(logFlashFallback).not.toHaveBeenCalled();
});
});
describe('when handler returns an unexpected value', () => {
it('should log an error and return null', async () => {
mockHandler.mockResolvedValue(null);

View File

@@ -46,7 +46,7 @@ export async function handleFallback(
activateFallbackMode(config, authType);
return false;
case 'auth':
case 'retry_later':
return false;
default:

View File

@@ -10,7 +10,7 @@
export type FallbackIntent =
| 'retry' // Immediately retry the current request with the fallback model.
| 'stop' // Switch to fallback for future requests, but stop the current request.
| 'auth'; // Stop the current request; user intends to change authentication.
| 'retry_later'; // Stop the current request and do not fallback. Intend to try again later with the same model.
/**
* The interface for the handler provided by the UI layer (e.g., the CLI)