mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-22 12:01:39 -07:00
fix: handle request retries and model fallback correctly (#11624)
This commit is contained in:
@@ -19,25 +19,15 @@ import {
|
||||
type FallbackModelHandler,
|
||||
UserTierId,
|
||||
AuthType,
|
||||
isGenericQuotaExceededError,
|
||||
isProQuotaExceededError,
|
||||
TerminalQuotaError,
|
||||
makeFakeConfig,
|
||||
type GoogleApiError,
|
||||
RetryableQuotaError,
|
||||
} from '@google/gemini-cli-core';
|
||||
import { useQuotaAndFallback } from './useQuotaAndFallback.js';
|
||||
import type { UseHistoryManagerReturn } from './useHistoryManager.js';
|
||||
import { AuthState, MessageType } from '../types.js';
|
||||
|
||||
// Mock the error checking functions from the core package to control test scenarios
|
||||
vi.mock('@google/gemini-cli-core', async (importOriginal) => {
|
||||
const original =
|
||||
await importOriginal<typeof import('@google/gemini-cli-core')>();
|
||||
return {
|
||||
...original,
|
||||
isGenericQuotaExceededError: vi.fn(),
|
||||
isProQuotaExceededError: vi.fn(),
|
||||
};
|
||||
});
|
||||
|
||||
// Use a type alias for SpyInstance as it's not directly exported
|
||||
type SpyInstance = ReturnType<typeof vi.spyOn>;
|
||||
|
||||
@@ -47,12 +37,15 @@ describe('useQuotaAndFallback', () => {
|
||||
let mockSetAuthState: Mock;
|
||||
let mockSetModelSwitchedFromQuotaError: Mock;
|
||||
let setFallbackHandlerSpy: SpyInstance;
|
||||
|
||||
const mockedIsGenericQuotaExceededError = isGenericQuotaExceededError as Mock;
|
||||
const mockedIsProQuotaExceededError = isProQuotaExceededError as Mock;
|
||||
let mockGoogleApiError: GoogleApiError;
|
||||
|
||||
beforeEach(() => {
|
||||
mockConfig = makeFakeConfig();
|
||||
mockGoogleApiError = {
|
||||
code: 429,
|
||||
message: 'mock error',
|
||||
details: [],
|
||||
};
|
||||
|
||||
// Spy on the method that requires the private field and mock its return.
|
||||
// This is cleaner than modifying the config class for tests.
|
||||
@@ -72,9 +65,6 @@ describe('useQuotaAndFallback', () => {
|
||||
|
||||
setFallbackHandlerSpy = vi.spyOn(mockConfig, 'setFallbackModelHandler');
|
||||
vi.spyOn(mockConfig, 'setQuotaErrorOccurred');
|
||||
|
||||
mockedIsGenericQuotaExceededError.mockReturnValue(false);
|
||||
mockedIsProQuotaExceededError.mockReturnValue(false);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
@@ -140,51 +130,62 @@ describe('useQuotaAndFallback', () => {
|
||||
describe('Automatic Fallback Scenarios', () => {
|
||||
const testCases = [
|
||||
{
|
||||
errorType: 'generic',
|
||||
description: 'other error for FREE tier',
|
||||
tier: UserTierId.FREE,
|
||||
error: new Error('some error'),
|
||||
expectedMessageSnippets: [
|
||||
'Automatically switching from model-A to model-B',
|
||||
'Automatically switching from model-A to model-B for faster responses',
|
||||
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
|
||||
],
|
||||
},
|
||||
{
|
||||
errorType: 'generic',
|
||||
tier: UserTierId.STANDARD, // Paid tier
|
||||
description: 'other error for LEGACY tier',
|
||||
tier: UserTierId.LEGACY, // Paid tier
|
||||
error: new Error('some error'),
|
||||
expectedMessageSnippets: [
|
||||
'Automatically switching from model-A to model-B',
|
||||
'Automatically switching from model-A to model-B for faster responses',
|
||||
'switch to using a paid API key from AI Studio',
|
||||
],
|
||||
},
|
||||
{
|
||||
errorType: 'other',
|
||||
description: 'retryable quota error for FREE tier',
|
||||
tier: UserTierId.FREE,
|
||||
error: new RetryableQuotaError(
|
||||
'retryable quota',
|
||||
mockGoogleApiError,
|
||||
5,
|
||||
),
|
||||
expectedMessageSnippets: [
|
||||
'Automatically switching from model-A to model-B for faster responses',
|
||||
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
|
||||
'Your requests are being throttled right now due to server being at capacity for model-A',
|
||||
'Automatically switching from model-A to model-B',
|
||||
'upgrading to a Gemini Code Assist Standard or Enterprise plan',
|
||||
],
|
||||
},
|
||||
{
|
||||
errorType: 'other',
|
||||
description: 'retryable quota error for LEGACY tier',
|
||||
tier: UserTierId.LEGACY, // Paid tier
|
||||
error: new RetryableQuotaError(
|
||||
'retryable quota',
|
||||
mockGoogleApiError,
|
||||
5,
|
||||
),
|
||||
expectedMessageSnippets: [
|
||||
'Automatically switching from model-A to model-B for faster responses',
|
||||
'Your requests are being throttled right now due to server being at capacity for model-A',
|
||||
'Automatically switching from model-A to model-B',
|
||||
'switch to using a paid API key from AI Studio',
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
for (const { errorType, tier, expectedMessageSnippets } of testCases) {
|
||||
it(`should handle ${errorType} error for ${tier} tier correctly`, async () => {
|
||||
mockedIsGenericQuotaExceededError.mockReturnValue(
|
||||
errorType === 'generic',
|
||||
);
|
||||
|
||||
for (const {
|
||||
description,
|
||||
tier,
|
||||
error,
|
||||
expectedMessageSnippets,
|
||||
} of testCases) {
|
||||
it(`should handle ${description} correctly`, async () => {
|
||||
const handler = getRegisteredHandler(tier);
|
||||
const result = await handler(
|
||||
'model-A',
|
||||
'model-B',
|
||||
new Error('quota exceeded'),
|
||||
);
|
||||
const result = await handler('model-A', 'model-B', error);
|
||||
|
||||
// Automatic fallbacks should return 'stop'
|
||||
expect(result).toBe('stop');
|
||||
@@ -207,10 +208,6 @@ describe('useQuotaAndFallback', () => {
|
||||
});
|
||||
|
||||
describe('Interactive Fallback (Pro Quota Error)', () => {
|
||||
beforeEach(() => {
|
||||
mockedIsProQuotaExceededError.mockReturnValue(true);
|
||||
});
|
||||
|
||||
it('should set an interactive request and wait for user choice', async () => {
|
||||
const { result } = renderHook(() =>
|
||||
useQuotaAndFallback({
|
||||
@@ -229,7 +226,7 @@ describe('useQuotaAndFallback', () => {
|
||||
const promise = handler(
|
||||
'gemini-pro',
|
||||
'gemini-flash',
|
||||
new Error('pro quota'),
|
||||
new TerminalQuotaError('pro quota', mockGoogleApiError),
|
||||
);
|
||||
|
||||
await act(async () => {});
|
||||
@@ -268,7 +265,7 @@ describe('useQuotaAndFallback', () => {
|
||||
const promise1 = handler(
|
||||
'gemini-pro',
|
||||
'gemini-flash',
|
||||
new Error('pro quota 1'),
|
||||
new TerminalQuotaError('pro quota 1', mockGoogleApiError),
|
||||
);
|
||||
await act(async () => {});
|
||||
|
||||
@@ -278,7 +275,7 @@ describe('useQuotaAndFallback', () => {
|
||||
const result2 = await handler(
|
||||
'gemini-pro',
|
||||
'gemini-flash',
|
||||
new Error('pro quota 2'),
|
||||
new TerminalQuotaError('pro quota 2', mockGoogleApiError),
|
||||
);
|
||||
|
||||
// The lock should have stopped the second request
|
||||
@@ -297,10 +294,6 @@ describe('useQuotaAndFallback', () => {
|
||||
});
|
||||
|
||||
describe('handleProQuotaChoice', () => {
|
||||
beforeEach(() => {
|
||||
mockedIsProQuotaExceededError.mockReturnValue(true);
|
||||
});
|
||||
|
||||
it('should do nothing if there is no pending pro quota request', () => {
|
||||
const { result } = renderHook(() =>
|
||||
useQuotaAndFallback({
|
||||
@@ -336,7 +329,7 @@ describe('useQuotaAndFallback', () => {
|
||||
const promise = handler(
|
||||
'gemini-pro',
|
||||
'gemini-flash',
|
||||
new Error('pro quota'),
|
||||
new TerminalQuotaError('pro quota', mockGoogleApiError),
|
||||
);
|
||||
await act(async () => {}); // Allow state to update
|
||||
|
||||
@@ -367,7 +360,7 @@ describe('useQuotaAndFallback', () => {
|
||||
const promise = handler(
|
||||
'gemini-pro',
|
||||
'gemini-flash',
|
||||
new Error('pro quota'),
|
||||
new TerminalQuotaError('pro quota', mockGoogleApiError),
|
||||
);
|
||||
await act(async () => {}); // Allow state to update
|
||||
|
||||
|
||||
@@ -9,9 +9,9 @@ import {
|
||||
type Config,
|
||||
type FallbackModelHandler,
|
||||
type FallbackIntent,
|
||||
isGenericQuotaExceededError,
|
||||
isProQuotaExceededError,
|
||||
TerminalQuotaError,
|
||||
UserTierId,
|
||||
RetryableQuotaError,
|
||||
} from '@google/gemini-cli-core';
|
||||
import { useCallback, useEffect, useRef, useState } from 'react';
|
||||
import { type UseHistoryManagerReturn } from './useHistoryManager.js';
|
||||
@@ -63,7 +63,7 @@ export function useQuotaAndFallback({
|
||||
|
||||
let message: string;
|
||||
|
||||
if (error && isProQuotaExceededError(error)) {
|
||||
if (error instanceof TerminalQuotaError) {
|
||||
// Pro Quota specific messages (Interactive)
|
||||
if (isPaidTier) {
|
||||
message = `⚡ You have reached your daily ${failedModel} quota limit.
|
||||
@@ -76,31 +76,30 @@ export function useQuotaAndFallback({
|
||||
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
|
||||
⚡ You can switch authentication methods by typing /auth`;
|
||||
}
|
||||
} else if (error && isGenericQuotaExceededError(error)) {
|
||||
// Generic Quota (Automatic fallback)
|
||||
const actionMessage = `⚡ You have reached your daily quota limit.\n⚡ Automatically switching from ${failedModel} to ${fallbackModel} for the remainder of this session.`;
|
||||
} else if (error instanceof RetryableQuotaError) {
|
||||
// Short term quota retries exhausted (Automatic fallback)
|
||||
const actionMessage = `⚡ Your requests are being throttled right now due to server being at capacity for ${failedModel}.\n⚡ Automatically switching from ${failedModel} to ${fallbackModel} for the remainder of this session.`;
|
||||
|
||||
if (isPaidTier) {
|
||||
message = `${actionMessage}
|
||||
⚡ To continue accessing the ${failedModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
|
||||
⚡ To continue accessing the ${failedModel} model, retry your request after some time or consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
|
||||
} else {
|
||||
message = `${actionMessage}
|
||||
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
|
||||
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
|
||||
⚡ Retry your requests after some time. Otherwise consider upgrading to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
|
||||
⚡ You can switch authentication methods by typing /auth`;
|
||||
}
|
||||
} else {
|
||||
// Consecutive 429s or other errors (Automatic fallback)
|
||||
// Other errors (Automatic fallback)
|
||||
const actionMessage = `⚡ Automatically switching from ${failedModel} to ${fallbackModel} for faster responses for the remainder of this session.`;
|
||||
|
||||
if (isPaidTier) {
|
||||
message = `${actionMessage}
|
||||
⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${failedModel} quota limit
|
||||
⚡ To continue accessing the ${failedModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
|
||||
⚡ Your requests are being throttled temporarily due to server being at capacity for ${failedModel} or there is a service outage.
|
||||
⚡ To continue accessing the ${failedModel} model, you can retry your request after some time or consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
|
||||
} else {
|
||||
message = `${actionMessage}
|
||||
⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${failedModel} quota limit
|
||||
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
|
||||
⚡ Your requests are being throttled temporarily due to server being at capacity for ${failedModel} or there is a service outage.
|
||||
⚡ To avoid being throttled, you can retry your request after some time or upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
|
||||
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
|
||||
⚡ You can switch authentication methods by typing /auth`;
|
||||
}
|
||||
@@ -119,7 +118,7 @@ export function useQuotaAndFallback({
|
||||
config.setQuotaErrorOccurred(true);
|
||||
|
||||
// Interactive Fallback for Pro quota
|
||||
if (error && isProQuotaExceededError(error)) {
|
||||
if (error instanceof TerminalQuotaError) {
|
||||
if (isDialogPending.current) {
|
||||
return 'stop'; // A dialog is already active, so just stop this request.
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user