mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-15 06:12:50 -07:00
fix: prevent uncontrolled retry loop during fallback
The retry/fallback path was re-enqueuing the pending user message without deduplication or delay when falling back to the same model, causing a multiplication loop. This fix ensures that we only retry immediately when transitioning to a new model, and respect the backoff delay when retrying the same model. TAG=agy CONV=37b3cdb6-8437-4ced-ae4e-3d04054bacb1
This commit is contained in:
@@ -130,19 +130,19 @@ async function processIntent(
|
||||
config: Config,
|
||||
intent: FallbackIntent | null,
|
||||
fallbackModel: string,
|
||||
): Promise<boolean> {
|
||||
): Promise<string | boolean> {
|
||||
switch (intent) {
|
||||
case 'retry_always':
|
||||
// TODO(telemetry): Implement generic fallback event logging. Existing
|
||||
// logFlashFallback is specific to a single Model.
|
||||
config.activateFallbackMode(fallbackModel);
|
||||
return true;
|
||||
return fallbackModel;
|
||||
|
||||
case 'retry_once':
|
||||
// For distinct retry (retry_once), we do NOT set the active model permanently.
|
||||
// The FallbackStrategy will handle routing to the available model for this turn
|
||||
// based on the availability service state (which is updated before this).
|
||||
return true;
|
||||
return fallbackModel;
|
||||
|
||||
case 'retry_with_credits':
|
||||
return true;
|
||||
|
||||
@@ -665,6 +665,51 @@ describe('retryWithBackoff', () => {
|
||||
expect(mockFn).toHaveBeenCalledTimes(1);
|
||||
},
|
||||
);
|
||||
|
||||
it('should wait before retrying when fallback returns same model and fails terminally', async () => {
|
||||
let attempts = 0;
|
||||
const mockFn = vi.fn().mockImplementation(async () => {
|
||||
attempts++;
|
||||
if (attempts > 2) {
|
||||
throw new Error('Test terminated to prevent infinite loop');
|
||||
}
|
||||
throw new TerminalQuotaError('Quota exhausted', {} as any);
|
||||
});
|
||||
|
||||
const mockPolicy = {
|
||||
model: 'same-model',
|
||||
actions: {},
|
||||
stateTransitions: {},
|
||||
};
|
||||
const getAvailabilityContext = vi
|
||||
.fn()
|
||||
.mockReturnValue({ policy: mockPolicy });
|
||||
|
||||
const promise = retryWithBackoff(mockFn, {
|
||||
maxAttempts: 2,
|
||||
initialDelayMs: 10,
|
||||
onPersistent429: async () => 'same-model',
|
||||
getAvailabilityContext,
|
||||
authType: 'oauth-personal',
|
||||
});
|
||||
|
||||
// Handle rejection early to avoid unhandled rejection warnings
|
||||
const catchPromise = promise.catch((e) => e);
|
||||
|
||||
// At this point, it should have failed once and be waiting.
|
||||
expect(mockFn).toHaveBeenCalledTimes(1);
|
||||
|
||||
// We need to advance timers to allow retries
|
||||
await vi.advanceTimersByTimeAsync(10); // 1st retry delay
|
||||
await vi.advanceTimersByTimeAsync(20); // 2nd retry delay
|
||||
|
||||
const result = await catchPromise;
|
||||
expect(result).toBeInstanceOf(Error);
|
||||
if (result instanceof Error) {
|
||||
expect(result.message).toBe('Test terminated to prevent infinite loop');
|
||||
}
|
||||
expect(mockFn).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
});
|
||||
it('should abort the retry loop when the signal is aborted', async () => {
|
||||
const abortController = new AbortController();
|
||||
|
||||
@@ -307,6 +307,9 @@ export async function retryWithBackoff<T>(
|
||||
) {
|
||||
if (onPersistent429) {
|
||||
try {
|
||||
const currentContext = getAvailabilityContext?.();
|
||||
const currentModel = currentContext?.policy.model;
|
||||
|
||||
const fallbackModel = await onPersistent429(
|
||||
authType,
|
||||
classifiedError,
|
||||
@@ -314,7 +317,21 @@ export async function retryWithBackoff<T>(
|
||||
if (fallbackModel) {
|
||||
attempt = 0; // Reset attempts and retry with the new model.
|
||||
currentDelay = initialDelayMs;
|
||||
continue;
|
||||
|
||||
// Only continue (immediate retry) if fallbackModel is a NEW model
|
||||
if (
|
||||
typeof fallbackModel === 'string' &&
|
||||
fallbackModel !== currentModel
|
||||
) {
|
||||
continue;
|
||||
} else {
|
||||
// If it's the same model (or a boolean retry signal), wait before retrying
|
||||
const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1);
|
||||
const delayWithJitter = Math.max(0, currentDelay + jitter);
|
||||
await delay(delayWithJitter, signal);
|
||||
currentDelay = Math.min(maxDelayMs, currentDelay * 2);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} catch (fallbackError) {
|
||||
debugLogger.warn('Fallback to Flash model failed:', fallbackError);
|
||||
@@ -356,6 +373,9 @@ export async function retryWithBackoff<T>(
|
||||
);
|
||||
if (onPersistent429) {
|
||||
try {
|
||||
const currentContext = getAvailabilityContext?.();
|
||||
const currentModel = currentContext?.policy.model;
|
||||
|
||||
const fallbackModel = await onPersistent429(
|
||||
authType,
|
||||
classifiedError,
|
||||
@@ -363,7 +383,21 @@ export async function retryWithBackoff<T>(
|
||||
if (fallbackModel) {
|
||||
attempt = 0; // Reset attempts and retry with the new model.
|
||||
currentDelay = initialDelayMs;
|
||||
continue;
|
||||
|
||||
// Only continue (immediate retry) if fallbackModel is a NEW model
|
||||
if (
|
||||
typeof fallbackModel === 'string' &&
|
||||
fallbackModel !== currentModel
|
||||
) {
|
||||
continue;
|
||||
} else {
|
||||
// If it's the same model (or a boolean retry signal), wait before retrying
|
||||
const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1);
|
||||
const delayWithJitter = Math.max(0, currentDelay + jitter);
|
||||
await delay(delayWithJitter, signal);
|
||||
currentDelay = Math.min(maxDelayMs, currentDelay * 2);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} catch (fallbackError) {
|
||||
debugLogger.warn('Model fallback failed:', fallbackError);
|
||||
|
||||
Reference in New Issue
Block a user