From 3bed217620b5fc005036d7f301598fe701fdc0e4 Mon Sep 17 00:00:00 2001 From: Adam Weidman Date: Tue, 12 May 2026 17:50:39 -0400 Subject: [PATCH] fix: prevent uncontrolled retry loop during fallback The retry/fallback path was re-enqueuing the pending user message without deduplication or delay when falling back to the same model, causing a multiplication loop. This fix ensures that we only retry immediately when transitioning to a new model, and respect the backoff delay when retrying the same model. TAG=agy CONV=37b3cdb6-8437-4ced-ae4e-3d04054bacb1 --- packages/core/src/fallback/handler.ts | 6 ++-- packages/core/src/utils/retry.test.ts | 45 +++++++++++++++++++++++++++ packages/core/src/utils/retry.ts | 38 ++++++++++++++++++++-- 3 files changed, 84 insertions(+), 5 deletions(-) diff --git a/packages/core/src/fallback/handler.ts b/packages/core/src/fallback/handler.ts index f216f9216c..2c4c25f5fe 100644 --- a/packages/core/src/fallback/handler.ts +++ b/packages/core/src/fallback/handler.ts @@ -130,19 +130,19 @@ async function processIntent( config: Config, intent: FallbackIntent | null, fallbackModel: string, -): Promise<boolean> { +): Promise<string | boolean> { switch (intent) { case 'retry_always': // TODO(telemetry): Implement generic fallback event logging. Existing // logFlashFallback is specific to a single Model. config.activateFallbackMode(fallbackModel); - return true; + return fallbackModel; case 'retry_once': // For distinct retry (retry_once), we do NOT set the active model permanently. // The FallbackStrategy will handle routing to the available model for this turn // based on the availability service state (which is updated before this). 
- return true; + return fallbackModel; case 'retry_with_credits': return true; diff --git a/packages/core/src/utils/retry.test.ts b/packages/core/src/utils/retry.test.ts index 290f14eadb..21a15e2976 100644 --- a/packages/core/src/utils/retry.test.ts +++ b/packages/core/src/utils/retry.test.ts @@ -665,6 +665,51 @@ describe('retryWithBackoff', () => { expect(mockFn).toHaveBeenCalledTimes(1); }, ); + + it('should wait before retrying when fallback returns same model and fails terminally', async () => { + let attempts = 0; + const mockFn = vi.fn().mockImplementation(async () => { + attempts++; + if (attempts > 2) { + throw new Error('Test terminated to prevent infinite loop'); + } + throw new TerminalQuotaError('Quota exhausted', {} as any); + }); + + const mockPolicy = { + model: 'same-model', + actions: {}, + stateTransitions: {}, + }; + const getAvailabilityContext = vi + .fn() + .mockReturnValue({ policy: mockPolicy }); + + const promise = retryWithBackoff(mockFn, { + maxAttempts: 2, + initialDelayMs: 10, + onPersistent429: async () => 'same-model', + getAvailabilityContext, + authType: 'oauth-personal', + }); + + // Handle rejection early to avoid unhandled rejection warnings + const catchPromise = promise.catch((e) => e); + + // At this point, it should have failed once and be waiting. 
+ expect(mockFn).toHaveBeenCalledTimes(1); + + // We need to advance timers to allow retries + await vi.advanceTimersByTimeAsync(10); // 1st retry delay + await vi.advanceTimersByTimeAsync(20); // 2nd retry delay + + const result = await catchPromise; + expect(result).toBeInstanceOf(Error); + if (result instanceof Error) { + expect(result.message).toBe('Test terminated to prevent infinite loop'); + } + expect(mockFn).toHaveBeenCalledTimes(3); + }); }); it('should abort the retry loop when the signal is aborted', async () => { const abortController = new AbortController(); diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts index a45ba0c0b0..5740162fd6 100644 --- a/packages/core/src/utils/retry.ts +++ b/packages/core/src/utils/retry.ts @@ -307,6 +307,9 @@ export async function retryWithBackoff( ) { if (onPersistent429) { try { + const currentContext = getAvailabilityContext?.(); + const currentModel = currentContext?.policy.model; + const fallbackModel = await onPersistent429( authType, classifiedError, @@ -314,7 +317,21 @@ export async function retryWithBackoff( if (fallbackModel) { attempt = 0; // Reset attempts and retry with the new model. 
currentDelay = initialDelayMs; - continue; + + // Only continue (immediate retry) if fallbackModel is a NEW model + if ( + typeof fallbackModel === 'string' && + fallbackModel !== currentModel + ) { + continue; + } else { + // If it's the same model (or a boolean retry signal), wait before retrying + const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1); + const delayWithJitter = Math.max(0, currentDelay + jitter); + await delay(delayWithJitter, signal); + currentDelay = Math.min(maxDelayMs, currentDelay * 2); + continue; + } } } catch (fallbackError) { debugLogger.warn('Fallback to Flash model failed:', fallbackError); @@ -356,6 +373,9 @@ export async function retryWithBackoff( ); if (onPersistent429) { try { + const currentContext = getAvailabilityContext?.(); + const currentModel = currentContext?.policy.model; + const fallbackModel = await onPersistent429( authType, classifiedError, @@ -363,7 +383,21 @@ export async function retryWithBackoff( if (fallbackModel) { attempt = 0; // Reset attempts and retry with the new model. currentDelay = initialDelayMs; - continue; + + // Only continue (immediate retry) if fallbackModel is a NEW model + if ( + typeof fallbackModel === 'string' && + fallbackModel !== currentModel + ) { + continue; + } else { + // If it's the same model (or a boolean retry signal), wait before retrying + const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1); + const delayWithJitter = Math.max(0, currentDelay + jitter); + await delay(delayWithJitter, signal); + currentDelay = Math.min(maxDelayMs, currentDelay * 2); + continue; + } } } catch (fallbackError) { debugLogger.warn('Model fallback failed:', fallbackError);