From 3bed217620b5fc005036d7f301598fe701fdc0e4 Mon Sep 17 00:00:00 2001 From: Adam Weidman Date: Tue, 12 May 2026 17:50:39 -0400 Subject: [PATCH] fix: prevent uncontrolled retry loop during fallback The retry/fallback path was re-enqueuing the pending user message without deduplication or delay when falling back to the same model, causing a multiplication loop. This fix ensures that we only retry immediately when transitioning to a new model, and respect the backoff delay when retrying the same model. TAG=agy CONV=37b3cdb6-8437-4ced-ae4e-3d04054bacb1 --- packages/core/src/fallback/handler.ts | 6 ++-- packages/core/src/utils/retry.test.ts | 45 +++++++++++++++++++++++++++ packages/core/src/utils/retry.ts | 38 ++++++++++++++++++++-- 3 files changed, 84 insertions(+), 5 deletions(-) diff --git a/packages/core/src/fallback/handler.ts b/packages/core/src/fallback/handler.ts index f216f9216c..2c4c25f5fe 100644 --- a/packages/core/src/fallback/handler.ts +++ b/packages/core/src/fallback/handler.ts @@ -130,19 +130,19 @@ async function processIntent( config: Config, intent: FallbackIntent | null, fallbackModel: string, -): Promise<boolean> { +): Promise<string | boolean> { switch (intent) { case 'retry_always': // TODO(telemetry): Implement generic fallback event logging. Existing // logFlashFallback is specific to a single Model. config.activateFallbackMode(fallbackModel); - return true; + return fallbackModel; case 'retry_once': // For distinct retry (retry_once), we do NOT set the active model permanently. // The FallbackStrategy will handle routing to the available model for this turn // based on the availability service state (which is updated before this). 
- return true; + return fallbackModel; case 'retry_with_credits': return true; diff --git a/packages/core/src/utils/retry.test.ts b/packages/core/src/utils/retry.test.ts index 290f14eadb..21a15e2976 100644 --- a/packages/core/src/utils/retry.test.ts +++ b/packages/core/src/utils/retry.test.ts @@ -665,6 +665,51 @@ describe('retryWithBackoff', () => { expect(mockFn).toHaveBeenCalledTimes(1); }, ); + + it('should wait before retrying when fallback returns same model and fails terminally', async () => { + let attempts = 0; + const mockFn = vi.fn().mockImplementation(async () => { + attempts++; + if (attempts > 2) { + throw new Error('Test terminated to prevent infinite loop'); + } + throw new TerminalQuotaError('Quota exhausted', {} as any); + }); + + const mockPolicy = { + model: 'same-model', + actions: {}, + stateTransitions: {}, + }; + const getAvailabilityContext = vi + .fn() + .mockReturnValue({ policy: mockPolicy }); + + const promise = retryWithBackoff(mockFn, { + maxAttempts: 2, + initialDelayMs: 10, + onPersistent429: async () => 'same-model', + getAvailabilityContext, + authType: 'oauth-personal', + }); + + // Handle rejection early to avoid unhandled rejection warnings + const catchPromise = promise.catch((e) => e); + + // At this point, it should have failed once and be waiting. 
+ expect(mockFn).toHaveBeenCalledTimes(1); + + // We need to advance timers to allow retries + await vi.advanceTimersByTimeAsync(10); // 1st retry delay + await vi.advanceTimersByTimeAsync(20); // 2nd retry delay + + const result = await catchPromise; + expect(result).toBeInstanceOf(Error); + if (result instanceof Error) { + expect(result.message).toBe('Test terminated to prevent infinite loop'); + } + expect(mockFn).toHaveBeenCalledTimes(3); + }); }); it('should abort the retry loop when the signal is aborted', async () => { const abortController = new AbortController(); diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts index a45ba0c0b0..5740162fd6 100644 --- a/packages/core/src/utils/retry.ts +++ b/packages/core/src/utils/retry.ts @@ -307,6 +307,9 @@ export async function retryWithBackoff( ) { if (onPersistent429) { try { + const currentContext = getAvailabilityContext?.(); + const currentModel = currentContext?.policy.model; + const fallbackModel = await onPersistent429( authType, classifiedError, @@ -314,7 +317,21 @@ export async function retryWithBackoff( if (fallbackModel) { attempt = 0; // Reset attempts and retry with the new model. 
currentDelay = initialDelayMs; - continue; + + // Only continue (immediate retry) if fallbackModel is a NEW model + if ( + typeof fallbackModel === 'string' && + fallbackModel !== currentModel + ) { + continue; + } else { + // If it's the same model (or a boolean retry signal), wait before retrying + const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1); + const delayWithJitter = Math.max(0, currentDelay + jitter); + await delay(delayWithJitter, signal); + currentDelay = Math.min(maxDelayMs, currentDelay * 2); + continue; + } } } catch (fallbackError) { debugLogger.warn('Fallback to Flash model failed:', fallbackError); @@ -356,6 +373,9 @@ export async function retryWithBackoff( ); if (onPersistent429) { try { + const currentContext = getAvailabilityContext?.(); + const currentModel = currentContext?.policy.model; + const fallbackModel = await onPersistent429( authType, classifiedError, @@ -363,7 +383,21 @@ export async function retryWithBackoff( if (fallbackModel) { attempt = 0; // Reset attempts and retry with the new model. currentDelay = initialDelayMs; - continue; + + // Only continue (immediate retry) if fallbackModel is a NEW model + if ( + typeof fallbackModel === 'string' && + fallbackModel !== currentModel + ) { + continue; + } else { + // If it's the same model (or a boolean retry signal), wait before retrying + const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1); + const delayWithJitter = Math.max(0, currentDelay + jitter); + await delay(delayWithJitter, signal); + currentDelay = Math.min(maxDelayMs, currentDelay * 2); + continue; + } } } catch (fallbackError) { debugLogger.warn('Model fallback failed:', fallbackError);