fix: prevent uncontrolled retry loop during fallback

The retry/fallback path was re-enqueuing the pending user message without deduplication or delay when falling back to the same model, causing a multiplication loop. This fix ensures that we only retry immediately when transitioning to a new model, and respect the backoff delay when retrying the same model. TAG=agy CONV=37b3cdb6-8437-4ced-ae4e-3d04054bacb1
2026-05-15 06:12:50 -07:00 · 2026-05-12 17:50:39 -04:00
parent 79ec47284a
commit 3bed217620
3 changed files with 84 additions and 5 deletions
@@ -130,19 +130,19 @@ async function processIntent(
  config: Config,
  intent: FallbackIntent | null,
  fallbackModel: string,
-): Promise<boolean> {
+): Promise<string | boolean> {
  switch (intent) {
    case 'retry_always':
      // TODO(telemetry): Implement generic fallback event logging. Existing
      // logFlashFallback is specific to a single Model.
      config.activateFallbackMode(fallbackModel);
-      return true;
+      return fallbackModel;

    case 'retry_once':
      // For distinct retry (retry_once), we do NOT set the active model permanently.
      // The FallbackStrategy will handle routing to the available model for this turn
      // based on the availability service state (which is updated before this).
-      return true;
+      return fallbackModel;

    case 'retry_with_credits':
      return true;
@@ -665,6 +665,51 @@ describe('retryWithBackoff', () => {
        expect(mockFn).toHaveBeenCalledTimes(1);
      },
    );
+
+    it('should wait before retrying when fallback returns same model and fails terminally', async () => {
+      let attempts = 0;
+      const mockFn = vi.fn().mockImplementation(async () => {
+        attempts++;
+        if (attempts > 2) {
+          throw new Error('Test terminated to prevent infinite loop');
+        }
+        throw new TerminalQuotaError('Quota exhausted', {} as any);
+      });
+
+      const mockPolicy = {
+        model: 'same-model',
+        actions: {},
+        stateTransitions: {},
+      };
+      const getAvailabilityContext = vi
+        .fn()
+        .mockReturnValue({ policy: mockPolicy });
+
+      const promise = retryWithBackoff(mockFn, {
+        maxAttempts: 2,
+        initialDelayMs: 10,
+        onPersistent429: async () => 'same-model',
+        getAvailabilityContext,
+        authType: 'oauth-personal',
+      });
+
+      // Handle rejection early to avoid unhandled rejection warnings
+      const catchPromise = promise.catch((e) => e);
+
+      // At this point, it should have failed once and be waiting.
+      expect(mockFn).toHaveBeenCalledTimes(1);
+
+      // We need to advance timers to allow retries
+      await vi.advanceTimersByTimeAsync(10); // 1st retry delay
+      await vi.advanceTimersByTimeAsync(20); // 2nd retry delay
+
+      const result = await catchPromise;
+      expect(result).toBeInstanceOf(Error);
+      if (result instanceof Error) {
+        expect(result.message).toBe('Test terminated to prevent infinite loop');
+      }
+      expect(mockFn).toHaveBeenCalledTimes(3);
+    });
  });
  it('should abort the retry loop when the signal is aborted', async () => {
    const abortController = new AbortController();
@@ -307,6 +307,9 @@ export async function retryWithBackoff<T>(
      ) {
        if (onPersistent429) {
          try {
+            const currentContext = getAvailabilityContext?.();
+            const currentModel = currentContext?.policy.model;
+
            const fallbackModel = await onPersistent429(
              authType,
              classifiedError,
@@ -314,7 +317,21 @@ export async function retryWithBackoff<T>(
            if (fallbackModel) {
              attempt = 0; // Reset attempts and retry with the new model.
              currentDelay = initialDelayMs;
-              continue;
+
+              // Only continue (immediate retry) if fallbackModel is a NEW model
+              if (
+                typeof fallbackModel === 'string' &&
+                fallbackModel !== currentModel
+              ) {
+                continue;
+              } else {
+                // If it's the same model (or a boolean retry signal), wait before retrying
+                const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1);
+                const delayWithJitter = Math.max(0, currentDelay + jitter);
+                await delay(delayWithJitter, signal);
+                currentDelay = Math.min(maxDelayMs, currentDelay * 2);
+                continue;
+              }
            }
          } catch (fallbackError) {
            debugLogger.warn('Fallback to Flash model failed:', fallbackError);
@@ -356,6 +373,9 @@ export async function retryWithBackoff<T>(
          );
          if (onPersistent429) {
            try {
+              const currentContext = getAvailabilityContext?.();
+              const currentModel = currentContext?.policy.model;
+
              const fallbackModel = await onPersistent429(
                authType,
                classifiedError,
@@ -363,7 +383,21 @@ export async function retryWithBackoff<T>(
              if (fallbackModel) {
                attempt = 0; // Reset attempts and retry with the new model.
                currentDelay = initialDelayMs;
-                continue;
+
+                // Only continue (immediate retry) if fallbackModel is a NEW model
+                if (
+                  typeof fallbackModel === 'string' &&
+                  fallbackModel !== currentModel
+                ) {
+                  continue;
+                } else {
+                  // If it's the same model (or a boolean retry signal), wait before retrying
+                  const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1);
+                  const delayWithJitter = Math.max(0, currentDelay + jitter);
+                  await delay(delayWithJitter, signal);
+                  currentDelay = Math.min(maxDelayMs, currentDelay * 2);
+                  continue;
+                }
              }
            } catch (fallbackError) {
              debugLogger.warn('Model fallback failed:', fallbackError);