fix: handle request retries and model fallback correctly (#11624)

2026-04-05 19:01:12 -07:00 · 2025-10-24 11:09:06 -07:00
parent c2104a14fb
commit ee92db7533
14 changed files with 1357 additions and 804 deletions
--- a/packages/core/src/utils/flashFallback.test.ts
+++ b/packages/core/src/utils/flashFallback.test.ts
@@ -11,7 +11,6 @@ import {
  setSimulate429,
  disableSimulationAfterFallback,
  shouldSimulate429,
-  createSimulated429Error,
  resetRequestCounter,
 } from './testUtils.js';
 import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
@@ -19,12 +18,15 @@ import { retryWithBackoff } from './retry.js';
 import { AuthType } from '../core/contentGenerator.js';
 // Import the new types (Assuming this test file is in packages/core/src/utils/)
 import type { FallbackModelHandler } from '../fallback/types.js';
+import type { GoogleApiError } from './googleErrors.js';
+import { TerminalQuotaError } from './googleQuotaErrors.js';

 vi.mock('node:fs');

 // Update the description to reflect that this tests the retry utility's integration
 describe('Retry Utility Fallback Integration', () => {
  let config: Config;
+  let mockGoogleApiError: GoogleApiError;

  beforeEach(() => {
    vi.mocked(fs.existsSync).mockReturnValue(true);
@@ -38,6 +40,11 @@ describe('Retry Utility Fallback Integration', () => {
      cwd: '/test',
      model: 'gemini-2.5-pro',
    });
+    mockGoogleApiError = {
+      code: 429,
+      message: 'mock error',
+      details: [],
+    };

    // Reset simulation state for each test
    setSimulate429(false);
@@ -56,6 +63,7 @@ describe('Retry Utility Fallback Integration', () => {
    const result = await config.fallbackModelHandler!(
      'gemini-2.5-pro',
      DEFAULT_GEMINI_FLASH_MODEL,
+      new Error('test'),
    );

    // Verify it returns the correct intent
@@ -63,81 +71,61 @@ describe('Retry Utility Fallback Integration', () => {
  });

  // This test validates the retry utility's logic for triggering the callback.
-  it('should trigger onPersistent429 after 2 consecutive 429 errors for OAuth users', async () => {
+  it('should trigger onPersistent429 on TerminalQuotaError for OAuth users', async () => {
    let fallbackCalled = false;
-    // Removed fallbackModel variable as it's no longer relevant here.

-    // Mock function that simulates exactly 2 429 errors, then succeeds after fallback
    const mockApiCall = vi
      .fn()
-      .mockRejectedValueOnce(createSimulated429Error())
-      .mockRejectedValueOnce(createSimulated429Error())
+      .mockRejectedValueOnce(
+        new TerminalQuotaError('Daily limit', mockGoogleApiError),
+      )
+      .mockRejectedValueOnce(
+        new TerminalQuotaError('Daily limit', mockGoogleApiError),
+      )
      .mockResolvedValueOnce('success after fallback');

-    // Mock the onPersistent429 callback (this is what client.ts/geminiChat.ts provides)
    const mockPersistent429Callback = vi.fn(async (_authType?: string) => {
      fallbackCalled = true;
-      // Return true to signal retryWithBackoff to reset attempts and continue.
      return true;
    });

-    // Test with OAuth personal auth type, with maxAttempts = 2 to ensure fallback triggers
    const result = await retryWithBackoff(mockApiCall, {
      maxAttempts: 2,
      initialDelayMs: 1,
      maxDelayMs: 10,
-      shouldRetryOnError: (error: Error) => {
-        const status = (error as Error & { status?: number }).status;
-        return status === 429;
-      },
      onPersistent429: mockPersistent429Callback,
      authType: AuthType.LOGIN_WITH_GOOGLE,
    });

-    // Verify fallback mechanism was triggered
    expect(fallbackCalled).toBe(true);
    expect(mockPersistent429Callback).toHaveBeenCalledWith(
      AuthType.LOGIN_WITH_GOOGLE,
-      expect.any(Error),
+      expect.any(TerminalQuotaError),
    );
    expect(result).toBe('success after fallback');
-    // Should have: 2 failures, then fallback triggered, then 1 success after retry reset
    expect(mockApiCall).toHaveBeenCalledTimes(3);
  });

  it('should not trigger onPersistent429 for API key users', async () => {
-    let fallbackCalled = false;
+    const fallbackCallback = vi.fn();

-    // Mock function that simulates 429 errors
-    const mockApiCall = vi.fn().mockRejectedValue(createSimulated429Error());
+    const mockApiCall = vi
+      .fn()
+      .mockRejectedValueOnce(
+        new TerminalQuotaError('Daily limit', mockGoogleApiError),
+      );

-    // Mock the callback
-    const mockPersistent429Callback = vi.fn(async () => {
-      fallbackCalled = true;
-      return true;
+    const promise = retryWithBackoff(mockApiCall, {
+      maxAttempts: 2,
+      initialDelayMs: 1,
+      maxDelayMs: 10,
+      onPersistent429: fallbackCallback,
+      authType: AuthType.USE_GEMINI, // API key auth type
    });

-    // Test with API key auth type - should not trigger fallback
-    try {
-      await retryWithBackoff(mockApiCall, {
-        maxAttempts: 5,
-        initialDelayMs: 10,
-        maxDelayMs: 100,
-        shouldRetryOnError: (error: Error) => {
-          const status = (error as Error & { status?: number }).status;
-          return status === 429;
-        },
-        onPersistent429: mockPersistent429Callback,
-        authType: AuthType.USE_GEMINI, // API key auth type
-      });
-    } catch (error) {
-      // Expected to throw after max attempts
-      expect((error as Error).message).toContain('Rate limit exceeded');
-    }
-
-    // Verify fallback was NOT triggered for API key users
-    expect(fallbackCalled).toBe(false);
-    expect(mockPersistent429Callback).not.toHaveBeenCalled();
+    await expect(promise).rejects.toThrow('Daily limit');
+    expect(fallbackCallback).not.toHaveBeenCalled();
+    expect(mockApiCall).toHaveBeenCalledTimes(1);
  });

  // This test validates the test utilities themselves.