fix: handle request retries and model fallback correctly (#11624)

2026-04-27 05:24:34 -07:00 · 2025-10-24 11:09:06 -07:00
parent c2104a14fb
commit ee92db7533
14 changed files with 1357 additions and 804 deletions
@@ -6,9 +6,7 @@

 import { describe, it, expect } from 'vitest';
 import { parseAndFormatApiError } from './errorParsing.js';
-import { isProQuotaExceededError } from './quotaErrorDetection.js';
 import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
-import { UserTierId } from '../code_assist/types.js';
 import { AuthType } from '../core/contentGenerator.js';
 import type { StructuredError } from '../core/turn.js';

@@ -40,22 +38,6 @@ describe('parseAndFormatApiError', () => {
    );
  });

-  it('should format a 429 API error with the personal message', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain('[API Error: Rate limit exceeded');
-    expect(result).toContain(
-      'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
-    );
-  });
-
  it('should format a 429 API error with the vertex message', () => {
    const errorMessage =
      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
@@ -132,230 +114,4 @@ describe('parseAndFormatApiError', () => {
    const expected = '[API Error: An unknown error occurred.]';
    expect(parseAndFormatApiError(error)).toBe(expected);
  });
-
-  it('should format a 429 API error with Pro quota exceeded message for Google auth (Free tier)', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain(
-      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
-    );
-    expect(result).toContain(
-      'You have reached your daily gemini-2.5-pro quota limit',
-    );
-    expect(result).toContain('upgrade to get higher limits');
-  });
-
-  it('should format a regular 429 API error with standard message for Google auth', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain('[API Error: Rate limit exceeded');
-    expect(result).toContain(
-      'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
-    );
-    expect(result).not.toContain(
-      'You have reached your daily gemini-2.5-pro quota limit',
-    );
-  });
-
-  it('should format a 429 API error with generic quota exceeded message for Google auth', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain(
-      "[API Error: Quota exceeded for quota metric 'GenerationRequests'",
-    );
-    expect(result).toContain('You have reached your daily quota limit');
-    expect(result).not.toContain(
-      'You have reached your daily Gemini 2.5 Pro quota limit',
-    );
-  });
-
-  it('should prioritize Pro quota message over generic quota message for Google auth', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain(
-      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
-    );
-    expect(result).toContain(
-      'You have reached your daily gemini-2.5-pro quota limit',
-    );
-    expect(result).not.toContain('You have reached your daily quota limit');
-  });
-
-  it('should format a 429 API error with Pro quota exceeded message for Google auth (Standard tier)', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      UserTierId.STANDARD,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain(
-      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
-    );
-    expect(result).toContain(
-      'You have reached your daily gemini-2.5-pro quota limit',
-    );
-    expect(result).toContain(
-      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
-    );
-    expect(result).not.toContain('upgrade to get higher limits');
-  });
-
-  it('should format a 429 API error with Pro quota exceeded message for Google auth (Legacy tier)', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      UserTierId.LEGACY,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain(
-      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
-    );
-    expect(result).toContain(
-      'You have reached your daily gemini-2.5-pro quota limit',
-    );
-    expect(result).toContain(
-      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
-    );
-    expect(result).not.toContain('upgrade to get higher limits');
-  });
-
-  it('should handle different Gemini 2.5 version strings in Pro quota exceeded errors', () => {
-    const errorMessage25 =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const errorMessagePreview =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-
-    const result25 = parseAndFormatApiError(
-      errorMessage25,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    const resultPreview = parseAndFormatApiError(
-      errorMessagePreview,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-preview-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-
-    expect(result25).toContain(
-      'You have reached your daily gemini-2.5-pro quota limit',
-    );
-    expect(resultPreview).toContain(
-      'You have reached your daily gemini-2.5-preview-pro quota limit',
-    );
-    expect(result25).toContain('upgrade to get higher limits');
-    expect(resultPreview).toContain('upgrade to get higher limits');
-  });
-
-  it('should not match non-Pro models with similar version strings', () => {
-    // Test that Flash models with similar version strings don't match
-    expect(
-      isProQuotaExceededError(
-        "Quota exceeded for quota metric 'Gemini 2.5 Flash Requests' and limit",
-      ),
-    ).toBe(false);
-    expect(
-      isProQuotaExceededError(
-        "Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit",
-      ),
-    ).toBe(false);
-
-    // Test other model types
-    expect(
-      isProQuotaExceededError(
-        "Quota exceeded for quota metric 'Gemini 2.5 Ultra Requests' and limit",
-      ),
-    ).toBe(false);
-    expect(
-      isProQuotaExceededError(
-        "Quota exceeded for quota metric 'Gemini 2.5 Standard Requests' and limit",
-      ),
-    ).toBe(false);
-
-    // Test generic quota messages
-    expect(
-      isProQuotaExceededError(
-        "Quota exceeded for quota metric 'GenerationRequests' and limit",
-      ),
-    ).toBe(false);
-    expect(
-      isProQuotaExceededError(
-        "Quota exceeded for quota metric 'EmbeddingRequests' and limit",
-      ),
-    ).toBe(false);
-  });
-
-  it('should format a generic quota exceeded message for Google auth (Standard tier)', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      UserTierId.STANDARD,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain(
-      "[API Error: Quota exceeded for quota metric 'GenerationRequests'",
-    );
-    expect(result).toContain('You have reached your daily quota limit');
-    expect(result).toContain(
-      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
-    );
-    expect(result).not.toContain('upgrade to get higher limits');
-  });
-
-  it('should format a regular 429 API error with standard message for Google auth (Standard tier)', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      UserTierId.STANDARD,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain('[API Error: Rate limit exceeded');
-    expect(result).toContain(
-      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
-    );
-    expect(result).not.toContain('upgrade to get higher limits');
-  });
 });