feat: launch Gemini 3 Flash in Gemini CLI ⚡️⚡️⚡️ (#15196)

Co-authored-by: gemini-cli-robot <gemini-cli-robot@google.com>
Co-authored-by: joshualitt <joshualitt@google.com>
Co-authored-by: Sehoon Shon <sshon@google.com>
Co-authored-by: Adam Weidman <65992621+adamfweidman@users.noreply.github.com>
Co-authored-by: Adib234 <30782825+Adib234@users.noreply.github.com>
Co-authored-by: Jenna Inouye <jinouye@google.com>
2026-05-13 05:12:55 -07:00 · 2025-12-17 09:43:21 -08:00
parent 18698d6929
commit bf90b59935
65 changed files with 1898 additions and 2060 deletions
@@ -124,7 +124,7 @@ describe('Retry Utility Fallback Integration', () => {
    });

    await expect(promise).rejects.toThrow('Daily limit');
-    expect(fallbackCallback).not.toHaveBeenCalled();
+    expect(fallbackCallback).toHaveBeenCalledTimes(1);
    expect(mockApiCall).toHaveBeenCalledTimes(1);
  });

@@ -464,7 +464,7 @@ describe('retryWithBackoff', () => {
    });

    it.each([[AuthType.USE_GEMINI], [AuthType.USE_VERTEX_AI], [undefined]])(
-      'should not trigger fallback for non-Google auth users (authType: %s) on TerminalQuotaError',
+      'should invoke onPersistent429 callback (delegating decision) for non-Google auth users (authType: %s) on TerminalQuotaError',
      async (authType) => {
        const fallbackCallback = vi.fn();
        const mockFn = vi.fn().mockImplementation(async () => {
@@ -478,7 +478,7 @@ describe('retryWithBackoff', () => {
        });

        await expect(promise).rejects.toThrow('Daily limit reached');
-        expect(fallbackCallback).not.toHaveBeenCalled();
+        expect(fallbackCallback).toHaveBeenCalled();
        expect(mockFn).toHaveBeenCalledTimes(1);
      },
    );
@@ -629,20 +629,10 @@ describe('retryWithBackoff', () => {
      ).rejects.toThrow(TerminalQuotaError);

      // Verify failures
-      expect(mockService.markTerminal).toHaveBeenCalledWith('model-1', 'quota');
-      expect(mockService.markTerminal).toHaveBeenCalledWith('model-2', 'quota');
+      expect(mockService.markTerminal).not.toHaveBeenCalled();
+      expect(mockService.markTerminal).not.toHaveBeenCalled();

      // Verify sequences
-      expect(mockService.markTerminal).toHaveBeenNthCalledWith(
-        1,
-        'model-1',
-        'quota',
-      );
-      expect(mockService.markTerminal).toHaveBeenNthCalledWith(
-        2,
-        'model-2',
-        'quota',
-      );
    });

    it('marks sticky_retry after retries are exhausted for transient failures', async () => {
@@ -671,8 +661,8 @@ describe('retryWithBackoff', () => {
      expect(result).toBe(transientError);

      expect(fn).toHaveBeenCalledTimes(3);
-      expect(mockService.markRetryOncePerTurn).toHaveBeenCalledWith('model-1');
-      expect(mockService.markRetryOncePerTurn).toHaveBeenCalledTimes(1);
+      expect(mockService.markRetryOncePerTurn).not.toHaveBeenCalled();
+      expect(mockService.markRetryOncePerTurn).not.toHaveBeenCalled();
      expect(mockService.markTerminal).not.toHaveBeenCalled();
    });

@@ -710,29 +700,7 @@ describe('retryWithBackoff', () => {
        maxAttempts: 1,
        getAvailabilityContext: getContext,
      }).catch(() => {});
-      expect(mockService.markTerminal).toHaveBeenCalledWith('model-1', 'quota');
-
-      // Run for notFoundError
-      await retryWithBackoff(fn, {
-        maxAttempts: 1,
-        getAvailabilityContext: getContext,
-      }).catch(() => {});
-      expect(mockService.markTerminal).toHaveBeenCalledWith(
-        'model-1',
-        'capacity',
-      );
-
-      // Run for genericError
-      await retryWithBackoff(fn, {
-        maxAttempts: 1,
-        getAvailabilityContext: getContext,
-      }).catch(() => {});
-      expect(mockService.markTerminal).toHaveBeenCalledWith(
-        'model-1',
-        'capacity',
-      );
-
-      expect(mockService.markTerminal).toHaveBeenCalledTimes(3);
+      expect(mockService.markTerminal).not.toHaveBeenCalled();
    });
  });
 });
@@ -6,7 +6,6 @@

 import type { GenerateContentResponse } from '@google/genai';
 import { ApiError } from '@google/genai';
-import { AuthType } from '../core/contentGenerator.js';
 import {
  TerminalQuotaError,
  RetryableQuotaError,
@@ -16,8 +15,6 @@ import { delay, createAbortError } from './delay.js';
 import { debugLogger } from './debugLogger.js';
 import { getErrorStatus, ModelNotFoundError } from './httpErrors.js';
 import type { RetryAvailabilityContext } from '../availability/modelPolicy.js';
-import { classifyFailureKind } from '../availability/errorClassification.js';
-import { applyAvailabilityTransition } from '../availability/policyHelpers.js';

 export type { RetryAvailabilityContext };

@@ -192,12 +189,6 @@ export async function retryWithBackoff<T>(
      }

      const classifiedError = classifyGoogleError(error);
-      const failureKind = classifyFailureKind(classifiedError);
-      const appliedImmediate =
-        failureKind === 'terminal' || failureKind === 'not_found';
-      if (appliedImmediate) {
-        applyAvailabilityTransition(getAvailabilityContext, failureKind);
-      }

      const errorCode = getErrorStatus(error);

@@ -205,7 +196,7 @@ export async function retryWithBackoff<T>(
        classifiedError instanceof TerminalQuotaError ||
        classifiedError instanceof ModelNotFoundError
      ) {
-        if (onPersistent429 && authType === AuthType.LOGIN_WITH_GOOGLE) {
+        if (onPersistent429) {
          try {
            const fallbackModel = await onPersistent429(
              authType,
@@ -229,7 +220,7 @@ export async function retryWithBackoff<T>(

      if (classifiedError instanceof RetryableQuotaError || is500) {
        if (attempt >= maxAttempts) {
-          if (onPersistent429 && authType === AuthType.LOGIN_WITH_GOOGLE) {
+          if (onPersistent429) {
            try {
              const fallbackModel = await onPersistent429(
                authType,
@@ -244,9 +235,6 @@ export async function retryWithBackoff<T>(
              console.warn('Model fallback failed:', fallbackError);
            }
          }
-          if (!appliedImmediate) {
-            applyAvailabilityTransition(getAvailabilityContext, failureKind);
-          }
          throw classifiedError instanceof RetryableQuotaError
            ? classifiedError
            : error;
@@ -276,9 +264,6 @@ export async function retryWithBackoff<T>(
        attempt >= maxAttempts ||
        !shouldRetryOnError(error as Error, retryFetchErrors)
      ) {
-        if (!appliedImmediate) {
-          applyAvailabilityTransition(getAvailabilityContext, failureKind);
-        }
        throw error;
      }