feat: launch Gemini 3 Flash in Gemini CLI ⚡️⚡️⚡️ (#15196)

Co-authored-by: gemini-cli-robot <gemini-cli-robot@google.com> Co-authored-by: joshualitt <joshualitt@google.com> Co-authored-by: Sehoon Shon <sshon@google.com> Co-authored-by: Adam Weidman <65992621+adamfweidman@users.noreply.github.com> Co-authored-by: Adib234 <30782825+Adib234@users.noreply.github.com> Co-authored-by: Jenna Inouye <jinouye@google.com>
2026-05-13 21:32:56 -07:00 · 2025-12-17 09:43:21 -08:00
parent 18698d6929
commit bf90b59935
65 changed files with 1898 additions and 2060 deletions
@@ -22,11 +22,12 @@ import { AuthType } from '../core/contentGenerator.js';
 import {
  DEFAULT_GEMINI_FLASH_MODEL,
  DEFAULT_GEMINI_MODEL,
+  DEFAULT_GEMINI_MODEL_AUTO,
+  PREVIEW_GEMINI_FLASH_MODEL,
  PREVIEW_GEMINI_MODEL,
+  PREVIEW_GEMINI_MODEL_AUTO,
 } from '../config/models.js';
-import { logFlashFallback } from '../telemetry/index.js';
 import type { FallbackModelHandler } from './types.js';
-import { ModelNotFoundError } from '../utils/httpErrors.js';
 import { openBrowserSecurely } from '../utils/secure-browser-launcher.js';
 import { coreEvents } from '../utils/events.js';
 import { debugLogger } from '../utils/debugLogger.js';
@@ -64,7 +65,7 @@ const createMockConfig = (overrides: Partial<Config> = {}): Config =>
  ({
    isInFallbackMode: vi.fn(() => false),
    setFallbackMode: vi.fn(),
-    isModelAvailabilityServiceEnabled: vi.fn(() => false),
+    isModelAvailabilityServiceEnabled: vi.fn(() => true),
    isPreviewModelFallbackMode: vi.fn(() => false),
    setPreviewModelFallbackMode: vi.fn(),
    isPreviewModelBypassMode: vi.fn(() => false),
@@ -78,6 +79,7 @@ const createMockConfig = (overrides: Partial<Config> = {}): Config =>
        skipped: [],
      }),
    ),
+    getActiveModel: vi.fn(() => MOCK_PRO_MODEL),
    getModel: vi.fn(() => MOCK_PRO_MODEL),
    getPreviewFeatures: vi.fn(() => false),
    getUserTier: vi.fn(() => undefined),
@@ -113,430 +115,6 @@ describe('handleFallback', () => {
    fallbackEventSpy.mockRestore();
  });

-  it('should return null immediately if authType is not OAuth', async () => {
-    const result = await handleFallback(
-      mockConfig,
-      MOCK_PRO_MODEL,
-      AUTH_API_KEY,
-    );
-    expect(result).toBeNull();
-    expect(mockHandler).not.toHaveBeenCalled();
-    expect(mockConfig.setFallbackMode).not.toHaveBeenCalled();
-  });
-
-  it('should still consult the handler if the failed model is the fallback model', async () => {
-    mockHandler.mockResolvedValue('stop');
-    const result = await handleFallback(
-      mockConfig,
-      FALLBACK_MODEL, // Failed model is Flash
-      AUTH_OAUTH,
-    );
-    expect(result).toBe(false);
-    expect(mockHandler).toHaveBeenCalled();
-  });
-
-  it('should return null if no fallbackHandler is injected in config', async () => {
-    const configWithoutHandler = createMockConfig({
-      fallbackModelHandler: undefined,
-    });
-    const result = await handleFallback(
-      configWithoutHandler,
-      MOCK_PRO_MODEL,
-      AUTH_OAUTH,
-    );
-    expect(result).toBeNull();
-  });
-
-  describe('when handler returns "retry_always"', () => {
-    it('should activate fallback mode, log telemetry, and return true', async () => {
-      mockHandler.mockResolvedValue('retry_always');
-
-      const result = await handleFallback(
-        mockConfig,
-        MOCK_PRO_MODEL,
-        AUTH_OAUTH,
-      );
-
-      expect(result).toBe(true);
-      expect(mockConfig.setFallbackMode).toHaveBeenCalledWith(true);
-      expect(logFlashFallback).toHaveBeenCalled();
-    });
-  });
-
-  describe('when handler returns "stop"', () => {
-    it('should activate fallback mode, log telemetry, and return false', async () => {
-      mockHandler.mockResolvedValue('stop');
-
-      const result = await handleFallback(
-        mockConfig,
-        MOCK_PRO_MODEL,
-        AUTH_OAUTH,
-      );
-
-      expect(result).toBe(false);
-      expect(mockConfig.setFallbackMode).toHaveBeenCalledWith(true);
-      expect(logFlashFallback).toHaveBeenCalled();
-    });
-  });
-
-  it('should return false without toggling fallback when handler returns "retry_later"', async () => {
-    mockHandler.mockResolvedValue('retry_later');
-
-    const result = await handleFallback(mockConfig, MOCK_PRO_MODEL, AUTH_OAUTH);
-
-    expect(result).toBe(false);
-    expect(mockConfig.setFallbackMode).not.toHaveBeenCalled();
-    expect(logFlashFallback).not.toHaveBeenCalled();
-    expect(fallbackEventSpy).not.toHaveBeenCalled();
-  });
-
-  it('should launch upgrade flow and avoid fallback mode when handler returns "upgrade"', async () => {
-    mockHandler.mockResolvedValue('upgrade');
-    vi.mocked(openBrowserSecurely).mockResolvedValue(undefined);
-
-    const result = await handleFallback(mockConfig, MOCK_PRO_MODEL, AUTH_OAUTH);
-
-    expect(result).toBe(false);
-    expect(openBrowserSecurely).toHaveBeenCalledWith(
-      'https://goo.gle/set-up-gemini-code-assist',
-    );
-    expect(mockConfig.setFallbackMode).not.toHaveBeenCalled();
-    expect(logFlashFallback).not.toHaveBeenCalled();
-    expect(fallbackEventSpy).not.toHaveBeenCalled();
-  });
-
-  it('should log a warning and continue when upgrade flow fails to open a browser', async () => {
-    mockHandler.mockResolvedValue('upgrade');
-    const debugWarnSpy = vi.spyOn(debugLogger, 'warn');
-    const consoleWarnSpy = vi
-      .spyOn(console, 'warn')
-      .mockImplementation(() => {});
-    vi.mocked(openBrowserSecurely).mockRejectedValue(new Error('blocked'));
-
-    const result = await handleFallback(mockConfig, MOCK_PRO_MODEL, AUTH_OAUTH);
-
-    expect(result).toBe(false);
-    expect(debugWarnSpy).toHaveBeenCalledWith(
-      'Failed to open browser automatically:',
-      'blocked',
-    );
-    expect(mockConfig.setFallbackMode).not.toHaveBeenCalled();
-    expect(fallbackEventSpy).not.toHaveBeenCalled();
-    debugWarnSpy.mockRestore();
-    consoleWarnSpy.mockRestore();
-  });
-
-  describe('when handler returns an unexpected value', () => {
-    it('should log an error and return null', async () => {
-      mockHandler.mockResolvedValue(null);
-
-      const result = await handleFallback(
-        mockConfig,
-        MOCK_PRO_MODEL,
-        AUTH_OAUTH,
-      );
-
-      expect(result).toBeNull();
-      expect(consoleErrorSpy).toHaveBeenCalledWith(
-        'Fallback UI handler failed:',
-        new Error(
-          'Unexpected fallback intent received from fallbackModelHandler: "null"',
-        ),
-      );
-      expect(mockConfig.setFallbackMode).not.toHaveBeenCalled();
-    });
-  });
-
-  it('should pass the correct context (failedModel, fallbackModel, error) to the handler', async () => {
-    const mockError = new Error('Quota Exceeded');
-    mockHandler.mockResolvedValue('retry_always');
-
-    await handleFallback(mockConfig, MOCK_PRO_MODEL, AUTH_OAUTH, mockError);
-
-    expect(mockHandler).toHaveBeenCalledWith(
-      MOCK_PRO_MODEL,
-      FALLBACK_MODEL,
-      mockError,
-    );
-  });
-
-  it('should not call setFallbackMode or log telemetry if already in fallback mode', async () => {
-    // Setup config where fallback mode is already active
-    const activeFallbackConfig = createMockConfig({
-      fallbackModelHandler: mockHandler,
-      isInFallbackMode: vi.fn(() => true), // Already active
-      setFallbackMode: vi.fn(),
-    });
-
-    mockHandler.mockResolvedValue('retry_always');
-
-    const result = await handleFallback(
-      activeFallbackConfig,
-      MOCK_PRO_MODEL,
-      AUTH_OAUTH,
-    );
-
-    // Should still return true to allow the retry (which will use the active fallback mode)
-    expect(result).toBe(true);
-    // Should still consult the handler
-    expect(mockHandler).toHaveBeenCalled();
-    // But should not mutate state or log telemetry again
-    expect(activeFallbackConfig.setFallbackMode).not.toHaveBeenCalled();
-    expect(logFlashFallback).not.toHaveBeenCalled();
-  });
-
-  it('should catch errors from the handler, log an error, and return null', async () => {
-    const handlerError = new Error('UI interaction failed');
-    mockHandler.mockRejectedValue(handlerError);
-
-    const result = await handleFallback(mockConfig, MOCK_PRO_MODEL, AUTH_OAUTH);
-
-    expect(result).toBeNull();
-    expect(consoleErrorSpy).toHaveBeenCalledWith(
-      'Fallback UI handler failed:',
-      handlerError,
-    );
-    expect(mockConfig.setFallbackMode).not.toHaveBeenCalled();
-  });
-
-  describe('Preview Model Fallback Logic', () => {
-    const previewModel = PREVIEW_GEMINI_MODEL;
-
-    it('should only set Preview Model bypass mode on retryable quota failure', async () => {
-      const mockGoogleApiError = {
-        code: 429,
-        message: 'mock error',
-        details: [],
-      };
-      const retryableQuotaError = new RetryableQuotaError(
-        'Capacity error',
-        mockGoogleApiError,
-        5,
-      );
-      await handleFallback(
-        mockConfig,
-        previewModel,
-        AUTH_OAUTH,
-        retryableQuotaError,
-      );
-      expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(true);
-    });
-
-    it('should not set Preview Model bypass mode on non-retryable quota failure', async () => {
-      const mockGoogleApiError = {
-        code: 429,
-        message: 'mock error',
-        details: [],
-      };
-      const terminalQuotaError = new TerminalQuotaError(
-        'quota error',
-        mockGoogleApiError,
-        5,
-      );
-      await handleFallback(
-        mockConfig,
-        previewModel,
-        AUTH_OAUTH,
-        terminalQuotaError,
-      );
-
-      expect(mockConfig.setPreviewModelBypassMode).not.toHaveBeenCalled();
-    });
-
-    it('should silently retry if Preview Model fallback mode is already active and error is retryable error', async () => {
-      vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(true);
-      const mockGoogleApiError = {
-        code: 429,
-        message: 'mock error',
-        details: [],
-      };
-      const retryableQuotaError = new RetryableQuotaError(
-        'Capacity error',
-        mockGoogleApiError,
-        5,
-      );
-      const result = await handleFallback(
-        mockConfig,
-        previewModel,
-        AUTH_OAUTH,
-        retryableQuotaError,
-      );
-
-      expect(result).toBe(true);
-      expect(mockHandler).not.toHaveBeenCalled();
-    });
-
-    it('should activate Preview Model fallback mode when handler returns "retry_always" and is RetryableQuotaError', async () => {
-      mockHandler.mockResolvedValue('retry_always');
-      const mockGoogleApiError = {
-        code: 429,
-        message: 'mock error',
-        details: [],
-      };
-      const retryableQuotaError = new RetryableQuotaError(
-        'Capacity error',
-        mockGoogleApiError,
-        5,
-      );
-      const result = await handleFallback(
-        mockConfig,
-        previewModel,
-        AUTH_OAUTH,
-        retryableQuotaError,
-      );
-
-      expect(result).toBe(true);
-      expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(true);
-      expect(mockConfig.setPreviewModelFallbackMode).toHaveBeenCalledWith(true);
-    });
-
-    it('should activate regular fallback when handler returns "retry_always" and is TerminalQuotaError', async () => {
-      mockHandler.mockResolvedValue('retry_always');
-      const mockGoogleApiError = {
-        code: 503,
-        message: 'mock error',
-        details: [],
-      };
-      const terminalError = new TerminalQuotaError(
-        'Quota error',
-        mockGoogleApiError,
-        5,
-      );
-      const result = await handleFallback(
-        mockConfig,
-        previewModel,
-        AUTH_OAUTH,
-        terminalError,
-      );
-
-      expect(result).toBe(true);
-      expect(mockConfig.setPreviewModelFallbackMode).not.toBeCalled();
-      expect(mockConfig.setFallbackMode).toHaveBeenCalledWith(true);
-    });
-
-    it('should NOT set fallback mode if user chooses "retry_once"', async () => {
-      const mockGoogleApiError = {
-        code: 429,
-        message: 'mock error',
-        details: [],
-      };
-      const terminalQuotaError = new TerminalQuotaError(
-        'quota error',
-        mockGoogleApiError,
-        5,
-      );
-      mockHandler.mockResolvedValue('retry_once');
-
-      const result = await handleFallback(
-        mockConfig,
-        PREVIEW_GEMINI_MODEL,
-        AuthType.LOGIN_WITH_GOOGLE,
-        terminalQuotaError,
-      );
-
-      expect(result).toBe(true);
-      expect(mockConfig.setPreviewModelBypassMode).not.toHaveBeenCalled();
-      expect(mockConfig.setPreviewModelFallbackMode).not.toHaveBeenCalled();
-      expect(mockConfig.setFallbackMode).not.toHaveBeenCalled();
-    });
-
-    it('should pass DEFAULT_GEMINI_MODEL as fallback when Preview Model fails with Retryable Error', async () => {
-      const mockFallbackHandler = vi.fn().mockResolvedValue('stop');
-      vi.mocked(mockConfig.fallbackModelHandler!).mockImplementation(
-        mockFallbackHandler,
-      );
-      const mockGoogleApiError = {
-        code: 429,
-        message: 'mock error',
-        details: [],
-      };
-      const retryableQuotaError = new RetryableQuotaError(
-        'Capacity error',
-        mockGoogleApiError,
-        5,
-      );
-
-      await handleFallback(
-        mockConfig,
-        PREVIEW_GEMINI_MODEL,
-        AuthType.LOGIN_WITH_GOOGLE,
-        retryableQuotaError,
-      );
-
-      expect(mockConfig.fallbackModelHandler).toHaveBeenCalledWith(
-        PREVIEW_GEMINI_MODEL,
-        DEFAULT_GEMINI_MODEL,
-        retryableQuotaError,
-      );
-    });
-
-    it('should pass DEFAULT_GEMINI_MODEL as fallback when Preview Model fails with other error', async () => {
-      await handleFallback(
-        mockConfig,
-        PREVIEW_GEMINI_MODEL,
-        AuthType.LOGIN_WITH_GOOGLE,
-      );
-
-      expect(mockConfig.fallbackModelHandler).toHaveBeenCalledWith(
-        PREVIEW_GEMINI_MODEL,
-        DEFAULT_GEMINI_MODEL,
-        undefined,
-      );
-    });
-
-    it('should pass DEFAULT_GEMINI_FLASH_MODEL as fallback when Preview Model fails with other error', async () => {
-      const mockGoogleApiError = {
-        code: 429,
-        message: 'mock error',
-        details: [],
-      };
-      const terminalQuotaError = new TerminalQuotaError(
-        'quota error',
-        mockGoogleApiError,
-        5,
-      );
-      await handleFallback(
-        mockConfig,
-        PREVIEW_GEMINI_MODEL,
-        AuthType.LOGIN_WITH_GOOGLE,
-        terminalQuotaError,
-      );
-
-      expect(mockConfig.fallbackModelHandler).toHaveBeenCalledWith(
-        PREVIEW_GEMINI_MODEL,
-        DEFAULT_GEMINI_FLASH_MODEL,
-        terminalQuotaError,
-      );
-    });
-  });
-
-  it('should return null if ModelNotFoundError occurs for a non-preview model', async () => {
-    const modelNotFoundError = new ModelNotFoundError('Not found');
-    const result = await handleFallback(
-      mockConfig,
-      DEFAULT_GEMINI_MODEL, // Not preview model
-      AUTH_OAUTH,
-      modelNotFoundError,
-    );
-    expect(result).toBeNull();
-    expect(mockHandler).not.toHaveBeenCalled();
-  });
-
-  it('should consult handler if ModelNotFoundError occurs for preview model', async () => {
-    const modelNotFoundError = new ModelNotFoundError('Not found');
-    mockHandler.mockResolvedValue('retry_always');
-
-    const result = await handleFallback(
-      mockConfig,
-      PREVIEW_GEMINI_MODEL,
-      AUTH_OAUTH,
-      modelNotFoundError,
-    );
-
-    expect(result).toBe(true);
-    expect(mockHandler).toHaveBeenCalled();
-  });
-
  describe('policy-driven flow', () => {
    let policyConfig: Config;
    let availability: ModelAvailabilityService;
@@ -550,31 +128,47 @@ describe('handleFallback', () => {
      });
      policyHandler = vi.fn().mockResolvedValue('retry_once');
      policyConfig = createMockConfig();
-      vi.spyOn(
-        policyConfig,
-        'isModelAvailabilityServiceEnabled',
-      ).mockReturnValue(true);
-      vi.spyOn(policyConfig, 'getModelAvailabilityService').mockReturnValue(
+
+      // Ensure we test the availability path
+      vi.mocked(policyConfig.isModelAvailabilityServiceEnabled).mockReturnValue(
+        true,
+      );
+      vi.mocked(policyConfig.getModelAvailabilityService).mockReturnValue(
        availability,
      );
-      vi.spyOn(policyConfig, 'getFallbackModelHandler').mockReturnValue(
+      vi.mocked(policyConfig.getFallbackModelHandler).mockReturnValue(
        policyHandler,
      );
    });

+    it('should return null immediately if authType is not OAuth', async () => {
+      const result = await handleFallback(
+        policyConfig,
+        MOCK_PRO_MODEL,
+        AUTH_API_KEY,
+      );
+      expect(result).toBeNull();
+      expect(policyHandler).not.toHaveBeenCalled();
+    });
+
    it('uses availability selection with correct candidates when enabled', async () => {
-      vi.spyOn(policyConfig, 'getPreviewFeatures').mockReturnValue(true);
-      vi.spyOn(policyConfig, 'getModel').mockReturnValue(DEFAULT_GEMINI_MODEL);
+      // Direct mock manipulation since it's already a vi.fn()
+      vi.mocked(policyConfig.getPreviewFeatures).mockReturnValue(true);
+      vi.mocked(policyConfig.getModel).mockReturnValue(
+        DEFAULT_GEMINI_MODEL_AUTO,
+      );

      await handleFallback(policyConfig, DEFAULT_GEMINI_MODEL, AUTH_OAUTH);

      expect(availability.selectFirstAvailable).toHaveBeenCalledWith([
        DEFAULT_GEMINI_FLASH_MODEL,
-        PREVIEW_GEMINI_MODEL,
      ]);
    });

    it('falls back to last resort when availability returns null', async () => {
+      vi.mocked(policyConfig.getModel).mockReturnValue(
+        DEFAULT_GEMINI_MODEL_AUTO,
+      );
      availability.selectFirstAvailable = vi
        .fn()
        .mockReturnValue({ selectedModel: null, skipped: [] });
@@ -634,9 +228,12 @@ describe('handleFallback', () => {
      }
    });

-    it('wraps around to upgrade candidates if the current model was selected mid-chain (e.g. by router)', async () => {
+    it('does not wrap around to upgrade candidates if the current model was selected at the end (e.g. by router)', async () => {
      // Last-resort failure (Flash) in [Preview, Pro, Flash] checks Preview then Pro (all upstream).
-      vi.spyOn(policyConfig, 'getPreviewFeatures').mockReturnValue(true);
+      vi.mocked(policyConfig.getPreviewFeatures).mockReturnValue(true);
+      vi.mocked(policyConfig.getModel).mockReturnValue(
+        DEFAULT_GEMINI_MODEL_AUTO,
+      );

      availability.selectFirstAvailable = vi.fn().mockReturnValue({
        selectedModel: MOCK_PRO_MODEL,
@@ -650,43 +247,27 @@ describe('handleFallback', () => {
        AUTH_OAUTH,
      );

-      expect(availability.selectFirstAvailable).toHaveBeenCalledWith([
-        PREVIEW_GEMINI_MODEL,
-        MOCK_PRO_MODEL,
-      ]);
+      expect(availability.selectFirstAvailable).not.toHaveBeenCalled();
      expect(policyHandler).toHaveBeenCalledWith(
        DEFAULT_GEMINI_FLASH_MODEL,
-        MOCK_PRO_MODEL,
+        DEFAULT_GEMINI_FLASH_MODEL,
        undefined,
      );
    });

-    it('logs and returns null when handler resolves to null', async () => {
-      policyHandler.mockResolvedValue(null);
-      const debugLoggerErrorSpy = vi.spyOn(debugLogger, 'error');
-      const result = await handleFallback(
-        policyConfig,
-        MOCK_PRO_MODEL,
-        AUTH_OAUTH,
-      );
-
-      expect(result).toBeNull();
-      expect(debugLoggerErrorSpy).toHaveBeenCalledWith(
-        'Fallback handler failed:',
-        new Error(
-          'Unexpected fallback intent received from fallbackModelHandler: "null"',
-        ),
-      );
-      debugLoggerErrorSpy.mockRestore();
-    });
-
    it('successfully follows expected availability response for Preview Chain', async () => {
-      availability.selectFirstAvailable = vi
-        .fn()
-        .mockReturnValue({ selectedModel: DEFAULT_GEMINI_MODEL, skipped: [] });
+      availability.selectFirstAvailable = vi.fn().mockReturnValue({
+        selectedModel: PREVIEW_GEMINI_FLASH_MODEL,
+        skipped: [],
+      });
      policyHandler.mockResolvedValue('retry_once');
-      vi.spyOn(policyConfig, 'getPreviewFeatures').mockReturnValue(true);
-      vi.spyOn(policyConfig, 'getModel').mockReturnValue(PREVIEW_GEMINI_MODEL);
+      vi.mocked(policyConfig.getPreviewFeatures).mockReturnValue(true);
+      vi.mocked(policyConfig.getActiveModel).mockReturnValue(
+        PREVIEW_GEMINI_MODEL,
+      );
+      vi.mocked(policyConfig.getModel).mockReturnValue(
+        PREVIEW_GEMINI_MODEL_AUTO,
+      );

      const result = await handleFallback(
        policyConfig,
@@ -696,21 +277,112 @@ describe('handleFallback', () => {

      expect(result).toBe(true);
      expect(availability.selectFirstAvailable).toHaveBeenCalledWith([
-        DEFAULT_GEMINI_MODEL,
-        DEFAULT_GEMINI_FLASH_MODEL,
+        PREVIEW_GEMINI_FLASH_MODEL,
      ]);
-      expect(policyHandler).toHaveBeenCalledWith(
-        PREVIEW_GEMINI_MODEL,
-        DEFAULT_GEMINI_MODEL,
-        undefined,
+    });
+
+    it('should launch upgrade flow and avoid fallback mode when handler returns "upgrade"', async () => {
+      policyHandler.mockResolvedValue('upgrade');
+      vi.mocked(openBrowserSecurely).mockResolvedValue(undefined);
+
+      const result = await handleFallback(
+        policyConfig,
+        MOCK_PRO_MODEL,
+        AUTH_OAUTH,
+      );
+
+      expect(result).toBe(false);
+      expect(openBrowserSecurely).toHaveBeenCalledWith(
+        'https://goo.gle/set-up-gemini-code-assist',
+      );
+      expect(policyConfig.setActiveModel).not.toHaveBeenCalled();
+    });
+
+    it('should catch errors from the handler, log an error, and return null', async () => {
+      const handlerError = new Error('UI interaction failed');
+      policyHandler.mockRejectedValue(handlerError);
+
+      const result = await handleFallback(
+        policyConfig,
+        MOCK_PRO_MODEL,
+        AUTH_OAUTH,
+      );
+
+      expect(result).toBeNull();
+      expect(debugLogger.error).toHaveBeenCalledWith(
+        'Fallback handler failed:',
+        handlerError,
      );
    });

-    it('short-circuits when the failed model is the last-resort policy AND candidates are unavailable', async () => {
+    it('should pass TerminalQuotaError (429) correctly to the handler', async () => {
+      const mockGoogleApiError = {
+        code: 429,
+        message: 'mock error',
+        details: [],
+      };
+      const terminalError = new TerminalQuotaError(
+        'Quota error',
+        mockGoogleApiError,
+        5,
+      );
+      policyHandler.mockResolvedValue('retry_always');
+      vi.mocked(policyConfig.getModel).mockReturnValue(
+        DEFAULT_GEMINI_MODEL_AUTO,
+      );
+
+      await handleFallback(
+        policyConfig,
+        MOCK_PRO_MODEL,
+        AUTH_OAUTH,
+        terminalError,
+      );
+
+      expect(policyHandler).toHaveBeenCalledWith(
+        MOCK_PRO_MODEL,
+        DEFAULT_GEMINI_FLASH_MODEL,
+        terminalError,
+      );
+    });
+
+    it('should pass RetryableQuotaError correctly to the handler', async () => {
+      const mockGoogleApiError = {
+        code: 503,
+        message: 'mock error',
+        details: [],
+      };
+      const retryableError = new RetryableQuotaError(
+        'Service unavailable',
+        mockGoogleApiError,
+        1000,
+      );
+      policyHandler.mockResolvedValue('retry_once');
+      vi.mocked(policyConfig.getModel).mockReturnValue(
+        DEFAULT_GEMINI_MODEL_AUTO,
+      );
+
+      await handleFallback(
+        policyConfig,
+        MOCK_PRO_MODEL,
+        AUTH_OAUTH,
+        retryableError,
+      );
+
+      expect(policyHandler).toHaveBeenCalledWith(
+        MOCK_PRO_MODEL,
+        DEFAULT_GEMINI_FLASH_MODEL,
+        retryableError,
+      );
+    });
+
+    it('Call the handler with fallback model same as the failed model when the failed model is the last-resort policy', async () => {
      // Ensure short-circuit when wrapping to an unavailable upstream model.
      availability.selectFirstAvailable = vi
        .fn()
        .mockReturnValue({ selectedModel: null, skipped: [] });
+      vi.mocked(policyConfig.getModel).mockReturnValue(
+        DEFAULT_GEMINI_MODEL_AUTO,
+      );

      const result = await handleFallback(
        policyConfig,
@@ -718,14 +390,21 @@ describe('handleFallback', () => {
        AUTH_OAUTH,
      );

-      expect(result).toBeNull();
-      // Service called to check upstream; no UI handler since nothing selected.
-      expect(policyConfig.getModelAvailabilityService).toHaveBeenCalled();
-      expect(policyConfig.getFallbackModelHandler).not.toHaveBeenCalled();
+      policyHandler.mockResolvedValue('retry_once');
+
+      expect(result).not.toBeNull();
+      expect(policyHandler).toHaveBeenCalledWith(
+        DEFAULT_GEMINI_FLASH_MODEL,
+        DEFAULT_GEMINI_FLASH_MODEL,
+        undefined,
+      );
    });

    it('calls setActiveModel and logs telemetry when handler returns "retry_always"', async () => {
      policyHandler.mockResolvedValue('retry_always');
+      vi.mocked(policyConfig.getModel).mockReturnValue(
+        DEFAULT_GEMINI_MODEL_AUTO,
+      );

      const result = await handleFallback(
        policyConfig,
@@ -739,7 +418,7 @@ describe('handleFallback', () => {
      // TODO: add logging expect statement
    });

-    it('calls setActiveModel when handler returns "stop"', async () => {
+    it('does NOT call setActiveModel when handler returns "stop"', async () => {
      policyHandler.mockResolvedValue('stop');

      const result = await handleFallback(
@@ -749,8 +428,21 @@ describe('handleFallback', () => {
      );

      expect(result).toBe(false);
-      expect(policyConfig.setActiveModel).toHaveBeenCalledWith(FALLBACK_MODEL);
+      expect(policyConfig.setActiveModel).not.toHaveBeenCalled();
      // TODO: add logging expect statement
    });
+
+    it('does NOT call setActiveModel when handler returns "retry_once"', async () => {
+      policyHandler.mockResolvedValue('retry_once');
+
+      const result = await handleFallback(
+        policyConfig,
+        MOCK_PRO_MODEL,
+        AUTH_OAUTH,
+      );
+
+      expect(result).toBe(true);
+      expect(policyConfig.setActiveModel).not.toHaveBeenCalled();
+    });
  });
 });
@@ -6,24 +6,16 @@

 import type { Config } from '../config/config.js';
 import { AuthType } from '../core/contentGenerator.js';
-import {
-  DEFAULT_GEMINI_FLASH_MODEL,
-  DEFAULT_GEMINI_MODEL,
-  PREVIEW_GEMINI_MODEL,
-} from '../config/models.js';
-import { logFlashFallback, FlashFallbackEvent } from '../telemetry/index.js';
 import { openBrowserSecurely } from '../utils/secure-browser-launcher.js';
 import { debugLogger } from '../utils/debugLogger.js';
 import { getErrorMessage } from '../utils/errors.js';
-import { ModelNotFoundError } from '../utils/httpErrors.js';
-import { TerminalQuotaError } from '../utils/googleQuotaErrors.js';
-import { coreEvents } from '../utils/events.js';
 import type { FallbackIntent, FallbackRecommendation } from './types.js';
 import { classifyFailureKind } from '../availability/errorClassification.js';
 import {
  buildFallbackPolicyContext,
  resolvePolicyChain,
  resolvePolicyAction,
+  applyAvailabilityTransition,
 } from '../availability/policyHelpers.js';

 const UPGRADE_URL_PAGE = 'https://goo.gle/set-up-gemini-code-assist';
@@ -34,75 +26,7 @@ export async function handleFallback(
  authType?: string,
  error?: unknown,
 ): Promise<string | boolean | null> {
-  if (config.isModelAvailabilityServiceEnabled()) {
-    return handlePolicyDrivenFallback(config, failedModel, authType, error);
-  }
-  return legacyHandleFallback(config, failedModel, authType, error);
-}
-
-/**
- * Old fallback logic relying on hard coded strings
- */
-async function legacyHandleFallback(
-  config: Config,
-  failedModel: string,
-  authType?: string,
-  error?: unknown,
-): Promise<string | boolean | null> {
-  if (authType !== AuthType.LOGIN_WITH_GOOGLE) return null;
-
-  // Guardrail: If it's a ModelNotFoundError but NOT the preview model, do not handle it.
-  if (
-    error instanceof ModelNotFoundError &&
-    failedModel !== PREVIEW_GEMINI_MODEL
-  ) {
-    return null;
-  }
-  const shouldActivatePreviewFallback =
-    failedModel === PREVIEW_GEMINI_MODEL &&
-    !(error instanceof TerminalQuotaError);
-  // Preview Model Specific Logic
-  if (shouldActivatePreviewFallback) {
-    // Always set bypass mode for the immediate retry, for non-TerminalQuotaErrors.
-    // This ensures the next attempt uses 2.5 Pro.
-    config.setPreviewModelBypassMode(true);
-
-    // If we are already in Preview Model fallback mode (user previously said "Always"),
-    // we silently retry (which will use 2.5 Pro due to bypass mode).
-    if (config.isPreviewModelFallbackMode()) {
-      return true;
-    }
-  }
-
-  const fallbackModel = shouldActivatePreviewFallback
-    ? DEFAULT_GEMINI_MODEL
-    : DEFAULT_GEMINI_FLASH_MODEL;
-
-  // Consult UI Handler for Intent
-  const fallbackModelHandler = config.fallbackModelHandler;
-  if (typeof fallbackModelHandler !== 'function') return null;
-
-  try {
-    // Pass the specific failed model to the UI handler.
-    const intent = await fallbackModelHandler(
-      failedModel,
-      fallbackModel,
-      error,
-    );
-
-    // Process Intent and Update State
-    return await processIntent(
-      config,
-      intent,
-      failedModel,
-      fallbackModel,
-      authType,
-      error,
-    );
-  } catch (handlerError) {
-    console.error('Fallback UI handler failed:', handlerError);
-    return null;
-  }
+  return handlePolicyDrivenFallback(config, failedModel, authType, error);
 }

 /**
@@ -125,50 +49,56 @@ async function handlePolicyDrivenFallback(
  );

  const failureKind = classifyFailureKind(error);
-
-  if (!candidates.length) {
-    return null;
-  }
-
  const availability = config.getModelAvailabilityService();
-  const selection = availability.selectFirstAvailable(
-    candidates.map((policy) => policy.model),
-  );
-
-  const lastResortPolicy = candidates.find((policy) => policy.isLastResort);
-  const fallbackModel = selection.selectedModel ?? lastResortPolicy?.model;
-  const selectedPolicy = candidates.find(
-    (policy) => policy.model === fallbackModel,
-  );
-
-  if (!fallbackModel || fallbackModel === failedModel || !selectedPolicy) {
-    return null;
-  }
-
-  // failureKind is already declared and calculated above
-  const action = resolvePolicyAction(failureKind, selectedPolicy);
-
-  if (action === 'silent') {
-    return processIntent(
-      config,
-      'retry_always',
-      failedModel,
-      fallbackModel,
-      authType,
-      error,
-    );
-  }
-
-  // This will be used in the future when FallbackRecommendation is passed through UI
-  const recommendation: FallbackRecommendation = {
-    ...selection,
-    selectedModel: fallbackModel,
-    action,
-    failureKind,
-    failedPolicy,
-    selectedPolicy,
+  const getAvailabilityContext = () => {
+    if (!failedPolicy) return undefined;
+    return { service: availability, policy: failedPolicy };
  };
-  void recommendation;
+
+  let fallbackModel: string;
+  if (!candidates.length) {
+    fallbackModel = failedModel;
+  } else {
+    const selection = availability.selectFirstAvailable(
+      candidates.map((policy) => policy.model),
+    );
+
+    const lastResortPolicy = candidates.find((policy) => policy.isLastResort);
+    const selectedFallbackModel =
+      selection.selectedModel ?? lastResortPolicy?.model;
+    const selectedPolicy = candidates.find(
+      (policy) => policy.model === selectedFallbackModel,
+    );
+
+    if (
+      !selectedFallbackModel ||
+      selectedFallbackModel === failedModel ||
+      !selectedPolicy
+    ) {
+      return null;
+    }
+
+    fallbackModel = selectedFallbackModel;
+
+    // failureKind is already declared and calculated above
+    const action = resolvePolicyAction(failureKind, selectedPolicy);
+
+    if (action === 'silent') {
+      applyAvailabilityTransition(getAvailabilityContext, failureKind);
+      return processIntent(config, 'retry_always', fallbackModel);
+    }
+
+    // This will be used in the future when FallbackRecommendation is passed through UI
+    const recommendation: FallbackRecommendation = {
+      ...selection,
+      selectedModel: fallbackModel,
+      action,
+      failureKind,
+      failedPolicy,
+      selectedPolicy,
+    };
+    void recommendation;
+  }

  const handler = config.getFallbackModelHandler();
  if (typeof handler !== 'function') {
@@ -177,14 +107,16 @@ async function handlePolicyDrivenFallback(

  try {
    const intent = await handler(failedModel, fallbackModel, error);
-    return await processIntent(
-      config,
-      intent,
-      failedModel,
-      fallbackModel,
-      authType,
-      error, // Pass the error so processIntent can handle preview-specific logic
-    );
+
+    // If the user chose to switch/retry, we apply the availability transition
+    // to the failed model (e.g. marking it terminal if it had a quota error).
+    // We DO NOT apply it if the user chose 'stop' or 'retry_later', allowing
+    // them to try again later with the same model state.
+    if (intent === 'retry_always' || intent === 'retry_once') {
+      applyAvailabilityTransition(getAvailabilityContext, failureKind);
+    }
+
+    return await processIntent(config, intent, fallbackModel);
  } catch (handlerError) {
    debugLogger.error('Fallback handler failed:', handlerError);
    return null;
@@ -205,47 +137,23 @@ async function handleUpgrade() {
 async function processIntent(
  config: Config,
  intent: FallbackIntent | null,
-  failedModel: string,
  fallbackModel: string,
-  authType?: string,
-  error?: unknown,
 ): Promise<boolean> {
-  const isAvailabilityEnabled = config.isModelAvailabilityServiceEnabled();
-
  switch (intent) {
    case 'retry_always':
-      if (isAvailabilityEnabled) {
-        // TODO(telemetry): Implement generic fallback event logging. Existing
-        // logFlashFallback is specific to a single Model.
-        config.setActiveModel(fallbackModel);
-      } else {
-        // If the error is non-retryable, e.g. TerminalQuota Error, trigger a regular fallback to flash.
-        // For all other errors, activate previewModel fallback.
-        if (
-          failedModel === PREVIEW_GEMINI_MODEL &&
-          !(error instanceof TerminalQuotaError)
-        ) {
-          activatePreviewModelFallbackMode(config);
-        } else {
-          activateFallbackMode(config, authType);
-        }
-      }
+      // TODO(telemetry): Implement generic fallback event logging. Existing
+      // logFlashFallback is specific to a single Model.
+      config.setActiveModel(fallbackModel);
      return true;

    case 'retry_once':
-      if (isAvailabilityEnabled) {
-        config.setActiveModel(fallbackModel);
-      }
+      // For distinct retry (retry_once), we do NOT set the active model permanently.
+      // The FallbackStrategy will handle routing to the available model for this turn
+      // based on the availability service state (which is updated before this).
      return true;

    case 'stop':
-      if (isAvailabilityEnabled) {
-        // TODO(telemetry): Implement generic fallback event logging. Existing
-        // logFlashFallback is specific to a single Model.
-        config.setActiveModel(fallbackModel);
-      } else {
-        activateFallbackMode(config, authType);
-      }
+      // Do not switch model on stop. User wants to stay on current model (and stop).
      return false;

    case 'retry_later':
@@ -261,20 +169,3 @@ async function processIntent(
      );
  }
 }
-
-function activateFallbackMode(config: Config, authType: string | undefined) {
-  if (!config.isInFallbackMode()) {
-    config.setFallbackMode(true);
-    coreEvents.emitFallbackModeChanged(true);
-    if (authType) {
-      logFlashFallback(config, new FlashFallbackEvent(authType));
-    }
-  }
-}
-
-function activatePreviewModelFallbackMode(config: Config) {
-  if (!config.isPreviewModelFallbackMode()) {
-    config.setPreviewModelFallbackMode(true);
-    // We might want a specific event for Preview Model fallback, but for now we just set the mode.
-  }
-}