chore(core): remove legacy fallback flags and migrate loop detection (#15213)

2026-05-12 21:03:05 -07:00 · 2025-12-17 17:14:33 -05:00
parent 3d486ec1e9
commit bf6d0485ce
17 changed files with 56 additions and 419 deletions
@@ -6,24 +6,20 @@ and provides resilience when the primary model is unavailable.

 ## How it works

-Model routing is not based on prompt complexity, but is a fallback mechanism.
-Here's how it works:
+Model routing is managed by the `ModelAvailabilityService`, which monitors model
+health and automatically routes requests to available models based on defined
+policies.

-1.  **Model failure:** If the currently selected model fails to respond (for
-    example, due to a server error or other issue), the CLI will initiate the
-    fallback process.
+1.  **Model failure:** If the currently selected model fails (e.g., due to quota
+    or server errors), the CLI will iniate the fallback process.

-2.  **User consent:** The CLI will prompt you to ask if you want to switch to
-    the fallback model. This is handled by the `fallbackModelHandler`.
+2.  **User consent:** Depending on the failure and the model's policy, the CLI
+    may prompt you to switch to a fallback model (by default always prompts
+    you).

-3.  **Fallback activation:** If you consent, the CLI will activate the fallback
-    mode by calling `config.setFallbackMode(true)`.
-
-4.  **Model switch:** On the next request, the CLI will use the
-    `DEFAULT_GEMINI_FLASH_MODEL` as the fallback model. This is handled by the
-    `resolveModel` function in
-    `packages/cli/src/zed-integration/zedIntegration.ts` which checks if
-    `isInFallbackMode()` is true.
+3.  **Model switch:** If approved, or if the policy allows for silent fallback,
+    the CLI will use an available fallback model for the current turn or the
+    remainder of the session.

 ### Model selection precedence

@@ -280,7 +280,6 @@ describe('Session', () => {
      getTool: vi.fn().mockReturnValue(mockTool),
    };
    mockConfig = {
-      isInFallbackMode: vi.fn().mockReturnValue(false),
      getModel: vi.fn().mockReturnValue('gemini-pro'),
      getPreviewFeatures: vi.fn().mockReturnValue({}),
      getToolRegistry: vi.fn().mockReturnValue(mockToolRegistry),
@@ -19,7 +19,6 @@ const createMockConfig = (overrides: Partial<Config> = {}): Config =>
    getPreviewFeatures: () => false,
    getUserTier: () => undefined,
    getModel: () => 'gemini-2.5-pro',
-    isInFallbackMode: () => false,
    ...overrides,
  }) as unknown as Config;

@@ -344,10 +344,6 @@ describe('Server Config (config.ts)', () => {
        mockContentConfig,
      );

-      // Set fallback mode to true to ensure it gets reset
-      config.setFallbackMode(true);
-      expect(config.isInFallbackMode()).toBe(true);
-
      await config.refreshAuth(authType);

      expect(createContentGeneratorConfig).toHaveBeenCalledWith(
@@ -357,8 +353,6 @@ describe('Server Config (config.ts)', () => {
      // Verify that contentGeneratorConfig is updated
      expect(config.getContentGeneratorConfig()).toEqual(mockContentConfig);
      expect(GeminiClient).toHaveBeenCalledWith(config);
-      // Verify that fallback mode is reset
-      expect(config.isInFallbackMode()).toBe(false);
    });

    it('should reset model availability status', async () => {
@@ -1569,40 +1563,32 @@ describe('Config getHooks', () => {
  });

  describe('setModel', () => {
-    it('should allow setting a pro (any) model and disable fallback mode', () => {
+    it('should allow setting a pro (any) model and reset availability', () => {
      const config = new Config(baseParams);
      const service = config.getModelAvailabilityService();
      const spy = vi.spyOn(service, 'reset');

-      config.setFallbackMode(true);
-      expect(config.isInFallbackMode()).toBe(true);
-
      const proModel = 'gemini-2.5-pro';
      config.setModel(proModel);

      expect(config.getModel()).toBe(proModel);
-      expect(config.isInFallbackMode()).toBe(false);
      expect(mockCoreEvents.emitModelChanged).toHaveBeenCalledWith(proModel);
      expect(spy).toHaveBeenCalled();
    });

-    it('should allow setting auto model from non-auto model and disable fallback mode', () => {
+    it('should allow setting auto model from non-auto model and reset availability', () => {
      const config = new Config(baseParams);
      const service = config.getModelAvailabilityService();
      const spy = vi.spyOn(service, 'reset');

-      config.setFallbackMode(true);
-      expect(config.isInFallbackMode()).toBe(true);
-
      config.setModel('auto');

      expect(config.getModel()).toBe('auto');
-      expect(config.isInFallbackMode()).toBe(false);
      expect(mockCoreEvents.emitModelChanged).toHaveBeenCalledWith('auto');
      expect(spy).toHaveBeenCalled();
    });

-    it('should allow setting auto model from auto model if it is in the fallback mode', () => {
+    it('should allow setting auto model from auto model and reset availability', () => {
      const config = new Config({
        cwd: '/tmp',
        targetDir: '/path/to/target',
@@ -1614,16 +1600,25 @@ describe('Config getHooks', () => {
      const service = config.getModelAvailabilityService();
      const spy = vi.spyOn(service, 'reset');

-      config.setFallbackMode(true);
-      expect(config.isInFallbackMode()).toBe(true);
-
      config.setModel('auto');

      expect(config.getModel()).toBe('auto');
-      expect(config.isInFallbackMode()).toBe(false);
-      expect(mockCoreEvents.emitModelChanged).toHaveBeenCalledWith('auto');
      expect(spy).toHaveBeenCalled();
    });
+
+    it('should reset active model when setModel is called with the current model after a fallback', () => {
+      const config = new Config(baseParams);
+      const originalModel = config.getModel();
+      const fallbackModel = 'fallback-model';
+
+      config.setActiveModel(fallbackModel);
+      expect(config.getActiveModel()).toBe(fallbackModel);
+
+      config.setModel(originalModel);
+
+      expect(config.getModel()).toBe(originalModel);
+      expect(config.getActiveModel()).toBe(originalModel);
+    });
  });
 });

@@ -388,7 +388,6 @@ export class Config {
  private readonly folderTrust: boolean;
  private ideMode: boolean;

-  private inFallbackMode = false;
  private _activeModel: string;
  private readonly maxSessionTurns: number;
  private readonly listSessions: boolean;
@@ -447,8 +446,6 @@ export class Config {
  private experimentsPromise: Promise<void> | undefined;
  private hookSystem?: HookSystem;

-  private previewModelFallbackMode = false;
-  private previewModelBypassMode = false;
  private readonly enableAgents: boolean;

  private readonly experimentalJitContext: boolean;
@@ -774,9 +771,6 @@ export class Config {
      this.setHasAccessToPreviewModel(true);
    }

-    // Reset the session flag since we're explicitly changing auth and using default model
-    this.inFallbackMode = false;
-
    // Update model if user no longer has access to the preview model
    if (!this.hasAccessToPreviewModel && isPreviewModel(this.model)) {
      this.setModel(DEFAULT_GEMINI_MODEL_AUTO);
@@ -847,13 +841,12 @@ export class Config {
  }

  setModel(newModel: string): void {
-    if (this.model !== newModel || this.inFallbackMode) {
+    if (this.model !== newModel || this._activeModel !== newModel) {
      this.model = newModel;
      // When the user explicitly sets a model, that becomes the active model.
      this._activeModel = newModel;
      coreEvents.emitModelChanged(newModel);
    }
-    this.setFallbackMode(false);
    this.modelAvailabilityService.reset();
  }

@@ -867,18 +860,6 @@ export class Config {
    }
  }

-  resetTurn(): void {
-    this.modelAvailabilityService.resetTurn();
-  }
-
-  isInFallbackMode(): boolean {
-    return this.inFallbackMode;
-  }
-
-  setFallbackMode(active: boolean): void {
-    this.inFallbackMode = active;
-  }
-
  setFallbackModelHandler(handler: FallbackModelHandler): void {
    this.fallbackModelHandler = handler;
  }
@@ -887,20 +868,8 @@ export class Config {
    return this.fallbackModelHandler;
  }

-  isPreviewModelFallbackMode(): boolean {
-    return this.previewModelFallbackMode;
-  }
-
-  setPreviewModelFallbackMode(active: boolean): void {
-    this.previewModelFallbackMode = active;
-  }
-
-  isPreviewModelBypassMode(): boolean {
-    return this.previewModelBypassMode;
-  }
-
-  setPreviewModelBypassMode(active: boolean): void {
-    this.previewModelBypassMode = active;
+  resetTurn(): void {
+    this.modelAvailabilityService.resetTurn();
  }

  getMaxSessionTurns(): number {
@@ -37,27 +37,6 @@ describe('Flash Model Fallback Configuration', () => {
    };
  });

-  // These tests do not actually test fallback. isInFallbackMode() only returns true,
-  // when setFallbackMode is marked as true. This is to decouple setting a model
-  // with the fallback mechanism. This will be necessary we introduce more
-  // intelligent model routing.
-  describe('setModel', () => {
-    it('should only mark as switched if contentGeneratorConfig exists', () => {
-      // Create config without initializing contentGeneratorConfig
-      const newConfig = new Config({
-        sessionId: 'test-session-2',
-        targetDir: '/test',
-        debugMode: false,
-        cwd: '/test',
-        model: DEFAULT_GEMINI_MODEL,
-      });
-
-      // Should not crash when contentGeneratorConfig is undefined
-      newConfig.setModel(DEFAULT_GEMINI_FLASH_MODEL);
-      expect(newConfig.isInFallbackMode()).toBe(false);
-    });
-  });
-
  describe('getModel', () => {
    it('should return contentGeneratorConfig model if available', () => {
      // Simulate initialized content generator config
@@ -78,26 +57,4 @@ describe('Flash Model Fallback Configuration', () => {
      expect(newConfig.getModel()).toBe('custom-model');
    });
  });
-
-  describe('isInFallbackMode', () => {
-    it('should start as false for new session', () => {
-      expect(config.isInFallbackMode()).toBe(false);
-    });
-
-    it('should remain false if no model switch occurs', () => {
-      // Perform other operations that don't involve model switching
-      expect(config.isInFallbackMode()).toBe(false);
-    });
-
-    it('should persist switched state throughout session', () => {
-      config.setModel(DEFAULT_GEMINI_FLASH_MODEL);
-      // Setting state for fallback mode as is expected of clients
-      config.setFallbackMode(true);
-      expect(config.isInFallbackMode()).toBe(true);
-
-      // Should remain true even after getting model
-      config.getModel();
-      expect(config.isInFallbackMode()).toBe(true);
-    });
-  });
 });
@@ -117,7 +117,6 @@ describe('BaseLlmClient', () => {
      setActiveModel: vi.fn(),
      getPreviewFeatures: vi.fn().mockReturnValue(false),
      getUserTier: vi.fn().mockReturnValue(undefined),
-      isInFallbackMode: vi.fn().mockReturnValue(false),
      getModel: vi.fn().mockReturnValue('test-model'),
      getActiveModel: vi.fn().mockReturnValue('test-model'),
    } as unknown as Mocked<Config>;
@@ -30,10 +30,7 @@ import {
  type ChatCompressionInfo,
 } from './turn.js';
 import { getCoreSystemPrompt } from './prompts.js';
-import {
-  DEFAULT_GEMINI_FLASH_MODEL,
-  DEFAULT_GEMINI_MODEL_AUTO,
-} from '../config/models.js';
+import { DEFAULT_GEMINI_MODEL_AUTO } from '../config/models.js';
 import { FileDiscoveryService } from '../services/fileDiscoveryService.js';
 import { setSimulate429 } from '../utils/testUtils.js';
 import { tokenLimit } from './tokenLimits.js';
@@ -234,8 +231,6 @@ describe('Gemini Client (client.ts)', () => {
        .mockReturnValue(mockRouterService as unknown as ModelRouterService),
      getMessageBus: vi.fn().mockReturnValue(undefined),
      getEnableHooks: vi.fn().mockReturnValue(false),
-      isInFallbackMode: vi.fn().mockReturnValue(false),
-      setFallbackMode: vi.fn(),
      getChatCompression: vi.fn().mockReturnValue(undefined),
      getSkipNextSpeakerCheck: vi.fn().mockReturnValue(false),
      getUseSmartEdit: vi.fn().mockReturnValue(false),
@@ -1535,68 +1530,6 @@ ${JSON.stringify(
          expect.any(AbortSignal),
        );
      });
-
-      it('should use the fallback model and bypass routing when in fallback mode', async () => {
-        vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(true);
-        mockRouterService.route.mockResolvedValue({
-          model: DEFAULT_GEMINI_FLASH_MODEL,
-          reason: 'fallback',
-        });
-
-        const stream = client.sendMessageStream(
-          [{ text: 'Hi' }],
-          new AbortController().signal,
-          'prompt-1',
-        );
-        await fromAsync(stream);
-
-        expect(mockTurnRunFn).toHaveBeenCalledWith(
-          { model: DEFAULT_GEMINI_FLASH_MODEL },
-          [{ text: 'Hi' }],
-          expect.any(AbortSignal),
-        );
-      });
-
-      it('should stick to the fallback model for the entire sequence even if fallback mode ends', async () => {
-        // Start the sequence in fallback mode
-        vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(true);
-        mockRouterService.route.mockResolvedValue({
-          model: DEFAULT_GEMINI_FLASH_MODEL,
-          reason: 'fallback',
-        });
-        let stream = client.sendMessageStream(
-          [{ text: 'Hi' }],
-          new AbortController().signal,
-          'prompt-fallback-stickiness',
-        );
-        await fromAsync(stream);
-
-        // First call should use fallback model
-        expect(mockTurnRunFn).toHaveBeenCalledWith(
-          { model: DEFAULT_GEMINI_FLASH_MODEL },
-          [{ text: 'Hi' }],
-          expect.any(AbortSignal),
-        );
-
-        // End fallback mode
-        vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(false);
-
-        // Second call in the same sequence
-        stream = client.sendMessageStream(
-          [{ text: 'Continue' }],
-          new AbortController().signal,
-          'prompt-fallback-stickiness',
-        );
-        await fromAsync(stream);
-
-        // Router should still not be called, and it should stick to the fallback model
-        expect(mockTurnRunFn).toHaveBeenCalledTimes(2); // Ensure it was called again
-        expect(mockTurnRunFn).toHaveBeenLastCalledWith(
-          { model: DEFAULT_GEMINI_FLASH_MODEL }, // Still the fallback model
-          [{ text: 'Continue' }],
-          expect.any(AbortSignal),
-        );
-      });
    });

    it('should recursively call sendMessageStream with "Please continue." when InvalidStream event is received', async () => {
@@ -31,7 +31,6 @@ const mockConfig = {
  getModel: vi.fn().mockReturnValue('gemini-pro'),
  getProxy: vi.fn().mockReturnValue(undefined),
  getUsageStatisticsEnabled: vi.fn().mockReturnValue(true),
-  isInFallbackMode: vi.fn().mockReturnValue(false),
  getPreviewFeatures: vi.fn().mockReturnValue(false),
 } as unknown as Config;

@@ -120,7 +119,6 @@ describe('createContentGenerator', () => {
      getModel: vi.fn().mockReturnValue('gemini-pro'),
      getProxy: vi.fn().mockReturnValue(undefined),
      getUsageStatisticsEnabled: () => true,
-      isInFallbackMode: vi.fn().mockReturnValue(false),
      getPreviewFeatures: vi.fn().mockReturnValue(false),
    } as unknown as Config;

@@ -189,7 +187,6 @@ describe('createContentGenerator', () => {
      getModel: vi.fn().mockReturnValue('gemini-pro'),
      getProxy: vi.fn().mockReturnValue(undefined),
      getUsageStatisticsEnabled: () => false,
-      isInFallbackMode: vi.fn().mockReturnValue(false),
      getPreviewFeatures: vi.fn().mockReturnValue(false),
    } as unknown as Config;

@@ -237,7 +234,6 @@ describe('createContentGenerator', () => {
      getModel: vi.fn().mockReturnValue('gemini-pro'),
      getProxy: vi.fn().mockReturnValue(undefined),
      getUsageStatisticsEnabled: () => false,
-      isInFallbackMode: vi.fn().mockReturnValue(false),
      getPreviewFeatures: vi.fn().mockReturnValue(false),
    } as unknown as Config;

@@ -272,7 +268,6 @@ describe('createContentGenerator', () => {
      getModel: vi.fn().mockReturnValue('gemini-pro'),
      getProxy: vi.fn().mockReturnValue(undefined),
      getUsageStatisticsEnabled: () => false,
-      isInFallbackMode: vi.fn().mockReturnValue(false),
      getPreviewFeatures: vi.fn().mockReturnValue(false),
    } as unknown as Config;

@@ -315,7 +310,6 @@ describe('createContentGenerator', () => {
    const mockConfig = {
      getModel: vi.fn().mockReturnValue('gemini-pro'),
      getUsageStatisticsEnabled: () => false,
-      isInFallbackMode: vi.fn().mockReturnValue(false),
      getPreviewFeatures: vi.fn().mockReturnValue(false),
    } as unknown as Config;
    const mockGenerator = {
@@ -17,12 +17,7 @@ import {
 } from './geminiChat.js';
 import type { Config } from '../config/config.js';
 import { setSimulate429 } from '../utils/testUtils.js';
-import {
-  DEFAULT_GEMINI_FLASH_MODEL,
-  DEFAULT_THINKING_MODE,
-  PREVIEW_GEMINI_MODEL,
-  PREVIEW_GEMINI_FLASH_MODEL,
-} from '../config/models.js';
+import { DEFAULT_THINKING_MODE } from '../config/models.js';
 import { AuthType } from './contentGenerator.js';
 import { TerminalQuotaError } from '../utils/googleQuotaErrors.js';
 import { type RetryOptions } from '../utils/retry.js';
@@ -146,7 +141,6 @@ describe('GeminiChat', () => {
        // When model is explicitly set, active model usually resets or updates to it
        currentActiveModel = m;
      }),
-      isInFallbackMode: vi.fn().mockReturnValue(false),
      getQuotaErrorOccurred: vi.fn().mockReturnValue(false),
      setQuotaErrorOccurred: vi.fn(),
      flashFallbackHandler: undefined,
@@ -179,10 +173,6 @@ describe('GeminiChat', () => {
          };
        }),
      },
-      isPreviewModelBypassMode: vi.fn().mockReturnValue(false),
-      setPreviewModelBypassMode: vi.fn(),
-      isPreviewModelFallbackMode: vi.fn().mockReturnValue(false),
-      setPreviewModelFallbackMode: vi.fn(),
      isInteractive: vi.fn().mockReturnValue(false),
      getEnableHooks: vi.fn().mockReturnValue(false),
      getActiveModel: vi.fn().mockImplementation(() => currentActiveModel),
@@ -548,105 +538,6 @@ describe('GeminiChat', () => {
      );
    });

-    it('should use maxAttempts=1 for retryWithBackoff when in Preview Model Fallback Mode', async () => {
-      vi.mocked(mockConfig.isPreviewModelFallbackMode).mockReturnValue(true);
-      vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue(
-        (async function* () {
-          yield {
-            candidates: [
-              {
-                content: { parts: [{ text: 'Success' }] },
-                finishReason: 'STOP',
-              },
-            ],
-          } as unknown as GenerateContentResponse;
-        })(),
-      );
-
-      const stream = await chat.sendMessageStream(
-        { model: PREVIEW_GEMINI_MODEL },
-        'test',
-        'prompt-id-fast-retry',
-        new AbortController().signal,
-      );
-      for await (const _ of stream) {
-        // consume stream
-      }
-
-      expect(mockRetryWithBackoff).toHaveBeenCalledWith(
-        expect.any(Function),
-        expect.objectContaining({
-          maxAttempts: 1,
-        }),
-      );
-    });
-
-    it('should use maxAttempts=1 for retryWithBackoff when in Preview Model Fallback Mode (Flash)', async () => {
-      vi.mocked(mockConfig.isPreviewModelFallbackMode).mockReturnValue(true);
-      vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue(
-        (async function* () {
-          yield {
-            candidates: [
-              {
-                content: { parts: [{ text: 'Success' }] },
-                finishReason: 'STOP',
-              },
-            ],
-          } as unknown as GenerateContentResponse;
-        })(),
-      );
-
-      const stream = await chat.sendMessageStream(
-        { model: PREVIEW_GEMINI_FLASH_MODEL },
-        'test',
-        'prompt-id-fast-retry-flash',
-        new AbortController().signal,
-      );
-      for await (const _ of stream) {
-        // consume stream
-      }
-
-      expect(mockRetryWithBackoff).toHaveBeenCalledWith(
-        expect.any(Function),
-        expect.objectContaining({
-          maxAttempts: 1,
-        }),
-      );
-    });
-
-    it('should NOT use maxAttempts=1 for other models even in Preview Model Fallback Mode', async () => {
-      vi.mocked(mockConfig.isPreviewModelFallbackMode).mockReturnValue(true);
-      vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue(
-        (async function* () {
-          yield {
-            candidates: [
-              {
-                content: { parts: [{ text: 'Success' }] },
-                finishReason: 'STOP',
-              },
-            ],
-          } as unknown as GenerateContentResponse;
-        })(),
-      );
-
-      const stream = await chat.sendMessageStream(
-        { model: DEFAULT_GEMINI_FLASH_MODEL },
-        'test',
-        'prompt-id-normal-retry',
-        new AbortController().signal,
-      );
-      for await (const _ of stream) {
-        // consume stream
-      }
-
-      expect(mockRetryWithBackoff).toHaveBeenCalledWith(
-        expect.any(Function),
-        expect.objectContaining({
-          maxAttempts: undefined, // Should use default
-        }),
-      );
-    });
-
    it('should throw an error when a tool call is followed by an empty stream response', async () => {
      // 1. Setup: A history where the model has just made a function call.
      const initialHistory: Content[] = [
@@ -1880,9 +1771,6 @@ describe('GeminiChat', () => {
        authType,
      });

-      const isInFallbackModeSpy = vi.spyOn(mockConfig, 'isInFallbackMode');
-      isInFallbackModeSpy.mockReturnValue(false);
-
      vi.mocked(mockContentGenerator.generateContentStream)
        .mockRejectedValueOnce(error429) // Attempt 1 fails
        .mockResolvedValueOnce(
@@ -1899,10 +1787,9 @@ describe('GeminiChat', () => {
          })(),
        );

-      mockHandleFallback.mockImplementation(async () => {
-        isInFallbackModeSpy.mockReturnValue(true);
-        return true; // Signal retry
-      });
+      mockHandleFallback.mockImplementation(
+        async () => true, // Signal retry
+      );

      const stream = await chat.sendMessageStream(
        { model: 'test-model' },
@@ -1931,34 +1818,6 @@ describe('GeminiChat', () => {
      const modelTurn = history[1];
      expect(modelTurn.parts![0].text).toBe('Success on retry');
    });
-
-    it('should stop retrying if handleFallback returns false (e.g., auth intent)', async () => {
-      vi.mocked(mockConfig.getModel).mockReturnValue('gemini-pro');
-      vi.mocked(mockContentGenerator.generateContentStream).mockRejectedValue(
-        error429,
-      );
-      mockHandleFallback.mockResolvedValue(false);
-
-      const stream = await chat.sendMessageStream(
-        { model: 'gemini-2.0-flash' },
-        'test stop',
-        'prompt-id-fb2',
-        new AbortController().signal,
-      );
-
-      await expect(
-        (async () => {
-          for await (const _ of stream) {
-            /* consume stream */
-          }
-        })(),
-      ).rejects.toThrow(error429);
-
-      expect(mockContentGenerator.generateContentStream).toHaveBeenCalledTimes(
-        1,
-      );
-      expect(mockHandleFallback).toHaveBeenCalledTimes(1);
-    });
  });

  it('should discard valid partial content from a failed attempt upon retry', async () => {
@@ -263,10 +263,6 @@ export class GeminiChat {
  ): Promise<AsyncGenerator<StreamEvent>> {
    await this.sendPromise;

-    // Preview Model Bypass mode for the new request.
-    // This ensures that we attempt to use Preview Model for every new user turn
-    // (unless the "Always" fallback mode is active, which is handled separately).
-    this.config.setPreviewModelBypassMode(false);
    let streamDoneResolver: () => void;
    const streamDonePromise = new Promise<void>((resolve) => {
      streamDoneResolver = resolve;
@@ -299,12 +295,7 @@ export class GeminiChat {
      try {
        let lastError: unknown = new Error('Request failed after all retries.');

-        let maxAttempts = INVALID_CONTENT_RETRY_OPTIONS.maxAttempts;
-        // If we are in Preview Model Fallback Mode, we want to fail fast (1 attempt)
-        // when probing the Preview Model.
-        if (this.config.isPreviewModelFallbackMode() && isPreviewModel(model)) {
-          maxAttempts = 1;
-        }
+        const maxAttempts = INVALID_CONTENT_RETRY_OPTIONS.maxAttempts;

        for (let attempt = 0; attempt < maxAttempts; attempt++) {
          let isConnectionPhase = true;
@@ -378,15 +369,6 @@ export class GeminiChat {
            );
          }
          throw lastError;
-        } else {
-          // Preview Model successfully used, disable fallback mode.
-          // We only do this if we didn't bypass Preview Model (i.e. we actually used it).
-          if (
-            isPreviewModel(model) &&
-            !this.config.isPreviewModelBypassMode()
-          ) {
-            this.config.setPreviewModelFallbackMode(false);
-          }
        }
      } finally {
        streamDoneResolver!();
@@ -548,12 +530,7 @@ export class GeminiChat {
      authType: this.config.getContentGeneratorConfig()?.authType,
      retryFetchErrors: this.config.getRetryFetchErrors(),
      signal: abortSignal,
-      maxAttempts:
-        availabilityMaxAttempts ??
-        (this.config.isPreviewModelFallbackMode() &&
-        isPreviewModel(lastModelToUse)
-          ? 1
-          : undefined),
+      maxAttempts: availabilityMaxAttempts,
      getAvailabilityContext,
    });

@@ -81,7 +81,6 @@ describe('GeminiChat Network Retries', () => {
      getModel: vi.fn().mockReturnValue('gemini-pro'),
      getActiveModel: vi.fn().mockReturnValue('gemini-pro'),
      setActiveModel: vi.fn(),
-      isInFallbackMode: vi.fn().mockReturnValue(false),
      getQuotaErrorOccurred: vi.fn().mockReturnValue(false),
      getProjectRoot: vi.fn().mockReturnValue('/test/project/root'),
      storage: {
@@ -96,11 +95,7 @@ describe('GeminiChat Network Retries', () => {
          generateContentConfig: { temperature: 0 },
        })),
      },
-      isPreviewModelBypassMode: vi.fn().mockReturnValue(false),
-      setPreviewModelBypassMode: vi.fn(),
-      isPreviewModelFallbackMode: vi.fn().mockReturnValue(false),
      getEnableHooks: vi.fn().mockReturnValue(false),
-      setPreviewModelFallbackMode: vi.fn(),
      getModelAvailabilityService: vi
        .fn()
        .mockReturnValue(createAvailabilityServiceMock()),
@@ -69,7 +69,6 @@ describe('Core System Prompt (prompts.ts)', () => {
      getModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL_AUTO),
      getActiveModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL),
      getPreviewFeatures: vi.fn().mockReturnValue(false),
-      isInFallbackMode: vi.fn().mockReturnValue(false),
      getAgentRegistry: vi.fn().mockReturnValue({
        getDirectoryContext: vi.fn().mockReturnValue('Mock Agent Directory'),
      }),
@@ -173,7 +172,6 @@ describe('Core System Prompt (prompts.ts)', () => {
        getModel: vi.fn().mockReturnValue('auto'),
        getActiveModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL),
        getPreviewFeatures: vi.fn().mockReturnValue(false),
-        isInFallbackMode: vi.fn().mockReturnValue(false),
        getAgentRegistry: vi.fn().mockReturnValue({
          getDirectoryContext: vi.fn().mockReturnValue('Mock Agent Directory'),
        }),
@@ -29,7 +29,6 @@ import {
 } from '../config/models.js';
 import type { FallbackModelHandler } from './types.js';
 import { openBrowserSecurely } from '../utils/secure-browser-launcher.js';
-import { coreEvents } from '../utils/events.js';
 import { debugLogger } from '../utils/debugLogger.js';
 import * as policyHelpers from '../availability/policyHelpers.js';
 import { createDefaultPolicy } from '../availability/policyCatalog.js';
@@ -63,12 +62,6 @@ const AUTH_API_KEY = AuthType.USE_GEMINI;

 const createMockConfig = (overrides: Partial<Config> = {}): Config =>
  ({
-    isInFallbackMode: vi.fn(() => false),
-    setFallbackMode: vi.fn(),
-    isPreviewModelFallbackMode: vi.fn(() => false),
-    setPreviewModelFallbackMode: vi.fn(),
-    isPreviewModelBypassMode: vi.fn(() => false),
-    setPreviewModelBypassMode: vi.fn(),
    fallbackHandler: undefined,
    getFallbackModelHandler: vi.fn(),
    setActiveModel: vi.fn(),
@@ -90,7 +83,6 @@ describe('handleFallback', () => {
  let mockConfig: Config;
  let mockHandler: Mock<FallbackModelHandler>;
  let consoleErrorSpy: MockInstance;
-  let fallbackEventSpy: MockInstance;

  beforeEach(() => {
    vi.clearAllMocks();
@@ -106,12 +98,10 @@ describe('handleFallback', () => {
    // But tests might check console.error usage in legacy code if any?
    // The handler uses console.error in legacyHandleFallback.
    consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
-    fallbackEventSpy = vi.spyOn(coreEvents, 'emitFallbackModeChanged');
  });

  afterEach(() => {
    consoleErrorSpy.mockRestore();
-    fallbackEventSpy.mockRestore();
  });

  describe('policy-driven flow', () => {
@@ -211,14 +201,6 @@ describe('handleFallback', () => {
        expect(policyConfig.setActiveModel).toHaveBeenCalledWith(
          DEFAULT_GEMINI_FLASH_MODEL,
        );
-        // Silent actions should not trigger the legacy fallback mode (via activateFallbackMode),
-        // but setActiveModel might trigger it via legacy sync if it switches to Flash.
-        // However, the test requirement is "doesn't emit fallback mode".
-        // Since we are mocking setActiveModel, we can verify setFallbackMode isn't called *independently*.
-        // But setActiveModel is mocked, so it won't trigger side effects unless the implementation does.
-        // We verified setActiveModel is called.
-        // We verify setFallbackMode is NOT called (which would happen if activateFallbackMode was called).
-        expect(policyConfig.setFallbackMode).not.toHaveBeenCalled();
      } finally {
        chainSpy.mockRestore();
      }
@@ -410,7 +392,6 @@ describe('handleFallback', () => {

      expect(result).toBe(true);
      expect(policyConfig.setActiveModel).toHaveBeenCalledWith(FALLBACK_MODEL);
-      expect(policyConfig.setFallbackMode).not.toHaveBeenCalled();
      // TODO: add logging expect statement
    });

@@ -18,6 +18,7 @@ import { GeminiEventType } from '../core/turn.js';
 import * as loggers from '../telemetry/loggers.js';
 import { LoopType } from '../telemetry/types.js';
 import { LoopDetectionService } from './loopDetectionService.js';
+import { createAvailabilityServiceMock } from '../availability/testUtils.js';

 vi.mock('../telemetry/loggers.js', () => ({
  logLoopDetected: vi.fn(),
@@ -37,6 +38,9 @@ describe('LoopDetectionService', () => {
    mockConfig = {
      getTelemetryEnabled: () => true,
      isInteractive: () => false,
+      getModelAvailabilityService: vi
+        .fn()
+        .mockReturnValue(createAvailabilityServiceMock()),
    } as unknown as Config;
    service = new LoopDetectionService(mockConfig);
    vi.clearAllMocks();
@@ -732,13 +736,15 @@ describe('LoopDetectionService LLM Checks', () => {
      generateJson: vi.fn(),
    } as unknown as BaseLlmClient;

+    const mockAvailability = createAvailabilityServiceMock();
+    vi.mocked(mockAvailability.snapshot).mockReturnValue({ available: true });
+
    mockConfig = {
      getGeminiClient: () => mockGeminiClient,
      getBaseLlmClient: () => mockBaseLlmClient,
      getDebugMode: () => false,
      getTelemetryEnabled: () => true,
      getModel: vi.fn().mockReturnValue('cognitive-loop-v1'),
-      isInFallbackMode: vi.fn().mockReturnValue(false),
      modelConfigService: {
        getResolvedConfig: vi.fn().mockImplementation((key) => {
          if (key.model === 'loop-detection') {
@@ -751,6 +757,7 @@ describe('LoopDetectionService LLM Checks', () => {
        }),
      },
      isInteractive: () => false,
+      getModelAvailabilityService: vi.fn().mockReturnValue(mockAvailability),
    } as unknown as Config;

    service = new LoopDetectionService(mockConfig);
@@ -901,9 +908,6 @@ describe('LoopDetectionService LLM Checks', () => {
  });

  it('should detect a loop when confidence is exactly equal to the threshold (0.9)', async () => {
-    // Mock isInFallbackMode to false so it double checks
-    vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(false);
-
    mockBaseLlmClient.generateJson = vi
      .fn()
      .mockResolvedValueOnce({
@@ -944,9 +948,6 @@ describe('LoopDetectionService LLM Checks', () => {
  });

  it('should not detect a loop when Flash is confident (0.9) but Main model is not (0.89)', async () => {
-    // Mock isInFallbackMode to false so it double checks
-    vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(false);
-
    mockBaseLlmClient.generateJson = vi
      .fn()
      .mockResolvedValueOnce({
@@ -988,9 +989,13 @@ describe('LoopDetectionService LLM Checks', () => {
    expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(3);
  });

-  it('should only call Flash model if in fallback mode', async () => {
-    // Mock isInFallbackMode to true
-    vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(true);
+  it('should only call Flash model if main model is unavailable', async () => {
+    // Mock availability to return unavailable for the main model
+    const availability = mockConfig.getModelAvailabilityService();
+    vi.mocked(availability.snapshot).mockReturnValue({
+      available: false,
+      reason: 'quota',
+    });

    mockBaseLlmClient.generateJson = vi.fn().mockResolvedValueOnce({
      unproductive_state_confidence: 0.9,
@@ -472,7 +472,9 @@ export class LoopDetectionService {
      return false;
    }

-    if (this.config.isInFallbackMode()) {
+    const availability = this.config.getModelAvailabilityService();
+
+    if (!availability.snapshot(doubleCheckModelName).available) {
      const flashModelName = this.config.modelConfigService.getResolvedConfig({
        model: 'loop-detection',
      }).model;
@@ -34,16 +34,6 @@ export interface UserFeedbackPayload {
  error?: unknown;
 }

-/**
- * Payload for the 'fallback-mode-changed' event.
- */
-export interface FallbackModeChangedPayload {
-  /**
-   * Whether fallback mode is now active.
-   */
-  isInFallbackMode: boolean;
-}
-
 /**
 * Payload for the 'model-changed' event.
 */
@@ -78,7 +68,6 @@ export type MemoryChangedPayload = LoadServerHierarchicalMemoryResponse;

 export enum CoreEvent {
  UserFeedback = 'user-feedback',
-  FallbackModeChanged = 'fallback-mode-changed',
  ModelChanged = 'model-changed',
  ConsoleLog = 'console-log',
  Output = 'output',
@@ -88,7 +77,6 @@ export enum CoreEvent {

 export interface CoreEvents {
  [CoreEvent.UserFeedback]: [UserFeedbackPayload];
-  [CoreEvent.FallbackModeChanged]: [FallbackModeChangedPayload];
  [CoreEvent.ModelChanged]: [ModelChangedPayload];
  [CoreEvent.ConsoleLog]: [ConsoleLogPayload];
  [CoreEvent.Output]: [OutputPayload];
@@ -166,15 +154,6 @@ export class CoreEventEmitter extends EventEmitter<CoreEvents> {
    this._emitOrQueue(CoreEvent.Output, payload);
  }

-  /**
-   * Notifies subscribers that fallback mode has changed.
-   * This is synchronous and doesn't use backlog (UI should already be initialized).
-   */
-  emitFallbackModeChanged(isInFallbackMode: boolean): void {
-    const payload: FallbackModeChangedPayload = { isInFallbackMode };
-    this.emit(CoreEvent.FallbackModeChanged, payload);
-  }
-
  /**
   * Notifies subscribers that the model has changed.
   */