From bf6d0485ce67dec84437ded26a2a8802e3f3c353 Mon Sep 17 00:00:00 2001 From: Adam Weidman <65992621+adamfweidman@users.noreply.github.com> Date: Wed, 17 Dec 2025 17:14:33 -0500 Subject: [PATCH] chore(core): remove legacy fallback flags and migrate loop detection (#15213) --- docs/cli/model-routing.md | 26 ++- .../zed-integration/zedIntegration.test.ts | 1 - .../src/availability/policyHelpers.test.ts | 1 - packages/core/src/config/config.test.ts | 39 ++--- packages/core/src/config/config.ts | 37 +---- .../core/src/config/flashFallback.test.ts | 43 ----- packages/core/src/core/baseLlmClient.test.ts | 1 - packages/core/src/core/client.test.ts | 69 +------- .../core/src/core/contentGenerator.test.ts | 6 - packages/core/src/core/geminiChat.test.ts | 149 +----------------- packages/core/src/core/geminiChat.ts | 27 +--- .../src/core/geminiChat_network_retry.test.ts | 5 - packages/core/src/core/prompts.test.ts | 2 - packages/core/src/fallback/handler.test.ts | 19 --- .../src/services/loopDetectionService.test.ts | 25 +-- .../core/src/services/loopDetectionService.ts | 4 +- packages/core/src/utils/events.ts | 21 --- 17 files changed, 56 insertions(+), 419 deletions(-) diff --git a/docs/cli/model-routing.md b/docs/cli/model-routing.md index 882850fd9d..ed5d4eb0bc 100644 --- a/docs/cli/model-routing.md +++ b/docs/cli/model-routing.md @@ -6,24 +6,20 @@ and provides resilience when the primary model is unavailable. ## How it works -Model routing is not based on prompt complexity, but is a fallback mechanism. -Here's how it works: +Model routing is managed by the `ModelAvailabilityService`, which monitors model +health and automatically routes requests to available models based on defined +policies. -1. **Model failure:** If the currently selected model fails to respond (for - example, due to a server error or other issue), the CLI will initiate the - fallback process. +1. **Model failure:** If the currently selected model fails (e.g., due to quota + or server errors), the CLI will iniate the fallback process. -2. **User consent:** The CLI will prompt you to ask if you want to switch to - the fallback model. This is handled by the `fallbackModelHandler`. +2. **User consent:** Depending on the failure and the model's policy, the CLI + may prompt you to switch to a fallback model (by default always prompts + you). -3. **Fallback activation:** If you consent, the CLI will activate the fallback - mode by calling `config.setFallbackMode(true)`. - -4. **Model switch:** On the next request, the CLI will use the - `DEFAULT_GEMINI_FLASH_MODEL` as the fallback model. This is handled by the - `resolveModel` function in - `packages/cli/src/zed-integration/zedIntegration.ts` which checks if - `isInFallbackMode()` is true. +3. **Model switch:** If approved, or if the policy allows for silent fallback, + the CLI will use an available fallback model for the current turn or the + remainder of the session. ### Model selection precedence diff --git a/packages/cli/src/zed-integration/zedIntegration.test.ts b/packages/cli/src/zed-integration/zedIntegration.test.ts index 5cae1b37d0..0a6341a158 100644 --- a/packages/cli/src/zed-integration/zedIntegration.test.ts +++ b/packages/cli/src/zed-integration/zedIntegration.test.ts @@ -280,7 +280,6 @@ describe('Session', () => { getTool: vi.fn().mockReturnValue(mockTool), }; mockConfig = { - isInFallbackMode: vi.fn().mockReturnValue(false), getModel: vi.fn().mockReturnValue('gemini-pro'), getPreviewFeatures: vi.fn().mockReturnValue({}), getToolRegistry: vi.fn().mockReturnValue(mockToolRegistry), diff --git a/packages/core/src/availability/policyHelpers.test.ts b/packages/core/src/availability/policyHelpers.test.ts index 53a7734b8b..026d09f123 100644 --- a/packages/core/src/availability/policyHelpers.test.ts +++ b/packages/core/src/availability/policyHelpers.test.ts @@ -19,7 +19,6 @@ const createMockConfig = (overrides: Partial = {}): Config => getPreviewFeatures: () => false, getUserTier: () => undefined, getModel: () => 'gemini-2.5-pro', - isInFallbackMode: () => false, ...overrides, }) as unknown as Config; diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index 20212e46bb..e0d381a625 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -344,10 +344,6 @@ describe('Server Config (config.ts)', () => { mockContentConfig, ); - // Set fallback mode to true to ensure it gets reset - config.setFallbackMode(true); - expect(config.isInFallbackMode()).toBe(true); - await config.refreshAuth(authType); expect(createContentGeneratorConfig).toHaveBeenCalledWith( @@ -357,8 +353,6 @@ describe('Server Config (config.ts)', () => { // Verify that contentGeneratorConfig is updated expect(config.getContentGeneratorConfig()).toEqual(mockContentConfig); expect(GeminiClient).toHaveBeenCalledWith(config); - // Verify that fallback mode is reset - expect(config.isInFallbackMode()).toBe(false); }); it('should reset model availability status', async () => { @@ -1569,40 +1563,32 @@ describe('Config getHooks', () => { }); describe('setModel', () => { - it('should allow setting a pro (any) model and disable fallback mode', () => { + it('should allow setting a pro (any) model and reset availability', () => { const config = new Config(baseParams); const service = config.getModelAvailabilityService(); const spy = vi.spyOn(service, 'reset'); - config.setFallbackMode(true); - expect(config.isInFallbackMode()).toBe(true); - const proModel = 'gemini-2.5-pro'; config.setModel(proModel); expect(config.getModel()).toBe(proModel); - expect(config.isInFallbackMode()).toBe(false); expect(mockCoreEvents.emitModelChanged).toHaveBeenCalledWith(proModel); expect(spy).toHaveBeenCalled(); }); - it('should allow setting auto model from non-auto model and disable fallback mode', () => { + it('should allow setting auto model from non-auto model and reset availability', () => { const config = new Config(baseParams); const service = config.getModelAvailabilityService(); const spy = vi.spyOn(service, 'reset'); - config.setFallbackMode(true); - expect(config.isInFallbackMode()).toBe(true); - config.setModel('auto'); expect(config.getModel()).toBe('auto'); - expect(config.isInFallbackMode()).toBe(false); expect(mockCoreEvents.emitModelChanged).toHaveBeenCalledWith('auto'); expect(spy).toHaveBeenCalled(); }); - it('should allow setting auto model from auto model if it is in the fallback mode', () => { + it('should allow setting auto model from auto model and reset availability', () => { const config = new Config({ cwd: '/tmp', targetDir: '/path/to/target', @@ -1614,16 +1600,25 @@ describe('Config getHooks', () => { const service = config.getModelAvailabilityService(); const spy = vi.spyOn(service, 'reset'); - config.setFallbackMode(true); - expect(config.isInFallbackMode()).toBe(true); - config.setModel('auto'); expect(config.getModel()).toBe('auto'); - expect(config.isInFallbackMode()).toBe(false); - expect(mockCoreEvents.emitModelChanged).toHaveBeenCalledWith('auto'); expect(spy).toHaveBeenCalled(); }); + + it('should reset active model when setModel is called with the current model after a fallback', () => { + const config = new Config(baseParams); + const originalModel = config.getModel(); + const fallbackModel = 'fallback-model'; + + config.setActiveModel(fallbackModel); + expect(config.getActiveModel()).toBe(fallbackModel); + + config.setModel(originalModel); + + expect(config.getModel()).toBe(originalModel); + expect(config.getActiveModel()).toBe(originalModel); + }); }); }); diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 711c11edb9..934d8b88cf 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -388,7 +388,6 @@ export class Config { private readonly folderTrust: boolean; private ideMode: boolean; - private inFallbackMode = false; private _activeModel: string; private readonly maxSessionTurns: number; private readonly listSessions: boolean; @@ -447,8 +446,6 @@ export class Config { private experimentsPromise: Promise | undefined; private hookSystem?: HookSystem; - private previewModelFallbackMode = false; - private previewModelBypassMode = false; private readonly enableAgents: boolean; private readonly experimentalJitContext: boolean; @@ -774,9 +771,6 @@ export class Config { this.setHasAccessToPreviewModel(true); } - // Reset the session flag since we're explicitly changing auth and using default model - this.inFallbackMode = false; - // Update model if user no longer has access to the preview model if (!this.hasAccessToPreviewModel && isPreviewModel(this.model)) { this.setModel(DEFAULT_GEMINI_MODEL_AUTO); @@ -847,13 +841,12 @@ export class Config { } setModel(newModel: string): void { - if (this.model !== newModel || this.inFallbackMode) { + if (this.model !== newModel || this._activeModel !== newModel) { this.model = newModel; // When the user explicitly sets a model, that becomes the active model. this._activeModel = newModel; coreEvents.emitModelChanged(newModel); } - this.setFallbackMode(false); this.modelAvailabilityService.reset(); } @@ -867,18 +860,6 @@ export class Config { } } - resetTurn(): void { - this.modelAvailabilityService.resetTurn(); - } - - isInFallbackMode(): boolean { - return this.inFallbackMode; - } - - setFallbackMode(active: boolean): void { - this.inFallbackMode = active; - } - setFallbackModelHandler(handler: FallbackModelHandler): void { this.fallbackModelHandler = handler; } @@ -887,20 +868,8 @@ export class Config { return this.fallbackModelHandler; } - isPreviewModelFallbackMode(): boolean { - return this.previewModelFallbackMode; - } - - setPreviewModelFallbackMode(active: boolean): void { - this.previewModelFallbackMode = active; - } - - isPreviewModelBypassMode(): boolean { - return this.previewModelBypassMode; - } - - setPreviewModelBypassMode(active: boolean): void { - this.previewModelBypassMode = active; + resetTurn(): void { + this.modelAvailabilityService.resetTurn(); } getMaxSessionTurns(): number { diff --git a/packages/core/src/config/flashFallback.test.ts b/packages/core/src/config/flashFallback.test.ts index 5665a7e004..b86ae68d3c 100644 --- a/packages/core/src/config/flashFallback.test.ts +++ b/packages/core/src/config/flashFallback.test.ts @@ -37,27 +37,6 @@ describe('Flash Model Fallback Configuration', () => { }; }); - // These tests do not actually test fallback. isInFallbackMode() only returns true, - // when setFallbackMode is marked as true. This is to decouple setting a model - // with the fallback mechanism. This will be necessary we introduce more - // intelligent model routing. - describe('setModel', () => { - it('should only mark as switched if contentGeneratorConfig exists', () => { - // Create config without initializing contentGeneratorConfig - const newConfig = new Config({ - sessionId: 'test-session-2', - targetDir: '/test', - debugMode: false, - cwd: '/test', - model: DEFAULT_GEMINI_MODEL, - }); - - // Should not crash when contentGeneratorConfig is undefined - newConfig.setModel(DEFAULT_GEMINI_FLASH_MODEL); - expect(newConfig.isInFallbackMode()).toBe(false); - }); - }); - describe('getModel', () => { it('should return contentGeneratorConfig model if available', () => { // Simulate initialized content generator config @@ -78,26 +57,4 @@ describe('Flash Model Fallback Configuration', () => { expect(newConfig.getModel()).toBe('custom-model'); }); }); - - describe('isInFallbackMode', () => { - it('should start as false for new session', () => { - expect(config.isInFallbackMode()).toBe(false); - }); - - it('should remain false if no model switch occurs', () => { - // Perform other operations that don't involve model switching - expect(config.isInFallbackMode()).toBe(false); - }); - - it('should persist switched state throughout session', () => { - config.setModel(DEFAULT_GEMINI_FLASH_MODEL); - // Setting state for fallback mode as is expected of clients - config.setFallbackMode(true); - expect(config.isInFallbackMode()).toBe(true); - - // Should remain true even after getting model - config.getModel(); - expect(config.isInFallbackMode()).toBe(true); - }); - }); }); diff --git a/packages/core/src/core/baseLlmClient.test.ts b/packages/core/src/core/baseLlmClient.test.ts index a14bdc8307..bcb701e739 100644 --- a/packages/core/src/core/baseLlmClient.test.ts +++ b/packages/core/src/core/baseLlmClient.test.ts @@ -117,7 +117,6 @@ describe('BaseLlmClient', () => { setActiveModel: vi.fn(), getPreviewFeatures: vi.fn().mockReturnValue(false), getUserTier: vi.fn().mockReturnValue(undefined), - isInFallbackMode: vi.fn().mockReturnValue(false), getModel: vi.fn().mockReturnValue('test-model'), getActiveModel: vi.fn().mockReturnValue('test-model'), } as unknown as Mocked; diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index 2f70e03879..3f496564ab 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -30,10 +30,7 @@ import { type ChatCompressionInfo, } from './turn.js'; import { getCoreSystemPrompt } from './prompts.js'; -import { - DEFAULT_GEMINI_FLASH_MODEL, - DEFAULT_GEMINI_MODEL_AUTO, -} from '../config/models.js'; +import { DEFAULT_GEMINI_MODEL_AUTO } from '../config/models.js'; import { FileDiscoveryService } from '../services/fileDiscoveryService.js'; import { setSimulate429 } from '../utils/testUtils.js'; import { tokenLimit } from './tokenLimits.js'; @@ -234,8 +231,6 @@ describe('Gemini Client (client.ts)', () => { .mockReturnValue(mockRouterService as unknown as ModelRouterService), getMessageBus: vi.fn().mockReturnValue(undefined), getEnableHooks: vi.fn().mockReturnValue(false), - isInFallbackMode: vi.fn().mockReturnValue(false), - setFallbackMode: vi.fn(), getChatCompression: vi.fn().mockReturnValue(undefined), getSkipNextSpeakerCheck: vi.fn().mockReturnValue(false), getUseSmartEdit: vi.fn().mockReturnValue(false), @@ -1535,68 +1530,6 @@ ${JSON.stringify( expect.any(AbortSignal), ); }); - - it('should use the fallback model and bypass routing when in fallback mode', async () => { - vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(true); - mockRouterService.route.mockResolvedValue({ - model: DEFAULT_GEMINI_FLASH_MODEL, - reason: 'fallback', - }); - - const stream = client.sendMessageStream( - [{ text: 'Hi' }], - new AbortController().signal, - 'prompt-1', - ); - await fromAsync(stream); - - expect(mockTurnRunFn).toHaveBeenCalledWith( - { model: DEFAULT_GEMINI_FLASH_MODEL }, - [{ text: 'Hi' }], - expect.any(AbortSignal), - ); - }); - - it('should stick to the fallback model for the entire sequence even if fallback mode ends', async () => { - // Start the sequence in fallback mode - vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(true); - mockRouterService.route.mockResolvedValue({ - model: DEFAULT_GEMINI_FLASH_MODEL, - reason: 'fallback', - }); - let stream = client.sendMessageStream( - [{ text: 'Hi' }], - new AbortController().signal, - 'prompt-fallback-stickiness', - ); - await fromAsync(stream); - - // First call should use fallback model - expect(mockTurnRunFn).toHaveBeenCalledWith( - { model: DEFAULT_GEMINI_FLASH_MODEL }, - [{ text: 'Hi' }], - expect.any(AbortSignal), - ); - - // End fallback mode - vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(false); - - // Second call in the same sequence - stream = client.sendMessageStream( - [{ text: 'Continue' }], - new AbortController().signal, - 'prompt-fallback-stickiness', - ); - await fromAsync(stream); - - // Router should still not be called, and it should stick to the fallback model - expect(mockTurnRunFn).toHaveBeenCalledTimes(2); // Ensure it was called again - expect(mockTurnRunFn).toHaveBeenLastCalledWith( - { model: DEFAULT_GEMINI_FLASH_MODEL }, // Still the fallback model - [{ text: 'Continue' }], - expect.any(AbortSignal), - ); - }); }); it('should recursively call sendMessageStream with "Please continue." when InvalidStream event is received', async () => { diff --git a/packages/core/src/core/contentGenerator.test.ts b/packages/core/src/core/contentGenerator.test.ts index 9910ff98a8..6acae4f57e 100644 --- a/packages/core/src/core/contentGenerator.test.ts +++ b/packages/core/src/core/contentGenerator.test.ts @@ -31,7 +31,6 @@ const mockConfig = { getModel: vi.fn().mockReturnValue('gemini-pro'), getProxy: vi.fn().mockReturnValue(undefined), getUsageStatisticsEnabled: vi.fn().mockReturnValue(true), - isInFallbackMode: vi.fn().mockReturnValue(false), getPreviewFeatures: vi.fn().mockReturnValue(false), } as unknown as Config; @@ -120,7 +119,6 @@ describe('createContentGenerator', () => { getModel: vi.fn().mockReturnValue('gemini-pro'), getProxy: vi.fn().mockReturnValue(undefined), getUsageStatisticsEnabled: () => true, - isInFallbackMode: vi.fn().mockReturnValue(false), getPreviewFeatures: vi.fn().mockReturnValue(false), } as unknown as Config; @@ -189,7 +187,6 @@ describe('createContentGenerator', () => { getModel: vi.fn().mockReturnValue('gemini-pro'), getProxy: vi.fn().mockReturnValue(undefined), getUsageStatisticsEnabled: () => false, - isInFallbackMode: vi.fn().mockReturnValue(false), getPreviewFeatures: vi.fn().mockReturnValue(false), } as unknown as Config; @@ -237,7 +234,6 @@ describe('createContentGenerator', () => { getModel: vi.fn().mockReturnValue('gemini-pro'), getProxy: vi.fn().mockReturnValue(undefined), getUsageStatisticsEnabled: () => false, - isInFallbackMode: vi.fn().mockReturnValue(false), getPreviewFeatures: vi.fn().mockReturnValue(false), } as unknown as Config; @@ -272,7 +268,6 @@ describe('createContentGenerator', () => { getModel: vi.fn().mockReturnValue('gemini-pro'), getProxy: vi.fn().mockReturnValue(undefined), getUsageStatisticsEnabled: () => false, - isInFallbackMode: vi.fn().mockReturnValue(false), getPreviewFeatures: vi.fn().mockReturnValue(false), } as unknown as Config; @@ -315,7 +310,6 @@ describe('createContentGenerator', () => { const mockConfig = { getModel: vi.fn().mockReturnValue('gemini-pro'), getUsageStatisticsEnabled: () => false, - isInFallbackMode: vi.fn().mockReturnValue(false), getPreviewFeatures: vi.fn().mockReturnValue(false), } as unknown as Config; const mockGenerator = { diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index d7444c52bb..baf8973904 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -17,12 +17,7 @@ import { } from './geminiChat.js'; import type { Config } from '../config/config.js'; import { setSimulate429 } from '../utils/testUtils.js'; -import { - DEFAULT_GEMINI_FLASH_MODEL, - DEFAULT_THINKING_MODE, - PREVIEW_GEMINI_MODEL, - PREVIEW_GEMINI_FLASH_MODEL, -} from '../config/models.js'; +import { DEFAULT_THINKING_MODE } from '../config/models.js'; import { AuthType } from './contentGenerator.js'; import { TerminalQuotaError } from '../utils/googleQuotaErrors.js'; import { type RetryOptions } from '../utils/retry.js'; @@ -146,7 +141,6 @@ describe('GeminiChat', () => { // When model is explicitly set, active model usually resets or updates to it currentActiveModel = m; }), - isInFallbackMode: vi.fn().mockReturnValue(false), getQuotaErrorOccurred: vi.fn().mockReturnValue(false), setQuotaErrorOccurred: vi.fn(), flashFallbackHandler: undefined, @@ -179,10 +173,6 @@ describe('GeminiChat', () => { }; }), }, - isPreviewModelBypassMode: vi.fn().mockReturnValue(false), - setPreviewModelBypassMode: vi.fn(), - isPreviewModelFallbackMode: vi.fn().mockReturnValue(false), - setPreviewModelFallbackMode: vi.fn(), isInteractive: vi.fn().mockReturnValue(false), getEnableHooks: vi.fn().mockReturnValue(false), getActiveModel: vi.fn().mockImplementation(() => currentActiveModel), @@ -548,105 +538,6 @@ describe('GeminiChat', () => { ); }); - it('should use maxAttempts=1 for retryWithBackoff when in Preview Model Fallback Mode', async () => { - vi.mocked(mockConfig.isPreviewModelFallbackMode).mockReturnValue(true); - vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue( - (async function* () { - yield { - candidates: [ - { - content: { parts: [{ text: 'Success' }] }, - finishReason: 'STOP', - }, - ], - } as unknown as GenerateContentResponse; - })(), - ); - - const stream = await chat.sendMessageStream( - { model: PREVIEW_GEMINI_MODEL }, - 'test', - 'prompt-id-fast-retry', - new AbortController().signal, - ); - for await (const _ of stream) { - // consume stream - } - - expect(mockRetryWithBackoff).toHaveBeenCalledWith( - expect.any(Function), - expect.objectContaining({ - maxAttempts: 1, - }), - ); - }); - - it('should use maxAttempts=1 for retryWithBackoff when in Preview Model Fallback Mode (Flash)', async () => { - vi.mocked(mockConfig.isPreviewModelFallbackMode).mockReturnValue(true); - vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue( - (async function* () { - yield { - candidates: [ - { - content: { parts: [{ text: 'Success' }] }, - finishReason: 'STOP', - }, - ], - } as unknown as GenerateContentResponse; - })(), - ); - - const stream = await chat.sendMessageStream( - { model: PREVIEW_GEMINI_FLASH_MODEL }, - 'test', - 'prompt-id-fast-retry-flash', - new AbortController().signal, - ); - for await (const _ of stream) { - // consume stream - } - - expect(mockRetryWithBackoff).toHaveBeenCalledWith( - expect.any(Function), - expect.objectContaining({ - maxAttempts: 1, - }), - ); - }); - - it('should NOT use maxAttempts=1 for other models even in Preview Model Fallback Mode', async () => { - vi.mocked(mockConfig.isPreviewModelFallbackMode).mockReturnValue(true); - vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue( - (async function* () { - yield { - candidates: [ - { - content: { parts: [{ text: 'Success' }] }, - finishReason: 'STOP', - }, - ], - } as unknown as GenerateContentResponse; - })(), - ); - - const stream = await chat.sendMessageStream( - { model: DEFAULT_GEMINI_FLASH_MODEL }, - 'test', - 'prompt-id-normal-retry', - new AbortController().signal, - ); - for await (const _ of stream) { - // consume stream - } - - expect(mockRetryWithBackoff).toHaveBeenCalledWith( - expect.any(Function), - expect.objectContaining({ - maxAttempts: undefined, // Should use default - }), - ); - }); - it('should throw an error when a tool call is followed by an empty stream response', async () => { // 1. Setup: A history where the model has just made a function call. const initialHistory: Content[] = [ @@ -1880,9 +1771,6 @@ describe('GeminiChat', () => { authType, }); - const isInFallbackModeSpy = vi.spyOn(mockConfig, 'isInFallbackMode'); - isInFallbackModeSpy.mockReturnValue(false); - vi.mocked(mockContentGenerator.generateContentStream) .mockRejectedValueOnce(error429) // Attempt 1 fails .mockResolvedValueOnce( @@ -1899,10 +1787,9 @@ describe('GeminiChat', () => { })(), ); - mockHandleFallback.mockImplementation(async () => { - isInFallbackModeSpy.mockReturnValue(true); - return true; // Signal retry - }); + mockHandleFallback.mockImplementation( + async () => true, // Signal retry + ); const stream = await chat.sendMessageStream( { model: 'test-model' }, @@ -1931,34 +1818,6 @@ describe('GeminiChat', () => { const modelTurn = history[1]; expect(modelTurn.parts![0].text).toBe('Success on retry'); }); - - it('should stop retrying if handleFallback returns false (e.g., auth intent)', async () => { - vi.mocked(mockConfig.getModel).mockReturnValue('gemini-pro'); - vi.mocked(mockContentGenerator.generateContentStream).mockRejectedValue( - error429, - ); - mockHandleFallback.mockResolvedValue(false); - - const stream = await chat.sendMessageStream( - { model: 'gemini-2.0-flash' }, - 'test stop', - 'prompt-id-fb2', - new AbortController().signal, - ); - - await expect( - (async () => { - for await (const _ of stream) { - /* consume stream */ - } - })(), - ).rejects.toThrow(error429); - - expect(mockContentGenerator.generateContentStream).toHaveBeenCalledTimes( - 1, - ); - expect(mockHandleFallback).toHaveBeenCalledTimes(1); - }); }); it('should discard valid partial content from a failed attempt upon retry', async () => { diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index 300844e9c9..3dc91e1b6c 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -263,10 +263,6 @@ export class GeminiChat { ): Promise> { await this.sendPromise; - // Preview Model Bypass mode for the new request. - // This ensures that we attempt to use Preview Model for every new user turn - // (unless the "Always" fallback mode is active, which is handled separately). - this.config.setPreviewModelBypassMode(false); let streamDoneResolver: () => void; const streamDonePromise = new Promise((resolve) => { streamDoneResolver = resolve; @@ -299,12 +295,7 @@ export class GeminiChat { try { let lastError: unknown = new Error('Request failed after all retries.'); - let maxAttempts = INVALID_CONTENT_RETRY_OPTIONS.maxAttempts; - // If we are in Preview Model Fallback Mode, we want to fail fast (1 attempt) - // when probing the Preview Model. - if (this.config.isPreviewModelFallbackMode() && isPreviewModel(model)) { - maxAttempts = 1; - } + const maxAttempts = INVALID_CONTENT_RETRY_OPTIONS.maxAttempts; for (let attempt = 0; attempt < maxAttempts; attempt++) { let isConnectionPhase = true; @@ -378,15 +369,6 @@ export class GeminiChat { ); } throw lastError; - } else { - // Preview Model successfully used, disable fallback mode. - // We only do this if we didn't bypass Preview Model (i.e. we actually used it). - if ( - isPreviewModel(model) && - !this.config.isPreviewModelBypassMode() - ) { - this.config.setPreviewModelFallbackMode(false); - } } } finally { streamDoneResolver!(); @@ -548,12 +530,7 @@ export class GeminiChat { authType: this.config.getContentGeneratorConfig()?.authType, retryFetchErrors: this.config.getRetryFetchErrors(), signal: abortSignal, - maxAttempts: - availabilityMaxAttempts ?? - (this.config.isPreviewModelFallbackMode() && - isPreviewModel(lastModelToUse) - ? 1 - : undefined), + maxAttempts: availabilityMaxAttempts, getAvailabilityContext, }); diff --git a/packages/core/src/core/geminiChat_network_retry.test.ts b/packages/core/src/core/geminiChat_network_retry.test.ts index 712be19b06..d8bd4b726d 100644 --- a/packages/core/src/core/geminiChat_network_retry.test.ts +++ b/packages/core/src/core/geminiChat_network_retry.test.ts @@ -81,7 +81,6 @@ describe('GeminiChat Network Retries', () => { getModel: vi.fn().mockReturnValue('gemini-pro'), getActiveModel: vi.fn().mockReturnValue('gemini-pro'), setActiveModel: vi.fn(), - isInFallbackMode: vi.fn().mockReturnValue(false), getQuotaErrorOccurred: vi.fn().mockReturnValue(false), getProjectRoot: vi.fn().mockReturnValue('/test/project/root'), storage: { @@ -96,11 +95,7 @@ describe('GeminiChat Network Retries', () => { generateContentConfig: { temperature: 0 }, })), }, - isPreviewModelBypassMode: vi.fn().mockReturnValue(false), - setPreviewModelBypassMode: vi.fn(), - isPreviewModelFallbackMode: vi.fn().mockReturnValue(false), getEnableHooks: vi.fn().mockReturnValue(false), - setPreviewModelFallbackMode: vi.fn(), getModelAvailabilityService: vi .fn() .mockReturnValue(createAvailabilityServiceMock()), diff --git a/packages/core/src/core/prompts.test.ts b/packages/core/src/core/prompts.test.ts index 8ba8c89c00..cb346b01c3 100644 --- a/packages/core/src/core/prompts.test.ts +++ b/packages/core/src/core/prompts.test.ts @@ -69,7 +69,6 @@ describe('Core System Prompt (prompts.ts)', () => { getModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL_AUTO), getActiveModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL), getPreviewFeatures: vi.fn().mockReturnValue(false), - isInFallbackMode: vi.fn().mockReturnValue(false), getAgentRegistry: vi.fn().mockReturnValue({ getDirectoryContext: vi.fn().mockReturnValue('Mock Agent Directory'), }), @@ -173,7 +172,6 @@ describe('Core System Prompt (prompts.ts)', () => { getModel: vi.fn().mockReturnValue('auto'), getActiveModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL), getPreviewFeatures: vi.fn().mockReturnValue(false), - isInFallbackMode: vi.fn().mockReturnValue(false), getAgentRegistry: vi.fn().mockReturnValue({ getDirectoryContext: vi.fn().mockReturnValue('Mock Agent Directory'), }), diff --git a/packages/core/src/fallback/handler.test.ts b/packages/core/src/fallback/handler.test.ts index 9e62b8bb11..8ae6348384 100644 --- a/packages/core/src/fallback/handler.test.ts +++ b/packages/core/src/fallback/handler.test.ts @@ -29,7 +29,6 @@ import { } from '../config/models.js'; import type { FallbackModelHandler } from './types.js'; import { openBrowserSecurely } from '../utils/secure-browser-launcher.js'; -import { coreEvents } from '../utils/events.js'; import { debugLogger } from '../utils/debugLogger.js'; import * as policyHelpers from '../availability/policyHelpers.js'; import { createDefaultPolicy } from '../availability/policyCatalog.js'; @@ -63,12 +62,6 @@ const AUTH_API_KEY = AuthType.USE_GEMINI; const createMockConfig = (overrides: Partial = {}): Config => ({ - isInFallbackMode: vi.fn(() => false), - setFallbackMode: vi.fn(), - isPreviewModelFallbackMode: vi.fn(() => false), - setPreviewModelFallbackMode: vi.fn(), - isPreviewModelBypassMode: vi.fn(() => false), - setPreviewModelBypassMode: vi.fn(), fallbackHandler: undefined, getFallbackModelHandler: vi.fn(), setActiveModel: vi.fn(), @@ -90,7 +83,6 @@ describe('handleFallback', () => { let mockConfig: Config; let mockHandler: Mock; let consoleErrorSpy: MockInstance; - let fallbackEventSpy: MockInstance; beforeEach(() => { vi.clearAllMocks(); @@ -106,12 +98,10 @@ describe('handleFallback', () => { // But tests might check console.error usage in legacy code if any? // The handler uses console.error in legacyHandleFallback. consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); - fallbackEventSpy = vi.spyOn(coreEvents, 'emitFallbackModeChanged'); }); afterEach(() => { consoleErrorSpy.mockRestore(); - fallbackEventSpy.mockRestore(); }); describe('policy-driven flow', () => { @@ -211,14 +201,6 @@ describe('handleFallback', () => { expect(policyConfig.setActiveModel).toHaveBeenCalledWith( DEFAULT_GEMINI_FLASH_MODEL, ); - // Silent actions should not trigger the legacy fallback mode (via activateFallbackMode), - // but setActiveModel might trigger it via legacy sync if it switches to Flash. - // However, the test requirement is "doesn't emit fallback mode". - // Since we are mocking setActiveModel, we can verify setFallbackMode isn't called *independently*. - // But setActiveModel is mocked, so it won't trigger side effects unless the implementation does. - // We verified setActiveModel is called. - // We verify setFallbackMode is NOT called (which would happen if activateFallbackMode was called). - expect(policyConfig.setFallbackMode).not.toHaveBeenCalled(); } finally { chainSpy.mockRestore(); } @@ -410,7 +392,6 @@ describe('handleFallback', () => { expect(result).toBe(true); expect(policyConfig.setActiveModel).toHaveBeenCalledWith(FALLBACK_MODEL); - expect(policyConfig.setFallbackMode).not.toHaveBeenCalled(); // TODO: add logging expect statement }); diff --git a/packages/core/src/services/loopDetectionService.test.ts b/packages/core/src/services/loopDetectionService.test.ts index 2f54195781..660f6c48e8 100644 --- a/packages/core/src/services/loopDetectionService.test.ts +++ b/packages/core/src/services/loopDetectionService.test.ts @@ -18,6 +18,7 @@ import { GeminiEventType } from '../core/turn.js'; import * as loggers from '../telemetry/loggers.js'; import { LoopType } from '../telemetry/types.js'; import { LoopDetectionService } from './loopDetectionService.js'; +import { createAvailabilityServiceMock } from '../availability/testUtils.js'; vi.mock('../telemetry/loggers.js', () => ({ logLoopDetected: vi.fn(), @@ -37,6 +38,9 @@ describe('LoopDetectionService', () => { mockConfig = { getTelemetryEnabled: () => true, isInteractive: () => false, + getModelAvailabilityService: vi + .fn() + .mockReturnValue(createAvailabilityServiceMock()), } as unknown as Config; service = new LoopDetectionService(mockConfig); vi.clearAllMocks(); @@ -732,13 +736,15 @@ describe('LoopDetectionService LLM Checks', () => { generateJson: vi.fn(), } as unknown as BaseLlmClient; + const mockAvailability = createAvailabilityServiceMock(); + vi.mocked(mockAvailability.snapshot).mockReturnValue({ available: true }); + mockConfig = { getGeminiClient: () => mockGeminiClient, getBaseLlmClient: () => mockBaseLlmClient, getDebugMode: () => false, getTelemetryEnabled: () => true, getModel: vi.fn().mockReturnValue('cognitive-loop-v1'), - isInFallbackMode: vi.fn().mockReturnValue(false), modelConfigService: { getResolvedConfig: vi.fn().mockImplementation((key) => { if (key.model === 'loop-detection') { @@ -751,6 +757,7 @@ describe('LoopDetectionService LLM Checks', () => { }), }, isInteractive: () => false, + getModelAvailabilityService: vi.fn().mockReturnValue(mockAvailability), } as unknown as Config; service = new LoopDetectionService(mockConfig); @@ -901,9 +908,6 @@ describe('LoopDetectionService LLM Checks', () => { }); it('should detect a loop when confidence is exactly equal to the threshold (0.9)', async () => { - // Mock isInFallbackMode to false so it double checks - vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(false); - mockBaseLlmClient.generateJson = vi .fn() .mockResolvedValueOnce({ @@ -944,9 +948,6 @@ describe('LoopDetectionService LLM Checks', () => { }); it('should not detect a loop when Flash is confident (0.9) but Main model is not (0.89)', async () => { - // Mock isInFallbackMode to false so it double checks - vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(false); - mockBaseLlmClient.generateJson = vi .fn() .mockResolvedValueOnce({ @@ -988,9 +989,13 @@ describe('LoopDetectionService LLM Checks', () => { expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(3); }); - it('should only call Flash model if in fallback mode', async () => { - // Mock isInFallbackMode to true - vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(true); + it('should only call Flash model if main model is unavailable', async () => { + // Mock availability to return unavailable for the main model + const availability = mockConfig.getModelAvailabilityService(); + vi.mocked(availability.snapshot).mockReturnValue({ + available: false, + reason: 'quota', + }); mockBaseLlmClient.generateJson = vi.fn().mockResolvedValueOnce({ unproductive_state_confidence: 0.9, diff --git a/packages/core/src/services/loopDetectionService.ts b/packages/core/src/services/loopDetectionService.ts index 8381165c58..d8049dbdb6 100644 --- a/packages/core/src/services/loopDetectionService.ts +++ b/packages/core/src/services/loopDetectionService.ts @@ -472,7 +472,9 @@ export class LoopDetectionService { return false; } - if (this.config.isInFallbackMode()) { + const availability = this.config.getModelAvailabilityService(); + + if (!availability.snapshot(doubleCheckModelName).available) { const flashModelName = this.config.modelConfigService.getResolvedConfig({ model: 'loop-detection', }).model; diff --git a/packages/core/src/utils/events.ts b/packages/core/src/utils/events.ts index 65ff8e9fcd..599de78a41 100644 --- a/packages/core/src/utils/events.ts +++ b/packages/core/src/utils/events.ts @@ -34,16 +34,6 @@ export interface UserFeedbackPayload { error?: unknown; } -/** - * Payload for the 'fallback-mode-changed' event. - */ -export interface FallbackModeChangedPayload { - /** - * Whether fallback mode is now active. - */ - isInFallbackMode: boolean; -} - /** * Payload for the 'model-changed' event. */ @@ -78,7 +68,6 @@ export type MemoryChangedPayload = LoadServerHierarchicalMemoryResponse; export enum CoreEvent { UserFeedback = 'user-feedback', - FallbackModeChanged = 'fallback-mode-changed', ModelChanged = 'model-changed', ConsoleLog = 'console-log', Output = 'output', @@ -88,7 +77,6 @@ export enum CoreEvent { export interface CoreEvents { [CoreEvent.UserFeedback]: [UserFeedbackPayload]; - [CoreEvent.FallbackModeChanged]: [FallbackModeChangedPayload]; [CoreEvent.ModelChanged]: [ModelChangedPayload]; [CoreEvent.ConsoleLog]: [ConsoleLogPayload]; [CoreEvent.Output]: [OutputPayload]; @@ -166,15 +154,6 @@ export class CoreEventEmitter extends EventEmitter { this._emitOrQueue(CoreEvent.Output, payload); } - /** - * Notifies subscribers that fallback mode has changed. - * This is synchronous and doesn't use backlog (UI should already be initialized). - */ - emitFallbackModeChanged(isInFallbackMode: boolean): void { - const payload: FallbackModeChangedPayload = { isInFallbackMode }; - this.emit(CoreEvent.FallbackModeChanged, payload); - } - /** * Notifies subscribers that the model has changed. */