diff --git a/packages/cli/src/config/settings.test.ts b/packages/cli/src/config/settings.test.ts index f370a8dd1c..4bc2e7250a 100644 --- a/packages/cli/src/config/settings.test.ts +++ b/packages/cli/src/config/settings.test.ts @@ -291,7 +291,7 @@ describe('Settings Loading and Merging', () => { theme: 'legacy-dark', vimMode: true, contextFileName: 'LEGACY_CONTEXT.md', - model: 'gemini-pro', + model: 'gemini-2.5-pro', mcpServers: { 'legacy-server-1': { command: 'npm', @@ -329,7 +329,7 @@ describe('Settings Loading and Merging', () => { fileName: 'LEGACY_CONTEXT.md', }, model: { - name: 'gemini-pro', + name: 'gemini-2.5-pro', }, mcpServers: { 'legacy-server-1': { @@ -1929,7 +1929,7 @@ describe('Settings Loading and Merging', () => { usageStatisticsEnabled: false, }, model: { - name: 'gemini-pro', + name: 'gemini-2.5-pro', }, context: { fileName: 'CONTEXT.md', @@ -1968,7 +1968,7 @@ describe('Settings Loading and Merging', () => { vimMode: true, theme: 'dark', usageStatisticsEnabled: false, - model: 'gemini-pro', + model: 'gemini-2.5-pro', contextFileName: 'CONTEXT.md', includeDirectories: ['/src'], sandbox: true, diff --git a/packages/cli/src/ui/AppContainer.test.tsx b/packages/cli/src/ui/AppContainer.test.tsx index e9684434ba..1d7fce5aa9 100644 --- a/packages/cli/src/ui/AppContainer.test.tsx +++ b/packages/cli/src/ui/AppContainer.test.tsx @@ -17,7 +17,7 @@ import { import { render } from '../test-utils/render.js'; import { waitFor } from '../test-utils/async.js'; import { cleanup } from 'ink-testing-library'; -import { act, useContext } from 'react'; +import { act, useContext, type ReactElement } from 'react'; import { AppContainer } from './AppContainer.js'; import { SettingsContext } from './contexts/SettingsContext.js'; import { @@ -71,6 +71,14 @@ vi.mock('@google/gemini-cli-core', async (importOriginal) => { })), enableMouseEvents: vi.fn(), disableMouseEvents: vi.fn(), + FileDiscoveryService: vi.fn().mockImplementation(() => ({ + initialize: vi.fn(), + })), + startupProfiler: { + flush: vi.fn(), + start: vi.fn(), + end: vi.fn(), + }, }; }); import ansiEscapes from 'ansi-escapes'; @@ -344,7 +352,7 @@ describe('AppContainer State Management', () => { // Add other properties if AppContainer uses them }); mockedUseLogger.mockReturnValue({ - getPreviousUserMessages: vi.fn().mockReturnValue(new Promise(() => {})), + getPreviousUserMessages: vi.fn().mockResolvedValue([]), }); mockedUseInputHistoryStore.mockReturnValue({ inputHistory: [], @@ -361,6 +369,8 @@ describe('AppContainer State Management', () => { // Mock config's getTargetDir to return consistent workspace directory vi.spyOn(mockConfig, 'getTargetDir').mockReturnValue('/test/workspace'); + vi.spyOn(mockConfig, 'initialize').mockResolvedValue(undefined); + vi.spyOn(mockConfig, 'getDebugMode').mockReturnValue(false); mockExtensionManager = vi.mockObject({ getExtensions: vi.fn().mockReturnValue([]), @@ -403,17 +413,25 @@ describe('AppContainer State Management', () => { describe('Basic Rendering', () => { it('renders without crashing with minimal props', async () => { - const { unmount } = renderAppContainer(); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer(); + unmount = result.unmount; + }); await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount(); + unmount!(); }); it('renders with startup warnings', async () => { const startupWarnings = ['Warning 1', 'Warning 2']; - const { unmount } = renderAppContainer({ startupWarnings }); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer({ startupWarnings }); + unmount = result.unmount; + }); await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount(); + unmount!(); }); }); @@ -424,11 +442,15 @@ describe('AppContainer State Management', () => { themeError: 'Failed to load theme', }; - const { unmount } = renderAppContainer({ - initResult: initResultWithError, + let unmount: () => void; + await act(async () => { + const result = renderAppContainer({ + initResult: initResultWithError, + }); + unmount = result.unmount; }); await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount(); + unmount!(); }); it('handles debug mode state', () => { @@ -443,29 +465,45 @@ describe('AppContainer State Management', () => { describe('Context Providers', () => { it('provides AppContext with correct values', async () => { - const { unmount } = renderAppContainer({ version: '2.0.0' }); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer({ version: '2.0.0' }); + unmount = result.unmount; + }); await waitFor(() => expect(capturedUIState).toBeTruthy()); // Should render and unmount cleanly - expect(() => unmount()).not.toThrow(); + expect(() => unmount!()).not.toThrow(); }); it('provides UIStateContext with state management', async () => { - const { unmount } = renderAppContainer(); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer(); + unmount = result.unmount; + }); await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount(); + unmount!(); }); it('provides UIActionsContext with action handlers', async () => { - const { unmount } = renderAppContainer(); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer(); + unmount = result.unmount; + }); await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount(); + unmount!(); }); it('provides ConfigContext with config object', async () => { - const { unmount } = renderAppContainer(); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer(); + unmount = result.unmount; + }); await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount(); + unmount!(); }); }); @@ -480,9 +518,13 @@ describe('AppContainer State Management', () => { }, } as unknown as LoadedSettings; - const { unmount } = renderAppContainer({ settings: settingsAllHidden }); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer({ settings: settingsAllHidden }); + unmount = result.unmount; + }); await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount(); + unmount!(); }); it('handles settings with memory usage enabled', async () => { @@ -495,9 +537,13 @@ describe('AppContainer State Management', () => { }, } as unknown as LoadedSettings; - const { unmount } = renderAppContainer({ settings: settingsWithMemory }); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer({ settings: settingsWithMemory }); + unmount = result.unmount; + }); await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount(); + unmount!(); }); }); @@ -505,9 +551,13 @@ describe('AppContainer State Management', () => { it.each(['1.0.0', '2.1.3-beta', '3.0.0-nightly'])( 'handles version format: %s', async (version) => { - const { unmount } = renderAppContainer({ version }); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer({ version }); + unmount = result.unmount; + }); await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount(); + unmount!(); }, ); }); @@ -529,9 +579,13 @@ describe('AppContainer State Management', () => { merged: {}, } as LoadedSettings; - const { unmount } = renderAppContainer({ settings: undefinedSettings }); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer({ settings: undefinedSettings }); + unmount = result.unmount; + }); await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount(); + unmount!(); }); }); @@ -860,12 +914,16 @@ describe('AppContainer State Management', () => { describe('Quota and Fallback Integration', () => { it('passes a null proQuotaRequest to UIStateContext by default', async () => { // The default mock from beforeEach already sets proQuotaRequest to null - const { unmount } = renderAppContainer(); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer(); + unmount = result.unmount; + }); await waitFor(() => { // Assert that the context value is as expected expect(capturedUIState.proQuotaRequest).toBeNull(); }); - unmount(); + unmount!(); }); it('passes a valid proQuotaRequest to UIStateContext when provided by the hook', async () => { @@ -881,12 +939,16 @@ describe('AppContainer State Management', () => { }); // Act: Render the container - const { unmount } = renderAppContainer(); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer(); + unmount = result.unmount; + }); await waitFor(() => { // Assert: The mock request is correctly passed through the context expect(capturedUIState.proQuotaRequest).toEqual(mockRequest); }); - unmount(); + unmount!(); }); it('passes the handleProQuotaChoice function to UIActionsContext', async () => { @@ -898,7 +960,11 @@ describe('AppContainer State Management', () => { }); // Act: Render the container - const { unmount } = renderAppContainer(); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer(); + unmount = result.unmount; + }); await waitFor(() => { // Assert: The action in the context is the mock handler we provided expect(capturedUIActions.handleProQuotaChoice).toBe(mockHandler); @@ -909,7 +975,7 @@ describe('AppContainer State Management', () => { capturedUIActions.handleProQuotaChoice('retry_later'); }); expect(mockHandler).toHaveBeenCalledWith('retry_later'); - unmount(); + unmount!(); }); }); @@ -1327,13 +1393,17 @@ describe('AppContainer State Management', () => { activePtyId: 'some-id', }); - const { unmount } = renderAppContainer(); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer(); + unmount = result.unmount; + }); await waitFor(() => expect(resizePtySpy).toHaveBeenCalled()); const lastCall = resizePtySpy.mock.calls[resizePtySpy.mock.calls.length - 1]; // Check the height argument specifically expect(lastCall[2]).toBe(1); - unmount(); + unmount!(); }); }); @@ -1672,11 +1742,15 @@ describe('AppContainer State Management', () => { closeModelDialog: vi.fn(), }); - const { unmount } = renderAppContainer(); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer(); + unmount = result.unmount; + }); await waitFor(() => expect(capturedUIState).toBeTruthy()); expect(capturedUIState.isModelDialogOpen).toBe(true); - unmount(); + unmount!(); }); it('should provide model dialog actions in the UIActionsContext', async () => { @@ -1688,7 +1762,11 @@ describe('AppContainer State Management', () => { closeModelDialog: mockCloseModelDialog, }); - const { unmount } = renderAppContainer(); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer(); + unmount = result.unmount; + }); await waitFor(() => expect(capturedUIState).toBeTruthy()); // Verify that the actions are correctly passed through context @@ -1696,13 +1774,17 @@ describe('AppContainer State Management', () => { capturedUIActions.closeModelDialog(); }); expect(mockCloseModelDialog).toHaveBeenCalled(); - unmount(); + unmount!(); }); }); describe('CoreEvents Integration', () => { it('subscribes to UserFeedback and drains backlog on mount', async () => { - const { unmount } = renderAppContainer(); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer(); + unmount = result.unmount; + }); await waitFor(() => expect(capturedUIState).toBeTruthy()); expect(mockCoreEvents.on).toHaveBeenCalledWith( @@ -1710,14 +1792,18 @@ describe('AppContainer State Management', () => { expect.any(Function), ); expect(mockCoreEvents.drainBacklogs).toHaveBeenCalledTimes(1); - unmount(); + unmount!(); }); it('unsubscribes from UserFeedback on unmount', async () => { - const { unmount } = renderAppContainer(); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer(); + unmount = result.unmount; + }); await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount(); + unmount!(); expect(mockCoreEvents.off).toHaveBeenCalledWith( CoreEvent.UserFeedback, @@ -1726,7 +1812,11 @@ describe('AppContainer State Management', () => { }); it('adds history item when UserFeedback event is received', async () => { - const { unmount } = renderAppContainer(); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer(); + unmount = result.unmount; + }); await waitFor(() => expect(capturedUIState).toBeTruthy()); // Get the registered handler @@ -1751,14 +1841,18 @@ describe('AppContainer State Management', () => { }), expect.any(Number), ); - unmount(); + unmount!(); }); it('updates currentModel when ModelChanged event is received', async () => { // Arrange: Mock initial model vi.spyOn(mockConfig, 'getModel').mockReturnValue('initial-model'); - const { unmount } = renderAppContainer(); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer(); + unmount = result.unmount; + }); await waitFor(() => { expect(capturedUIState?.currentModel).toBe('initial-model'); }); @@ -1770,13 +1864,15 @@ describe('AppContainer State Management', () => { expect(handler).toBeDefined(); // Act: Simulate ModelChanged event + // Update config mock to return new model since the handler reads from config + vi.spyOn(mockConfig, 'getModel').mockReturnValue('new-model'); act(() => { handler({ model: 'new-model' }); }); // Assert: Verify model is updated expect(capturedUIState.currentModel).toBe('new-model'); - unmount(); + unmount!(); }); }); @@ -1799,10 +1895,14 @@ describe('AppContainer State Management', () => { }); // The main assertion is that the render does not throw. - const { unmount } = renderAppContainer(); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer(); + unmount = result.unmount; + }); await waitFor(() => expect(resizePtySpy).toHaveBeenCalled()); - unmount(); + unmount!(); }); }); describe('Banner Text', () => { @@ -1812,10 +1912,14 @@ describe('AppContainer State Management', () => { authType: AuthType.USE_GEMINI, apiKey: 'fake-key', }); - const { unmount } = renderAppContainer(); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer(); + unmount = result.unmount; + }); await waitFor(() => { expect(capturedUIState.bannerData.defaultText).toBeDefined(); - unmount(); + unmount!(); }); }); }); @@ -1838,7 +1942,11 @@ describe('AppContainer State Management', () => { }); it('clears the prompt when onCancelSubmit is called with shouldRestorePrompt=false', async () => { - const { unmount } = renderAppContainer(); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer(); + unmount = result.unmount; + }); await waitFor(() => expect(capturedUIState).toBeTruthy()); const { onCancelSubmit } = extractUseGeminiStreamArgs( @@ -1851,7 +1959,7 @@ describe('AppContainer State Management', () => { expect(mockSetText).toHaveBeenCalledWith(''); - unmount(); + unmount!(); }); it('restores the prompt when onCancelSubmit is called with shouldRestorePrompt=true (or undefined)', async () => { @@ -1862,7 +1970,11 @@ describe('AppContainer State Management', () => { initializeFromLogger: vi.fn(), }); - const { unmount } = renderAppContainer(); + let unmount: () => void; + await act(async () => { + const result = renderAppContainer(); + unmount = result.unmount; + }); await waitFor(() => expect(capturedUIState.userMessages).toContain('previous message'), ); @@ -1877,7 +1989,7 @@ describe('AppContainer State Management', () => { expect(mockSetText).toHaveBeenCalledWith('previous message'); - unmount(); + unmount!(); }); it('input history is independent from conversation history (survives /clear)', async () => { @@ -1890,7 +2002,13 @@ describe('AppContainer State Management', () => { initializeFromLogger: vi.fn(), }); - const { unmount } = renderAppContainer(); + let rerender: (tree: ReactElement) => void; + let unmount; + await act(async () => { + const result = renderAppContainer(); + rerender = result.rerender; + unmount = result.unmount; + }); // Verify userMessages is populated from inputHistory await waitFor(() => @@ -1908,12 +2026,17 @@ describe('AppContainer State Management', () => { loadHistory: vi.fn(), }); + await act(async () => { + // Rerender to apply the new mock. + rerender(getAppContainer()); + }); + // Verify that userMessages still contains the input history // (it should not be affected by clearing conversation history) expect(capturedUIState.userMessages).toContain('first prompt'); expect(capturedUIState.userMessages).toContain('second prompt'); - unmount(); + unmount!(); }); }); @@ -1928,7 +2051,11 @@ describe('AppContainer State Management', () => { // Clear previous calls mocks.mockStdout.write.mockClear(); - const { unmount } = renderAppContainer(); + let compUnmount: () => void = () => {}; + await act(async () => { + const { unmount } = renderAppContainer(); + compUnmount = unmount; + }); // Allow async effects to run await waitFor(() => expect(capturedUIState).toBeTruthy()); @@ -1944,7 +2071,7 @@ describe('AppContainer State Management', () => { ); expect(clearTerminalCalls).toHaveLength(0); - unmount(); + compUnmount(); }); }); }); diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index d14a789d10..0030854c5f 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -35,7 +35,6 @@ import { type IdeContext, type UserTierId, type UserFeedbackPayload, - DEFAULT_GEMINI_FLASH_MODEL, IdeClient, ideContextStore, getErrorMessage, @@ -50,7 +49,6 @@ import { coreEvents, CoreEvent, refreshServerHierarchicalMemory, - type ModelChangedPayload, type MemoryChangedPayload, writeToStdout, disableMouseEvents, @@ -256,12 +254,7 @@ export const AppContainer = (props: AppContainerProps) => { ); // Helper to determine the effective model, considering the fallback state. - const getEffectiveModel = useCallback(() => { - if (config.isInFallbackMode()) { - return DEFAULT_GEMINI_FLASH_MODEL; - } - return config.getModel(); - }, [config]); + const getEffectiveModel = useCallback(() => config.getModel(), [config]); const [currentModel, setCurrentModel] = useState(getEffectiveModel()); @@ -340,22 +333,15 @@ export const AppContainer = (props: AppContainerProps) => { // Subscribe to fallback mode and model changes from core useEffect(() => { - const handleFallbackModeChanged = () => { - const effectiveModel = getEffectiveModel(); - setCurrentModel(effectiveModel); + const handleModelChanged = () => { + setCurrentModel(config.getModel()); }; - const handleModelChanged = (payload: ModelChangedPayload) => { - setCurrentModel(payload.model); - }; - - coreEvents.on(CoreEvent.FallbackModeChanged, handleFallbackModeChanged); coreEvents.on(CoreEvent.ModelChanged, handleModelChanged); return () => { - coreEvents.off(CoreEvent.FallbackModeChanged, handleFallbackModeChanged); coreEvents.off(CoreEvent.ModelChanged, handleModelChanged); }; - }, [getEffectiveModel]); + }, [getEffectiveModel, config]); const { consoleMessages, clearConsoleMessages: clearConsoleMessagesState } = useConsoleMessages(); diff --git a/packages/cli/src/ui/components/Footer.tsx b/packages/cli/src/ui/components/Footer.tsx index 6a803a39eb..32a5376cd7 100644 --- a/packages/cli/src/ui/components/Footer.tsx +++ b/packages/cli/src/ui/components/Footer.tsx @@ -7,7 +7,11 @@ import type React from 'react'; import { Box, Text } from 'ink'; import { theme } from '../semantic-colors.js'; -import { shortenPath, tildeifyPath } from '@google/gemini-cli-core'; +import { + shortenPath, + tildeifyPath, + getDisplayString, +} from '@google/gemini-cli-core'; import { ConsoleSummaryDisplay } from './ConsoleSummaryDisplay.js'; import process from 'node:process'; import { ThemedGradient } from './ThemedGradient.js'; @@ -145,7 +149,7 @@ export const Footer: React.FC = () => { - {model} + {getDisplayString(model)} {!hideContextPercentage && ( <> {' '} diff --git a/packages/cli/src/ui/components/ProQuotaDialog.tsx b/packages/cli/src/ui/components/ProQuotaDialog.tsx index cf7ae2a518..fd7ec15b01 100644 --- a/packages/cli/src/ui/components/ProQuotaDialog.tsx +++ b/packages/cli/src/ui/components/ProQuotaDialog.tsx @@ -13,6 +13,8 @@ import { DEFAULT_GEMINI_FLASH_LITE_MODEL, DEFAULT_GEMINI_FLASH_MODEL, DEFAULT_GEMINI_MODEL, + FLASH_PREVIEW_MODEL_REVERT_BEFORE_MERGE, + PREVIEW_GEMINI_FLASH_MODEL, UserTierId, } from '@google/gemini-cli-core'; @@ -44,7 +46,9 @@ export function ProQuotaDialog({ // flash and flash lite don't have options to switch or upgrade. if ( failedModel === DEFAULT_GEMINI_FLASH_MODEL || - failedModel === DEFAULT_GEMINI_FLASH_LITE_MODEL + failedModel === DEFAULT_GEMINI_FLASH_LITE_MODEL || + failedModel === PREVIEW_GEMINI_FLASH_MODEL || + failedModel === FLASH_PREVIEW_MODEL_REVERT_BEFORE_MERGE ) { items = [ { diff --git a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts index dae2044af7..06a4d71af7 100644 --- a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts +++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts @@ -283,7 +283,7 @@ describe('useQuotaAndFallback', () => { const lastCall = (mockHistoryManager.addItem as Mock).mock .calls[0][0]; expect(lastCall.type).toBe(MessageType.INFO); - expect(lastCall.text).toContain('Switched to fallback model.'); + expect(lastCall.text).toContain('Switched to fallback model model-B'); }); } @@ -316,9 +316,9 @@ describe('useQuotaAndFallback', () => { const message = request!.message; expect(message).toBe( - `It seems like you don't have access to Gemini 3. + `It seems like you don't have access to gemini-3-pro-preview. Learn more at https://goo.gle/enable-preview-features -To disable Gemini 3, disable "Preview features" in /settings.`, +To disable gemini-3-pro-preview, disable "Preview features" in /settings.`, ); // Simulate the user choosing to switch @@ -415,7 +415,9 @@ To disable Gemini 3, disable "Preview features" in /settings.`, expect(mockHistoryManager.addItem).toHaveBeenCalledTimes(1); const lastCall = (mockHistoryManager.addItem as Mock).mock.calls[0][0]; expect(lastCall.type).toBe(MessageType.INFO); - expect(lastCall.text).toContain('Switched to fallback model.'); + expect(lastCall.text).toContain( + 'Switched to fallback model gemini-flash', + ); }); it('should show a special message when falling back from the preview model', async () => { @@ -449,7 +451,7 @@ To disable Gemini 3, disable "Preview features" in /settings.`, const lastCall = (mockHistoryManager.addItem as Mock).mock.calls[0][0]; expect(lastCall.type).toBe(MessageType.INFO); expect(lastCall.text).toContain( - `Switched to fallback model gemini-2.5-pro. We will periodically check if ${PREVIEW_GEMINI_MODEL} is available again.`, + `Switched to fallback model gemini-2.5-pro`, ); }); @@ -484,7 +486,7 @@ To disable Gemini 3, disable "Preview features" in /settings.`, const lastCall = (mockHistoryManager.addItem as Mock).mock.calls[0][0]; expect(lastCall.type).toBe(MessageType.INFO); expect(lastCall.text).toContain( - `Switched to fallback model gemini-2.5-flash.`, + `Switched to fallback model gemini-2.5-flash`, ); }); }); diff --git a/packages/cli/src/ui/hooks/useQuotaAndFallback.ts b/packages/cli/src/ui/hooks/useQuotaAndFallback.ts index 084494421a..6a93c0f4ce 100644 --- a/packages/cli/src/ui/hooks/useQuotaAndFallback.ts +++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.ts @@ -14,6 +14,7 @@ import { type UserTierId, PREVIEW_GEMINI_MODEL, DEFAULT_GEMINI_MODEL, + VALID_GEMINI_MODELS, } from '@google/gemini-cli-core'; import { useCallback, useEffect, useRef, useState } from 'react'; import { type UseHistoryManagerReturn } from './useHistoryManager.js'; @@ -71,12 +72,15 @@ export function useQuotaAndFallback({ `/auth to switch to API key.`, ].filter(Boolean); message = messageLines.join('\n'); - } else if (error instanceof ModelNotFoundError) { + } else if ( + error instanceof ModelNotFoundError && + VALID_GEMINI_MODELS.has(failedModel) + ) { isModelNotFoundError = true; const messageLines = [ - `It seems like you don't have access to Gemini 3.`, + `It seems like you don't have access to ${failedModel}.`, `Learn more at https://goo.gle/enable-preview-features`, - `To disable Gemini 3, disable "Preview features" in /settings.`, + `To disable ${failedModel}, disable "Preview features" in /settings.`, ]; message = messageLines.join('\n'); } else { @@ -120,30 +124,20 @@ export function useQuotaAndFallback({ isDialogPending.current = false; // Reset the flag here if (choice === 'retry_always') { - // If we were recovering from a Preview Model failure, show a specific message. - if (proQuotaRequest.failedModel === PREVIEW_GEMINI_MODEL) { - const showPeriodicalCheckMessage = - !proQuotaRequest.isModelNotFoundError && - proQuotaRequest.fallbackModel === DEFAULT_GEMINI_MODEL; - historyManager.addItem( - { - type: MessageType.INFO, - text: `Switched to fallback model ${proQuotaRequest.fallbackModel}. ${showPeriodicalCheckMessage ? `We will periodically check if ${PREVIEW_GEMINI_MODEL} is available again.` : ''}`, - }, - Date.now(), - ); - } else { - historyManager.addItem( - { - type: MessageType.INFO, - text: 'Switched to fallback model.', - }, - Date.now(), - ); - } + // Explicitly set the model to the fallback model to persist the user's choice. + // This ensures the Footer updates and future turns use this model. + config.setModel(proQuotaRequest.fallbackModel); + + historyManager.addItem( + { + type: MessageType.INFO, + text: `Switched to fallback model ${proQuotaRequest.fallbackModel}`, + }, + Date.now(), + ); } }, - [proQuotaRequest, historyManager], + [proQuotaRequest, historyManager, config], ); return { diff --git a/packages/cli/src/zed-integration/zedIntegration.ts b/packages/cli/src/zed-integration/zedIntegration.ts index b20c01ee2e..2dd4197e60 100644 --- a/packages/cli/src/zed-integration/zedIntegration.ts +++ b/packages/cli/src/zed-integration/zedIntegration.ts @@ -257,7 +257,7 @@ export class Session { try { const model = getEffectiveModel( this.config.getModel(), - this.config.isInFallbackMode(), + this.config.getPreviewFeatures(), ); const responseStream = await chat.sendMessageStream( { model }, diff --git a/packages/core/src/availability/policyCatalog.test.ts b/packages/core/src/availability/policyCatalog.test.ts index 092123ff65..559f943294 100644 --- a/packages/core/src/availability/policyCatalog.test.ts +++ b/packages/core/src/availability/policyCatalog.test.ts @@ -19,7 +19,7 @@ describe('policyCatalog', () => { it('returns preview chain when preview enabled', () => { const chain = getModelPolicyChain({ previewEnabled: true }); expect(chain[0]?.model).toBe(PREVIEW_GEMINI_MODEL); - expect(chain).toHaveLength(3); + expect(chain).toHaveLength(2); }); it('returns default chain when preview disabled', () => { @@ -31,7 +31,7 @@ describe('policyCatalog', () => { it('marks preview transients as sticky retries', () => { const [previewPolicy] = getModelPolicyChain({ previewEnabled: true }); expect(previewPolicy.model).toBe(PREVIEW_GEMINI_MODEL); - expect(previewPolicy.stateTransitions.transient).toBe('sticky_retry'); + expect(previewPolicy.stateTransitions.transient).toBe('terminal'); }); it('applies default actions and state transitions for unspecified kinds', () => { diff --git a/packages/core/src/availability/policyCatalog.ts b/packages/core/src/availability/policyCatalog.ts index 6a0d9d07c7..d414158ed1 100644 --- a/packages/core/src/availability/policyCatalog.ts +++ b/packages/core/src/availability/policyCatalog.ts @@ -13,6 +13,7 @@ import type { import { DEFAULT_GEMINI_FLASH_MODEL, DEFAULT_GEMINI_MODEL, + PREVIEW_GEMINI_FLASH_MODEL, PREVIEW_GEMINI_MODEL, } from '../config/models.js'; import type { UserTierId } from '../code_assist/types.js'; @@ -48,13 +49,8 @@ const DEFAULT_CHAIN: ModelPolicyChain = [ ]; const PREVIEW_CHAIN: ModelPolicyChain = [ - definePolicy({ - model: PREVIEW_GEMINI_MODEL, - stateTransitions: { transient: 'sticky_retry' }, - actions: { transient: 'silent' }, - }), - definePolicy({ model: DEFAULT_GEMINI_MODEL }), - definePolicy({ model: DEFAULT_GEMINI_FLASH_MODEL, isLastResort: true }), + definePolicy({ model: PREVIEW_GEMINI_MODEL }), + definePolicy({ model: PREVIEW_GEMINI_FLASH_MODEL, isLastResort: true }), ]; /** diff --git a/packages/core/src/availability/policyHelpers.test.ts b/packages/core/src/availability/policyHelpers.test.ts index 8a5455b097..58aa069721 100644 --- a/packages/core/src/availability/policyHelpers.test.ts +++ b/packages/core/src/availability/policyHelpers.test.ts @@ -12,6 +12,7 @@ import { } from './policyHelpers.js'; import { createDefaultPolicy } from './policyCatalog.js'; import type { Config } from '../config/config.js'; +import { DEFAULT_GEMINI_MODEL_AUTO } from '../config/models.js'; const createMockConfig = (overrides: Partial = {}): Config => ({ @@ -43,7 +44,7 @@ describe('policyHelpers', () => { it('returns the default chain when active model is "auto"', () => { const config = createMockConfig({ - getModel: () => 'auto', + getModel: () => DEFAULT_GEMINI_MODEL_AUTO, }); const chain = resolvePolicyChain(config); @@ -63,7 +64,7 @@ describe('policyHelpers', () => { ]; const context = buildFallbackPolicyContext(chain, 'b'); expect(context.failedPolicy?.model).toBe('b'); - expect(context.candidates.map((p) => p.model)).toEqual(['c', 'a']); + expect(context.candidates.map((p) => p.model)).toEqual(['c']); }); it('returns full chain when model is not in policy list', () => { diff --git a/packages/core/src/availability/policyHelpers.ts b/packages/core/src/availability/policyHelpers.ts index a90386e824..b170587c9e 100644 --- a/packages/core/src/availability/policyHelpers.ts +++ b/packages/core/src/availability/policyHelpers.ts @@ -14,7 +14,7 @@ import type { RetryAvailabilityContext, } from './modelPolicy.js'; import { createDefaultPolicy, getModelPolicyChain } from './policyCatalog.js'; -import { DEFAULT_GEMINI_MODEL, getEffectiveModel } from '../config/models.js'; +import { DEFAULT_GEMINI_MODEL, resolveModel } from '../config/models.js'; import type { ModelSelectionResult } from './modelAvailabilityService.js'; /** @@ -24,23 +24,29 @@ import type { ModelSelectionResult } from './modelAvailabilityService.js'; export function resolvePolicyChain( config: Config, preferredModel?: string, + wrapsAround: boolean = false, ): ModelPolicyChain { + // Availability uses the active/requested model directly. Legacy fallback logic + // (getEffectiveModel) only applies when availability is disabled. + const modelFromConfig = + preferredModel ?? config.getActiveModel?.() ?? config.getModel(); + + const isPreviewRequest = + modelFromConfig.includes('gemini-3') || + modelFromConfig.includes('preview') || + modelFromConfig === 'fiercefalcon'; + const chain = getModelPolicyChain({ - previewEnabled: !!config.getPreviewFeatures(), + previewEnabled: isPreviewRequest, userTier: config.getUserTier(), }); - // TODO: This will be replaced when we get rid of Fallback Modes. - // Switch to getActiveModel() - const activeModel = - preferredModel ?? - getEffectiveModel(config.getModel(), config.isInFallbackMode()); + const activeModel = resolveModel(modelFromConfig); - if (activeModel === 'auto') { - return [...chain]; - } - - if (chain.some((policy) => policy.model === activeModel)) { - return [...chain]; + const activeIndex = chain.findIndex((policy) => policy.model === activeModel); + if (activeIndex !== -1) { + return wrapsAround + ? [...chain.slice(activeIndex), ...chain.slice(0, activeIndex)] + : [...chain.slice(activeIndex)]; } // If the user specified a model not in the default chain, we assume they want @@ -51,10 +57,14 @@ export function resolvePolicyChain( /** * Produces the failed policy (if it exists in the chain) and the list of * fallback candidates that follow it. + * @param chain - The ordered list of available model policies. + * @param failedModel - The identifier of the model that failed. + * @param wrapsAround - If true, treats the chain as a circular buffer. */ export function buildFallbackPolicyContext( chain: ModelPolicyChain, failedModel: string, + wrapsAround: boolean = false, ): { failedPolicy?: ModelPolicy; candidates: ModelPolicy[]; @@ -65,9 +75,12 @@ export function buildFallbackPolicyContext( } // Return [candidates_after, candidates_before] to prioritize downgrades // (continuing the chain) before wrapping around to upgrades. + const candidates = wrapsAround + ? [...chain.slice(index + 1), ...chain.slice(0, index)] + : [...chain.slice(index + 1)]; return { failedPolicy: chain[index], - candidates: [...chain.slice(index + 1), ...chain.slice(0, index)], + candidates, }; } diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index 23a7eefa41..17e1535834 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -34,6 +34,7 @@ import { logRipgrepFallback } from '../telemetry/loggers.js'; import { RipgrepFallbackEvent } from '../telemetry/types.js'; import { ToolRegistry } from '../tools/tool-registry.js'; import { DEFAULT_MODEL_CONFIGS } from './defaultModelConfigs.js'; +import { DEFAULT_GEMINI_MODEL } from './models.js'; vi.mock('fs', async (importOriginal) => { const actual = await importOriginal(); @@ -173,7 +174,7 @@ vi.mock('../code_assist/codeAssist.js'); vi.mock('../code_assist/experiments/experiments.js'); describe('Server Config (config.ts)', () => { - const MODEL = 'gemini-pro'; + const MODEL = DEFAULT_GEMINI_MODEL; const SANDBOX: SandboxConfig = { command: 'docker', image: 'gemini-cli-sandbox', diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 38a1d2f977..875b3497ff 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -507,8 +507,7 @@ export class Config { this.bugCommand = params.bugCommand; this.model = params.model; this._activeModel = params.model; - this.enableModelAvailabilityService = - params.enableModelAvailabilityService ?? false; + this.enableModelAvailabilityService = true; this.enableAgents = params.enableAgents ?? false; this.experimentalJitContext = params.experimentalJitContext ?? false; this.modelAvailabilityService = new ModelAvailabilityService(); @@ -707,6 +706,9 @@ export class Config { this.geminiClient.stripThoughtsFromHistory(); } + // Reset availability status when switching auth (e.g. from limited key to OAuth) + this.modelAvailabilityService.reset(); + const newContentGeneratorConfig = await createContentGeneratorConfig( this, authMethod, diff --git a/packages/core/src/config/defaultModelConfigs.ts b/packages/core/src/config/defaultModelConfigs.ts index d90a7ef587..facea99252 100644 --- a/packages/core/src/config/defaultModelConfigs.ts +++ b/packages/core/src/config/defaultModelConfigs.ts @@ -10,7 +10,7 @@ import { DEFAULT_THINKING_MODE } from './models.js'; // TODO(joshualitt): This is safe for orions belt, but this change SHOULD NOT // be merged when orions belt is merged with it's upstream. -const FLASH_PREVIEW_MODEL_REVERT_BEFORE_MERGE = 'fiercefalcon'; +export const FLASH_PREVIEW_MODEL_REVERT_BEFORE_MERGE = 'fiercefalcon'; // The default model configs. We use `base` as the parent for all of our model // configs, while `chat-base`, a child of `base`, is the parent of the models diff --git a/packages/core/src/config/models.test.ts b/packages/core/src/config/models.test.ts index fe1298e09f..96053fc7eb 100644 --- a/packages/core/src/config/models.test.ts +++ b/packages/core/src/config/models.test.ts @@ -7,90 +7,85 @@ import { describe, it, expect } from 'vitest'; import { getEffectiveModel, + resolveClassifierModel, isGemini2Model, DEFAULT_GEMINI_MODEL, PREVIEW_GEMINI_MODEL, DEFAULT_GEMINI_FLASH_MODEL, - PREVIEW_GEMINI_FLASH_MODEL, DEFAULT_GEMINI_FLASH_LITE_MODEL, + GEMINI_MODEL_ALIAS_PRO, + GEMINI_MODEL_ALIAS_FLASH, + GEMINI_MODEL_ALIAS_FLASH_LITE, + PREVIEW_GEMINI_FLASH_MODEL, PREVIEW_GEMINI_MODEL_AUTO, DEFAULT_GEMINI_MODEL_AUTO, } from './models.js'; describe('getEffectiveModel', () => { - describe('When NOT in fallback mode', () => { - const useFallbackModel = false; - - it('should return the Preview Pro model when auto-preview is requested', () => { - const model = getEffectiveModel( - PREVIEW_GEMINI_MODEL_AUTO, - useFallbackModel, - ); + describe('delegation to resolveModel', () => { + it('should return the Preview Pro model when auto-gemini-3 is requested', () => { + const model = getEffectiveModel(PREVIEW_GEMINI_MODEL_AUTO, false); expect(model).toBe(PREVIEW_GEMINI_MODEL); }); - it('should return the Default Pro model when auto-default is requested', () => { - const model = getEffectiveModel( - DEFAULT_GEMINI_MODEL_AUTO, - useFallbackModel, - ); + it('should return the Default Pro model when auto-gemini-2.5 is requested', () => { + const model = getEffectiveModel(DEFAULT_GEMINI_MODEL_AUTO, false); expect(model).toBe(DEFAULT_GEMINI_MODEL); }); it('should return the requested model as-is for explicit specific models', () => { - expect(getEffectiveModel(DEFAULT_GEMINI_MODEL, useFallbackModel)).toBe( + expect(getEffectiveModel(DEFAULT_GEMINI_MODEL, false)).toBe( DEFAULT_GEMINI_MODEL, ); - expect( - getEffectiveModel(DEFAULT_GEMINI_FLASH_MODEL, useFallbackModel), - ).toBe(DEFAULT_GEMINI_FLASH_MODEL); - expect( - getEffectiveModel(DEFAULT_GEMINI_FLASH_LITE_MODEL, useFallbackModel), - ).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL); + expect(getEffectiveModel(DEFAULT_GEMINI_FLASH_MODEL, false)).toBe( + DEFAULT_GEMINI_FLASH_MODEL, + ); + expect(getEffectiveModel(DEFAULT_GEMINI_FLASH_LITE_MODEL, false)).toBe( + DEFAULT_GEMINI_FLASH_LITE_MODEL, + ); }); it('should return a custom model name when requested', () => { const customModel = 'custom-model-v1'; - const model = getEffectiveModel(customModel, useFallbackModel); + const model = getEffectiveModel(customModel, false); expect(model).toBe(customModel); }); - }); - describe('When IN fallback mode', () => { - const useFallbackModel = true; + describe('with preview features', () => { + it('should return the preview model when pro alias is requested', () => { + const model = getEffectiveModel(GEMINI_MODEL_ALIAS_PRO, true); + expect(model).toBe(PREVIEW_GEMINI_MODEL); + }); - it('should return the Preview Flash model when auto-preview is requested', () => { - const model = getEffectiveModel( - PREVIEW_GEMINI_MODEL_AUTO, - useFallbackModel, - ); - expect(model).toBe(PREVIEW_GEMINI_FLASH_MODEL); - }); + it('should return the default pro model when pro alias is requested and preview is off', () => { + const model = getEffectiveModel(GEMINI_MODEL_ALIAS_PRO, false); + expect(model).toBe(DEFAULT_GEMINI_MODEL); + }); - it('should return the Default Flash model when auto-default is requested', () => { - const model = getEffectiveModel( - DEFAULT_GEMINI_MODEL_AUTO, - useFallbackModel, - ); - expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL); - }); + it('should return the flash model when flash is requested and preview is on', () => { + const model = getEffectiveModel(GEMINI_MODEL_ALIAS_FLASH, true); + expect(model).toBe(PREVIEW_GEMINI_FLASH_MODEL); + }); - it('should return the requested model as-is for explicit specific models', () => { - expect(getEffectiveModel(DEFAULT_GEMINI_MODEL, useFallbackModel)).toBe( - DEFAULT_GEMINI_MODEL, - ); - expect( - getEffectiveModel(DEFAULT_GEMINI_FLASH_MODEL, useFallbackModel), - ).toBe(DEFAULT_GEMINI_FLASH_MODEL); - expect( - getEffectiveModel(DEFAULT_GEMINI_FLASH_LITE_MODEL, useFallbackModel), - ).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL); - }); + it('should return the flash model when lite is requested and preview is on', () => { + const model = getEffectiveModel(GEMINI_MODEL_ALIAS_FLASH_LITE, true); + expect(model).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL); + }); - it('should return custom model name as-is', () => { - const customModel = 'custom-model-v1'; - const model = getEffectiveModel(customModel, useFallbackModel); - expect(model).toBe(customModel); + it('should return the flash model when the flash model name is explicitly requested and preview is on', () => { + const model = getEffectiveModel(DEFAULT_GEMINI_FLASH_MODEL, true); + expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL); + }); + + it('should return the lite model when the lite model name is requested and preview is on', () => { + const model = getEffectiveModel(DEFAULT_GEMINI_FLASH_LITE_MODEL, true); + expect(model).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL); + }); + + it('should return the default gemini model when the model is explicitly set and preview is on', () => { + const model = getEffectiveModel(DEFAULT_GEMINI_MODEL, true); + expect(model).toBe(DEFAULT_GEMINI_MODEL); + }); }); }); }); @@ -120,3 +115,43 @@ describe('isGemini2Model', () => { expect(isGemini2Model('gpt-4')).toBe(false); }); }); + +describe('resolveClassifierModel', () => { + it('should return flash model when alias is flash', () => { + expect( + resolveClassifierModel( + DEFAULT_GEMINI_MODEL_AUTO, + GEMINI_MODEL_ALIAS_FLASH, + ), + ).toBe(DEFAULT_GEMINI_FLASH_MODEL); + expect( + resolveClassifierModel( + PREVIEW_GEMINI_MODEL_AUTO, + GEMINI_MODEL_ALIAS_FLASH, + ), + ).toBe(PREVIEW_GEMINI_FLASH_MODEL); + }); + + it('should return pro model when alias is pro', () => { + expect( + resolveClassifierModel(DEFAULT_GEMINI_MODEL_AUTO, GEMINI_MODEL_ALIAS_PRO), + ).toBe(DEFAULT_GEMINI_MODEL); + expect( + resolveClassifierModel(PREVIEW_GEMINI_MODEL_AUTO, GEMINI_MODEL_ALIAS_PRO), + ).toBe(PREVIEW_GEMINI_MODEL); + }); + + it('should handle preview features being enabled', () => { + // If preview is enabled, resolving 'flash' without context (fallback) might switch to preview flash, + // but here we test explicit auto models which should stick to their families if possible? + // Actually our logic forces DEFAULT_GEMINI_FLASH_MODEL for DEFAULT_GEMINI_MODEL_AUTO even if preview is on, + // because the USER requested 2.5 explicitly via "auto-gemini-2.5". + expect( + resolveClassifierModel( + DEFAULT_GEMINI_MODEL_AUTO, + GEMINI_MODEL_ALIAS_FLASH, + true, + ), + ).toBe(DEFAULT_GEMINI_FLASH_MODEL); + }); +}); diff --git a/packages/core/src/config/models.ts b/packages/core/src/config/models.ts index bf743d31e0..b5bb658e01 100644 --- a/packages/core/src/config/models.ts +++ b/packages/core/src/config/models.ts @@ -20,47 +20,97 @@ export const VALID_GEMINI_MODELS = new Set([ export const PREVIEW_GEMINI_MODEL_AUTO = 'auto-gemini-3'; export const DEFAULT_GEMINI_MODEL_AUTO = 'auto-gemini-2.5'; +// Model aliases for user convenience. +export const GEMINI_MODEL_ALIAS_PRO = 'pro'; +export const GEMINI_MODEL_ALIAS_FLASH = 'flash'; +export const GEMINI_MODEL_ALIAS_FLASH_LITE = 'flash-lite'; + export const DEFAULT_GEMINI_EMBEDDING_MODEL = 'gemini-embedding-001'; // Cap the thinking at 8192 to prevent run-away thinking loops. export const DEFAULT_THINKING_MODE = 8192; /** - * Determines the effective model to use, applying fallback logic if necessary. + * Resolves the requested model alias (e.g., 'auto', 'pro', 'flash', 'flash-lite') + * to a concrete model name, considering preview features. * - * When fallback mode is active, this function enforces the use of the standard - * fallback model. + * @param requestedModel The model alias or concrete model name requested by the user. + * @param previewFeaturesEnabled A boolean indicating if preview features are enabled. + * @returns The resolved concrete model name. + */ +export function resolveModel( + requestedModel: string, + previewFeaturesEnabled: boolean = false, +): string { + switch (requestedModel) { + case PREVIEW_GEMINI_MODEL_AUTO: { + return PREVIEW_GEMINI_MODEL; + } + case DEFAULT_GEMINI_MODEL_AUTO: { + return DEFAULT_GEMINI_MODEL; + } + case GEMINI_MODEL_ALIAS_PRO: { + return previewFeaturesEnabled + ? PREVIEW_GEMINI_MODEL + : DEFAULT_GEMINI_MODEL; + } + case GEMINI_MODEL_ALIAS_FLASH: { + return previewFeaturesEnabled + ? PREVIEW_GEMINI_FLASH_MODEL + : DEFAULT_GEMINI_FLASH_MODEL; + } + case GEMINI_MODEL_ALIAS_FLASH_LITE: { + return DEFAULT_GEMINI_FLASH_LITE_MODEL; + } + default: { + return requestedModel; + } + } +} + +/** + * Resolves the appropriate model based on the classifier's decision. + * + * @param requestedModel The current requested model (e.g. auto-gemini-2.5). + * @param modelAlias The alias selected by the classifier ('flash' or 'pro'). + * @param previewFeaturesEnabled Whether preview features are enabled. + * @returns The resolved concrete model name. + */ +export function resolveClassifierModel( + requestedModel: string, + modelAlias: string, + previewFeaturesEnabled: boolean = false, +): string { + if (modelAlias === GEMINI_MODEL_ALIAS_FLASH) { + if ( + requestedModel === DEFAULT_GEMINI_MODEL_AUTO || + requestedModel === DEFAULT_GEMINI_MODEL + ) { + return DEFAULT_GEMINI_FLASH_MODEL; + } + if ( + requestedModel === PREVIEW_GEMINI_MODEL_AUTO || + requestedModel === PREVIEW_GEMINI_MODEL + ) { + return PREVIEW_GEMINI_FLASH_MODEL; + } + return resolveModel(GEMINI_MODEL_ALIAS_FLASH, previewFeaturesEnabled); + } + return resolveModel(requestedModel, previewFeaturesEnabled); +} + +/** + * Determines the effective model to use. * * @param requestedModel The model that was originally requested. - * @param isInFallbackMode Whether the application is in fallback mode. + * @param previewFeaturesEnabled A boolean indicating if preview features are enabled. * @returns The effective model name. */ export function getEffectiveModel( requestedModel: string, - useFallbackModel: boolean, + previewFeaturesEnabled: boolean | undefined, ): string { - // If we are not in fallback mode, simply use the resolved model. - if (!useFallbackModel) { - switch (requestedModel) { - case PREVIEW_GEMINI_MODEL_AUTO: - return PREVIEW_GEMINI_MODEL; - case DEFAULT_GEMINI_MODEL_AUTO: - return DEFAULT_GEMINI_MODEL; - default: - return requestedModel; - } - } - - // Fallback model for corresponding model family. We are doing fallback only - // for Auto modes - switch (requestedModel) { - case PREVIEW_GEMINI_MODEL_AUTO: - return PREVIEW_GEMINI_FLASH_MODEL; - case DEFAULT_GEMINI_MODEL_AUTO: - return DEFAULT_GEMINI_FLASH_MODEL; - default: - return requestedModel; - } + return resolveModel(requestedModel, previewFeaturesEnabled); } export function getDisplayString(model: string) { diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index 8469813d7c..f4f8ebcce1 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -19,11 +19,10 @@ exports[`Core System Prompt (prompts.ts) > should append userMemory with separat ## Software Engineering Tasks When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: -1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. +1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'. 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution. -3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core -Mandates'). +3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands. 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to. 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction. @@ -121,11 +120,10 @@ exports[`Core System Prompt (prompts.ts) > should handle git instructions when i ## Software Engineering Tasks When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: -1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. +1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'. 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution. -3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core -Mandates'). +3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands. 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to. 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction. @@ -218,11 +216,10 @@ exports[`Core System Prompt (prompts.ts) > should handle git instructions when i ## Software Engineering Tasks When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: -1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. +1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'. 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution. -3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core -Mandates'). +3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands. 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to. 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction. @@ -330,11 +327,10 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru ## Software Engineering Tasks When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: -1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. +1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'. 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution. -3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core -Mandates'). +3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands. 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to. 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction. @@ -427,11 +423,10 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru ## Software Engineering Tasks When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: -1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. +1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'. 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution. -3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core -Mandates'). +3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands. 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to. 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction. @@ -524,11 +519,10 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru ## Software Engineering Tasks When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: -1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. +1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'. 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution. -3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core -Mandates'). +3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands. 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to. 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction. @@ -621,11 +615,10 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us ## Software Engineering Tasks When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: -1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. +1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'. 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution. -3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core -Mandates'). +3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands. 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to. 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction. @@ -718,11 +711,10 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us ## Software Engineering Tasks When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: -1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. +1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'. 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution. -3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core -Mandates'). +3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands. 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to. 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction. @@ -815,11 +807,10 @@ exports[`Core System Prompt (prompts.ts) > should return the interactive avoidan ## Software Engineering Tasks When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: -1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. +1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'. 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution. -3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core -Mandates'). +3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands. 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to. 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction. @@ -913,11 +904,10 @@ exports[`Core System Prompt (prompts.ts) > should use chatty system prompt for p ## Software Engineering Tasks When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: -1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. +1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'. 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution. -3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core -Mandates'). +3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands. 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to. 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction. diff --git a/packages/core/src/core/baseLlmClient.ts b/packages/core/src/core/baseLlmClient.ts index 8313ee4bc9..6c0d5b7012 100644 --- a/packages/core/src/core/baseLlmClient.ts +++ b/packages/core/src/core/baseLlmClient.ts @@ -13,6 +13,8 @@ import type { } from '@google/genai'; import type { Config } from '../config/config.js'; import type { ContentGenerator } from './contentGenerator.js'; +import type { AuthType } from './contentGenerator.js'; +import { handleFallback } from '../fallback/handler.js'; import { getResponseText } from '../utils/partUtils.js'; import { reportError } from '../utils/errorReporting.js'; import { getErrorMessage } from '../utils/errors.js'; @@ -86,6 +88,7 @@ export class BaseLlmClient { constructor( private readonly contentGenerator: ContentGenerator, private readonly config: Config, + private readonly authType?: AuthType, ) {} async generateJson( @@ -286,6 +289,12 @@ export class BaseLlmClient { maxAttempts: availabilityMaxAttempts ?? maxAttempts ?? DEFAULT_MAX_ATTEMPTS, getAvailabilityContext, + onPersistent429: this.config.isInteractive() + ? (authType, error) => + handleFallback(this.config, requestParams.model, authType, error) + : undefined, + authType: + this.authType ?? this.config.getContentGeneratorConfig()?.authType, }); } catch (error) { if (abortSignal?.aborted) { diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 236a9b9809..064dab2d5e 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -31,10 +31,7 @@ import type { ResumedSessionData, } from '../services/chatRecordingService.js'; import type { ContentGenerator } from './contentGenerator.js'; -import { - DEFAULT_GEMINI_FLASH_MODEL, - getEffectiveModel, -} from '../config/models.js'; +import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js'; import { LoopDetectionService } from '../services/loopDetectionService.js'; import { ChatCompressionService } from '../services/chatCompressionService.js'; import { ideContextStore } from '../ide/ideContext.js'; @@ -395,8 +392,9 @@ export class GeminiClient { return this.currentSequenceModel; } - const configModel = this.config.getModel(); - return getEffectiveModel(configModel, this.config.isInFallbackMode()); + // Availability logic: The configured model is the source of truth, + // including any permanent fallbacks (config.setModel) or manual overrides. + return this.config.getActiveModel(); } async *sendMessageStream( diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index 587d90f60d..1094bec07c 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -19,15 +19,12 @@ import type { Config } from '../config/config.js'; import { setSimulate429 } from '../utils/testUtils.js'; import { DEFAULT_GEMINI_FLASH_MODEL, - DEFAULT_GEMINI_MODEL, DEFAULT_THINKING_MODE, - PREVIEW_GEMINI_FLASH_MODEL, PREVIEW_GEMINI_MODEL, - PREVIEW_GEMINI_MODEL_AUTO, } from '../config/models.js'; import { AuthType } from './contentGenerator.js'; import { TerminalQuotaError } from '../utils/googleQuotaErrors.js'; -import { retryWithBackoff, type RetryOptions } from '../utils/retry.js'; +import { type RetryOptions } from '../utils/retry.js'; import { uiTelemetryService } from '../telemetry/uiTelemetry.js'; import { HookSystem } from '../hooks/hookSystem.js'; import { createMockMessageBus } from '../test-utils/mock-message-bus.js'; @@ -129,18 +126,25 @@ describe('GeminiChat', () => { } return result; }); + let currentModel = 'gemini-pro'; + let currentActiveModel = 'gemini-pro'; + mockConfig = { getSessionId: () => 'test-session-id', getTelemetryLogPromptsEnabled: () => true, getUsageStatisticsEnabled: () => true, getDebugMode: () => false, getPreviewFeatures: () => false, - getContentGeneratorConfig: vi.fn().mockReturnValue({ - authType: 'oauth-personal', // Ensure this is set for fallback tests - model: 'test-model', + getContentGeneratorConfig: vi.fn().mockImplementation(() => ({ + authType: 'oauth-personal', + model: currentModel, + })), + getModel: vi.fn().mockImplementation(() => currentModel), + setModel: vi.fn().mockImplementation((m: string) => { + currentModel = m; + // When model is explicitly set, active model usually resets or updates to it + currentActiveModel = m; }), - getModel: vi.fn().mockReturnValue('gemini-pro'), - setModel: vi.fn(), isInFallbackMode: vi.fn().mockReturnValue(false), getQuotaErrorOccurred: vi.fn().mockReturnValue(false), setQuotaErrorOccurred: vi.fn(), @@ -181,8 +185,10 @@ describe('GeminiChat', () => { isInteractive: vi.fn().mockReturnValue(false), getEnableHooks: vi.fn().mockReturnValue(false), isModelAvailabilityServiceEnabled: vi.fn().mockReturnValue(false), - getActiveModel: vi.fn().mockReturnValue('gemini-pro'), - setActiveModel: vi.fn(), + getActiveModel: vi.fn().mockImplementation(() => currentActiveModel), + setActiveModel: vi + .fn() + .mockImplementation((m: string) => (currentActiveModel = m)), getModelAvailabilityService: vi.fn(), } as unknown as Config; @@ -606,62 +612,6 @@ describe('GeminiChat', () => { ); }); - it('should pass DEFAULT_GEMINI_MODEL to handleFallback when Preview Model is bypassed (downgraded)', async () => { - // ARRANGE - vi.mocked(mockConfig.isPreviewModelBypassMode).mockReturnValue(true); - // Mock retryWithBackoff to simulate catching the error and calling onPersistent429 - vi.mocked(retryWithBackoff).mockImplementation( - async (apiCall, options) => { - const onPersistent429 = options?.onPersistent429; - try { - await apiCall(); - } catch (error) { - if (onPersistent429) { - await onPersistent429(AuthType.LOGIN_WITH_GOOGLE, error); - } - throw error; - } - }, - ); - - // We need the API call to fail so retryWithBackoff calls the callback. - vi.mocked(mockContentGenerator.generateContentStream).mockRejectedValue( - new TerminalQuotaError('Simulated Quota Error', { - code: 429, - message: 'Simulated Quota Error', - details: [], - }), - ); - - // ACT - const consumeStream = async () => { - const stream = await chat.sendMessageStream( - { model: PREVIEW_GEMINI_MODEL }, - 'test', - 'prompt-id-bypass', - new AbortController().signal, - ); - // Consume the stream to trigger execution - for await (const _ of stream) { - // do nothing - } - }; - - await expect(consumeStream()).rejects.toThrow('Simulated Quota Error'); - - expect(retryWithBackoff).toHaveBeenCalled(); - - // ASSERT - // handleFallback is called via onPersistent429Callback - // We verify it was called with DEFAULT_GEMINI_MODEL - expect(mockHandleFallback).toHaveBeenCalledWith( - expect.anything(), - DEFAULT_GEMINI_MODEL, // This is the key assertion - expect.anything(), - expect.anything(), - ); - }); - it('should throw an error when a tool call is followed by an empty stream response', async () => { // 1. Setup: A history where the model has just made a function call. const initialHistory: Content[] = [ @@ -1851,45 +1801,6 @@ describe('GeminiChat', () => { expect(turn4.parts[0].text).toBe('second response'); }); - describe('Model Resolution', () => { - const mockResponse = { - candidates: [ - { - content: { parts: [{ text: 'response' }], role: 'model' }, - finishReason: 'STOP', - }, - ], - } as unknown as GenerateContentResponse; - - it('should use the FLASH model when in fallback mode (sendMessageStream)', async () => { - vi.mocked(mockConfig.getModel).mockReturnValue('auto-gemini-2.5'); - vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(true); - vi.mocked(mockContentGenerator.generateContentStream).mockImplementation( - async () => - (async function* () { - yield mockResponse; - })(), - ); - - const stream = await chat.sendMessageStream( - { model: 'auto-gemini-2.5' }, - 'test message', - 'prompt-id-res3', - new AbortController().signal, - ); - for await (const _ of stream) { - // consume stream - } - - expect(mockContentGenerator.generateContentStream).toHaveBeenCalledWith( - expect.objectContaining({ - model: DEFAULT_GEMINI_FLASH_MODEL, - }), - 'prompt-id-res3', - ); - }); - }); - describe('Fallback Integration (Retries)', () => { const error429 = new ApiError({ message: 'API Error 429: Quota exceeded', @@ -1986,92 +1897,6 @@ describe('GeminiChat', () => { expect(modelTurn.parts![0]!.text).toBe('Success on retry'); }); - it('should switch to PREVIEW_GEMINI_FLASH_MODEL and use thinkingLevel when falling back from a gemini-3 model', async () => { - // ARRANGE - const authType = AuthType.LOGIN_WITH_GOOGLE; - vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({ - authType, - }); - - // Initial state: Not in fallback mode - const isInFallbackModeSpy = vi.spyOn(mockConfig, 'isInFallbackMode'); - isInFallbackModeSpy.mockReturnValue(false); - - // Mock API calls: - // 1. Fails with 429 (simulating gemini-3 failure) - // 2. Succeeds (simulating fallback success) - vi.mocked(mockContentGenerator.generateContentStream) - .mockRejectedValueOnce(error429) - .mockResolvedValueOnce( - (async function* () { - yield { - candidates: [ - { - content: { parts: [{ text: 'Fallback success' }] }, - finishReason: 'STOP', - }, - ], - } as unknown as GenerateContentResponse; - })(), - ); - - // Mock handleFallback to enable fallback mode and signal retry - mockHandleFallback.mockImplementation(async () => { - isInFallbackModeSpy.mockReturnValue(true); // Next call will see fallback mode = true - return true; - }); - - // ACT - const stream = await chat.sendMessageStream( - { model: PREVIEW_GEMINI_MODEL_AUTO }, // Start with a gemini-3 model - 'test fallback thinking', - 'prompt-id-fb3', - new AbortController().signal, - ); - for await (const _ of stream) { - // consume stream - } - - // ASSERT - expect(mockContentGenerator.generateContentStream).toHaveBeenCalledTimes( - 2, - ); - - // First call: gemini-3 model, thinkingLevel set - expect( - mockContentGenerator.generateContentStream, - ).toHaveBeenNthCalledWith( - 1, - expect.objectContaining({ - model: PREVIEW_GEMINI_MODEL, - config: expect.objectContaining({ - thinkingConfig: { - thinkingBudget: undefined, - thinkingLevel: ThinkingLevel.HIGH, - }, - }), - }), - 'prompt-id-fb3', - ); - - // Second call: PREVIEW_GEMINI_FLASH_MODEL (due to fallback), thinkingLevel set - expect( - mockContentGenerator.generateContentStream, - ).toHaveBeenNthCalledWith( - 2, - expect.objectContaining({ - model: PREVIEW_GEMINI_FLASH_MODEL, - config: expect.objectContaining({ - thinkingConfig: { - thinkingBudget: undefined, - thinkingLevel: ThinkingLevel.HIGH, - }, - }), - }), - 'prompt-id-fb3', - ); - }); - it('should stop retrying if handleFallback returns false (e.g., auth intent)', async () => { vi.mocked(mockConfig.getModel).mockReturnValue('gemini-pro'); vi.mocked(mockContentGenerator.generateContentStream).mockRejectedValue( @@ -2203,92 +2028,6 @@ describe('GeminiChat', () => { }); }); - describe('Preview Model Fallback Logic', () => { - it('should reset previewModelBypassMode to false at the start of sendMessageStream', async () => { - const stream = (async function* () { - yield { - candidates: [ - { - content: { role: 'model', parts: [{ text: 'Success' }] }, - finishReason: 'STOP', - }, - ], - } as unknown as GenerateContentResponse; - })(); - vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue( - stream, - ); - - await chat.sendMessageStream( - { model: 'test-model' }, - 'test', - 'prompt-id-preview-model-reset', - new AbortController().signal, - ); - - expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(false); - }); - - it('should reset previewModelFallbackMode to false upon successful Preview Model usage', async () => { - const stream = (async function* () { - yield { - candidates: [ - { - content: { role: 'model', parts: [{ text: 'Success' }] }, - finishReason: 'STOP', - }, - ], - } as unknown as GenerateContentResponse; - })(); - vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue( - stream, - ); - - const resultStream = await chat.sendMessageStream( - { model: PREVIEW_GEMINI_MODEL }, - 'test', - 'prompt-id-preview-model-healing', - new AbortController().signal, - ); - for await (const _ of resultStream) { - // consume stream - } - - expect(mockConfig.setPreviewModelFallbackMode).toHaveBeenCalledWith( - false, - ); - }); - it('should NOT reset previewModelFallbackMode if Preview Model was bypassed (downgraded)', async () => { - const stream = (async function* () { - yield { - candidates: [ - { - content: { role: 'model', parts: [{ text: 'Success' }] }, - finishReason: 'STOP', - }, - ], - } as unknown as GenerateContentResponse; - })(); - vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue( - stream, - ); - // Simulate bypass mode being active (downgrade happened) - vi.mocked(mockConfig.isPreviewModelBypassMode).mockReturnValue(true); - - const resultStream = await chat.sendMessageStream( - { model: PREVIEW_GEMINI_MODEL }, - 'test', - 'prompt-id-bypass-no-healing', - new AbortController().signal, - ); - for await (const _ of resultStream) { - // consume stream - } - - expect(mockConfig.setPreviewModelFallbackMode).not.toHaveBeenCalled(); - }); - }); - describe('ensureActiveLoopHasThoughtSignatures', () => { it('should add thoughtSignature to the first functionCall in each model turn of the active loop', () => { const chat = new GeminiChat(mockConfig, '', [], []); diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index c1b0e17412..0f5f3633d1 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -22,10 +22,9 @@ import { createUserContent, FinishReason } from '@google/genai'; import { retryWithBackoff, isRetryableError } from '../utils/retry.js'; import type { Config } from '../config/config.js'; import { - DEFAULT_GEMINI_MODEL, DEFAULT_THINKING_MODE, PREVIEW_GEMINI_MODEL, - getEffectiveModel, + resolveModel, isGemini2Model, } from '../config/models.js'; import { hasCycleInSchema } from '../tools/tools.js'; @@ -441,11 +440,24 @@ export class GeminiChat { this.config, () => lastModelToUse, ); - const apiCall = async () => { - let modelToUse: string; + // Track initial active model to detect fallback changes + const initialActiveModel = this.config.getActiveModel(); + + const apiCall = async () => { + // Default to the last used model (which respects arguments/availability selection) + let modelToUse = resolveModel( + lastModelToUse, + this.config.getPreviewFeatures(), + ); + + // If the active model has changed (e.g. due to a fallback updating the config), + // we switch to the new active model. + if (this.config.getActiveModel() !== initialActiveModel) { + modelToUse = resolveModel( + this.config.getActiveModel(), + this.config.getPreviewFeatures(), + ); - if (this.config.isModelAvailabilityServiceEnabled()) { - modelToUse = this.config.getActiveModel(); if (modelToUse !== lastModelToUse) { const { generateContentConfig: newConfig } = this.config.modelConfigService.getResolvedConfig({ @@ -459,20 +471,6 @@ export class GeminiChat { currentGenerateContentConfig.abortSignal = abortSignal; } } - } else { - modelToUse = getEffectiveModel(model, this.config.isInFallbackMode()); - - // Preview Model Bypass Logic: - // If we are in "Preview Model Bypass Mode" (transient failure), we force downgrade to 2.5 Pro - // IF the effective model is currently Preview Model. - // Note: In availability mode, this should ideally be handled by policy, but preserving - // bypass logic for now as it handles specific transient behavior. - if ( - this.config.isPreviewModelBypassMode() && - modelToUse === PREVIEW_GEMINI_MODEL - ) { - modelToUse = DEFAULT_GEMINI_MODEL; - } } lastModelToUse = modelToUse; diff --git a/packages/core/src/core/geminiChat_network_retry.test.ts b/packages/core/src/core/geminiChat_network_retry.test.ts index 71085188dc..5b96c9d555 100644 --- a/packages/core/src/core/geminiChat_network_retry.test.ts +++ b/packages/core/src/core/geminiChat_network_retry.test.ts @@ -78,6 +78,8 @@ describe('GeminiChat Network Retries', () => { model: 'test-model', }), getModel: vi.fn().mockReturnValue('gemini-pro'), + getActiveModel: vi.fn().mockReturnValue('gemini-pro'), + setActiveModel: vi.fn(), isInFallbackMode: vi.fn().mockReturnValue(false), getQuotaErrorOccurred: vi.fn().mockReturnValue(false), getProjectRoot: vi.fn().mockReturnValue('/test/project/root'), diff --git a/packages/core/src/core/prompts.test.ts b/packages/core/src/core/prompts.test.ts index bb12eaaf31..f794a6c498 100644 --- a/packages/core/src/core/prompts.test.ts +++ b/packages/core/src/core/prompts.test.ts @@ -16,8 +16,8 @@ import { GEMINI_DIR } from '../utils/paths.js'; import { debugLogger } from '../utils/debugLogger.js'; import { DEFAULT_GEMINI_MODEL, - getEffectiveModel, PREVIEW_GEMINI_MODEL, + DEFAULT_GEMINI_MODEL_AUTO, } from '../config/models.js'; // Mock tool names if they are dynamically generated or complex @@ -43,10 +43,9 @@ vi.mock('../utils/gitUtils', () => ({ })); vi.mock('node:fs'); vi.mock('../config/models.js', async (importOriginal) => { - const actual = await importOriginal(); + const actual = await importOriginal(); return { - ...actual, - getEffectiveModel: vi.fn(), + ...(actual as object), }; }); @@ -66,15 +65,15 @@ describe('Core System Prompt (prompts.ts)', () => { }, isInteractive: vi.fn().mockReturnValue(true), isInteractiveShellEnabled: vi.fn().mockReturnValue(true), - getModel: vi.fn().mockReturnValue('auto'), + getModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL_AUTO), + getActiveModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL), getPreviewFeatures: vi.fn().mockReturnValue(false), isInFallbackMode: vi.fn().mockReturnValue(false), } as unknown as Config; - vi.mocked(getEffectiveModel).mockReturnValue(DEFAULT_GEMINI_MODEL); }); it('should use chatty system prompt for preview model', () => { - vi.mocked(getEffectiveModel).mockReturnValue(PREVIEW_GEMINI_MODEL); + vi.mocked(mockConfig.getActiveModel).mockReturnValue(PREVIEW_GEMINI_MODEL); const prompt = getCoreSystemPrompt(mockConfig); expect(prompt).toContain('You are an interactive CLI agent'); // Check for core content expect(prompt).not.toContain('No Chitchat:'); @@ -160,6 +159,7 @@ describe('Core System Prompt (prompts.ts)', () => { isInteractive: vi.fn().mockReturnValue(false), isInteractiveShellEnabled: vi.fn().mockReturnValue(false), getModel: vi.fn().mockReturnValue('auto'), + getActiveModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL), getPreviewFeatures: vi.fn().mockReturnValue(false), isInFallbackMode: vi.fn().mockReturnValue(false), } as unknown as Config; diff --git a/packages/core/src/core/prompts.ts b/packages/core/src/core/prompts.ts index d0bf8808d2..9e433aa83a 100644 --- a/packages/core/src/core/prompts.ts +++ b/packages/core/src/core/prompts.ts @@ -24,7 +24,7 @@ import type { Config } from '../config/config.js'; import { GEMINI_DIR } from '../utils/paths.js'; import { debugLogger } from '../utils/debugLogger.js'; import { WriteTodosTool } from '../tools/write-todos.js'; -import { getEffectiveModel, PREVIEW_GEMINI_MODEL } from '../config/models.js'; +import { resolveModel, PREVIEW_GEMINI_MODEL } from '../config/models.js'; export function resolvePathFromEnv(envVar?: string): { isSwitch: boolean; @@ -105,9 +105,9 @@ export function getCoreSystemPrompt( } // TODO(joshualitt): Replace with system instructions on model configs. - const desiredModel = getEffectiveModel( - config.getModel(), - config.isInFallbackMode(), + const desiredModel = resolveModel( + config.getActiveModel(), + config.getPreviewFeatures(), ); const isGemini3 = desiredModel === PREVIEW_GEMINI_MODEL; @@ -157,7 +157,7 @@ export function getCoreSystemPrompt( ## Software Engineering Tasks When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: -1. **Understand:** Think about the user's request and the relevant codebase context. Use '${GREP_TOOL_NAME}' and '${GLOB_TOOL_NAME}' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. +1. **Understand:** Think about the user's request and the relevant codebase context. Use '${GREP_TOOL_NAME}' and '${GLOB_TOOL_NAME}' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use '${READ_FILE_TOOL_NAME}' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to '${READ_FILE_TOOL_NAME}'. 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.`, @@ -184,8 +184,7 @@ When requested to perform tasks like fixing bugs, adding features, refactoring, When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: 1. **Understand:** Think about the user's request and the relevant codebase context. Use '${GREP_TOOL_NAME}' and '${GLOB_TOOL_NAME}' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use '${READ_FILE_TOOL_NAME}' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to '${READ_FILE_TOOL_NAME}'. 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. For complex tasks, break them down into smaller, manageable subtasks and use the \`${WRITE_TODOS_TOOL_NAME}\` tool to track your progress. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.`, - primaryWorkflows_suffix: `3. **Implement:** Use the available tools (e.g., '${EDIT_TOOL_NAME}', '${WRITE_FILE_TOOL_NAME}' '${SHELL_TOOL_NAME}' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core -Mandates'). + primaryWorkflows_suffix: `3. **Implement:** Use the available tools (e.g., '${EDIT_TOOL_NAME}', '${WRITE_FILE_TOOL_NAME}' '${SHELL_TOOL_NAME}' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands. 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards.${interactiveMode ? " If unsure about these commands, you can ask the user if they'd like you to run them and if so how to." : ''} 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction. @@ -412,7 +411,7 @@ The structure MUST be as follows: - Build Command: \`npm run build\` - Testing: Tests are run with \`npm test\`. Test files must end in \`.test.ts\`. - API Endpoint: The primary API endpoint is \`https://api.example.com/v2\`. - + --> diff --git a/packages/core/src/fallback/handler.test.ts b/packages/core/src/fallback/handler.test.ts index 29ba2d5b9e..2ffef97a05 100644 --- a/packages/core/src/fallback/handler.test.ts +++ b/packages/core/src/fallback/handler.test.ts @@ -22,11 +22,10 @@ import { AuthType } from '../core/contentGenerator.js'; import { DEFAULT_GEMINI_FLASH_MODEL, DEFAULT_GEMINI_MODEL, + PREVIEW_GEMINI_FLASH_MODEL, PREVIEW_GEMINI_MODEL, } from '../config/models.js'; -import { logFlashFallback } from '../telemetry/index.js'; import type { FallbackModelHandler } from './types.js'; -import { ModelNotFoundError } from '../utils/httpErrors.js'; import { openBrowserSecurely } from '../utils/secure-browser-launcher.js'; import { coreEvents } from '../utils/events.js'; import { debugLogger } from '../utils/debugLogger.js'; @@ -64,7 +63,7 @@ const createMockConfig = (overrides: Partial = {}): Config => ({ isInFallbackMode: vi.fn(() => false), setFallbackMode: vi.fn(), - isModelAvailabilityServiceEnabled: vi.fn(() => false), + isModelAvailabilityServiceEnabled: vi.fn(() => true), isPreviewModelFallbackMode: vi.fn(() => false), setPreviewModelFallbackMode: vi.fn(), isPreviewModelBypassMode: vi.fn(() => false), @@ -78,6 +77,7 @@ const createMockConfig = (overrides: Partial = {}): Config => skipped: [], }), ), + getActiveModel: vi.fn(() => MOCK_PRO_MODEL), getModel: vi.fn(() => MOCK_PRO_MODEL), getPreviewFeatures: vi.fn(() => false), getUserTier: vi.fn(() => undefined), @@ -113,430 +113,6 @@ describe('handleFallback', () => { fallbackEventSpy.mockRestore(); }); - it('should return null immediately if authType is not OAuth', async () => { - const result = await handleFallback( - mockConfig, - MOCK_PRO_MODEL, - AUTH_API_KEY, - ); - expect(result).toBeNull(); - expect(mockHandler).not.toHaveBeenCalled(); - expect(mockConfig.setFallbackMode).not.toHaveBeenCalled(); - }); - - it('should still consult the handler if the failed model is the fallback model', async () => { - mockHandler.mockResolvedValue('stop'); - const result = await handleFallback( - mockConfig, - FALLBACK_MODEL, // Failed model is Flash - AUTH_OAUTH, - ); - expect(result).toBe(false); - expect(mockHandler).toHaveBeenCalled(); - }); - - it('should return null if no fallbackHandler is injected in config', async () => { - const configWithoutHandler = createMockConfig({ - fallbackModelHandler: undefined, - }); - const result = await handleFallback( - configWithoutHandler, - MOCK_PRO_MODEL, - AUTH_OAUTH, - ); - expect(result).toBeNull(); - }); - - describe('when handler returns "retry_always"', () => { - it('should activate fallback mode, log telemetry, and return true', async () => { - mockHandler.mockResolvedValue('retry_always'); - - const result = await handleFallback( - mockConfig, - MOCK_PRO_MODEL, - AUTH_OAUTH, - ); - - expect(result).toBe(true); - expect(mockConfig.setFallbackMode).toHaveBeenCalledWith(true); - expect(logFlashFallback).toHaveBeenCalled(); - }); - }); - - describe('when handler returns "stop"', () => { - it('should activate fallback mode, log telemetry, and return false', async () => { - mockHandler.mockResolvedValue('stop'); - - const result = await handleFallback( - mockConfig, - MOCK_PRO_MODEL, - AUTH_OAUTH, - ); - - expect(result).toBe(false); - expect(mockConfig.setFallbackMode).toHaveBeenCalledWith(true); - expect(logFlashFallback).toHaveBeenCalled(); - }); - }); - - it('should return false without toggling fallback when handler returns "retry_later"', async () => { - mockHandler.mockResolvedValue('retry_later'); - - const result = await handleFallback(mockConfig, MOCK_PRO_MODEL, AUTH_OAUTH); - - expect(result).toBe(false); - expect(mockConfig.setFallbackMode).not.toHaveBeenCalled(); - expect(logFlashFallback).not.toHaveBeenCalled(); - expect(fallbackEventSpy).not.toHaveBeenCalled(); - }); - - it('should launch upgrade flow and avoid fallback mode when handler returns "upgrade"', async () => { - mockHandler.mockResolvedValue('upgrade'); - vi.mocked(openBrowserSecurely).mockResolvedValue(undefined); - - const result = await handleFallback(mockConfig, MOCK_PRO_MODEL, AUTH_OAUTH); - - expect(result).toBe(false); - expect(openBrowserSecurely).toHaveBeenCalledWith( - 'https://goo.gle/set-up-gemini-code-assist', - ); - expect(mockConfig.setFallbackMode).not.toHaveBeenCalled(); - expect(logFlashFallback).not.toHaveBeenCalled(); - expect(fallbackEventSpy).not.toHaveBeenCalled(); - }); - - it('should log a warning and continue when upgrade flow fails to open a browser', async () => { - mockHandler.mockResolvedValue('upgrade'); - const debugWarnSpy = vi.spyOn(debugLogger, 'warn'); - const consoleWarnSpy = vi - .spyOn(console, 'warn') - .mockImplementation(() => {}); - vi.mocked(openBrowserSecurely).mockRejectedValue(new Error('blocked')); - - const result = await handleFallback(mockConfig, MOCK_PRO_MODEL, AUTH_OAUTH); - - expect(result).toBe(false); - expect(debugWarnSpy).toHaveBeenCalledWith( - 'Failed to open browser automatically:', - 'blocked', - ); - expect(mockConfig.setFallbackMode).not.toHaveBeenCalled(); - expect(fallbackEventSpy).not.toHaveBeenCalled(); - debugWarnSpy.mockRestore(); - consoleWarnSpy.mockRestore(); - }); - - describe('when handler returns an unexpected value', () => { - it('should log an error and return null', async () => { - mockHandler.mockResolvedValue(null); - - const result = await handleFallback( - mockConfig, - MOCK_PRO_MODEL, - AUTH_OAUTH, - ); - - expect(result).toBeNull(); - expect(consoleErrorSpy).toHaveBeenCalledWith( - 'Fallback UI handler failed:', - new Error( - 'Unexpected fallback intent received from fallbackModelHandler: "null"', - ), - ); - expect(mockConfig.setFallbackMode).not.toHaveBeenCalled(); - }); - }); - - it('should pass the correct context (failedModel, fallbackModel, error) to the handler', async () => { - const mockError = new Error('Quota Exceeded'); - mockHandler.mockResolvedValue('retry_always'); - - await handleFallback(mockConfig, MOCK_PRO_MODEL, AUTH_OAUTH, mockError); - - expect(mockHandler).toHaveBeenCalledWith( - MOCK_PRO_MODEL, - FALLBACK_MODEL, - mockError, - ); - }); - - it('should not call setFallbackMode or log telemetry if already in fallback mode', async () => { - // Setup config where fallback mode is already active - const activeFallbackConfig = createMockConfig({ - fallbackModelHandler: mockHandler, - isInFallbackMode: vi.fn(() => true), // Already active - setFallbackMode: vi.fn(), - }); - - mockHandler.mockResolvedValue('retry_always'); - - const result = await handleFallback( - activeFallbackConfig, - MOCK_PRO_MODEL, - AUTH_OAUTH, - ); - - // Should still return true to allow the retry (which will use the active fallback mode) - expect(result).toBe(true); - // Should still consult the handler - expect(mockHandler).toHaveBeenCalled(); - // But should not mutate state or log telemetry again - expect(activeFallbackConfig.setFallbackMode).not.toHaveBeenCalled(); - expect(logFlashFallback).not.toHaveBeenCalled(); - }); - - it('should catch errors from the handler, log an error, and return null', async () => { - const handlerError = new Error('UI interaction failed'); - mockHandler.mockRejectedValue(handlerError); - - const result = await handleFallback(mockConfig, MOCK_PRO_MODEL, AUTH_OAUTH); - - expect(result).toBeNull(); - expect(consoleErrorSpy).toHaveBeenCalledWith( - 'Fallback UI handler failed:', - handlerError, - ); - expect(mockConfig.setFallbackMode).not.toHaveBeenCalled(); - }); - - describe('Preview Model Fallback Logic', () => { - const previewModel = PREVIEW_GEMINI_MODEL; - - it('should only set Preview Model bypass mode on retryable quota failure', async () => { - const mockGoogleApiError = { - code: 429, - message: 'mock error', - details: [], - }; - const retryableQuotaError = new RetryableQuotaError( - 'Capacity error', - mockGoogleApiError, - 5, - ); - await handleFallback( - mockConfig, - previewModel, - AUTH_OAUTH, - retryableQuotaError, - ); - expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(true); - }); - - it('should not set Preview Model bypass mode on non-retryable quota failure', async () => { - const mockGoogleApiError = { - code: 429, - message: 'mock error', - details: [], - }; - const terminalQuotaError = new TerminalQuotaError( - 'quota error', - mockGoogleApiError, - 5, - ); - await handleFallback( - mockConfig, - previewModel, - AUTH_OAUTH, - terminalQuotaError, - ); - - expect(mockConfig.setPreviewModelBypassMode).not.toHaveBeenCalled(); - }); - - it('should silently retry if Preview Model fallback mode is already active and error is retryable error', async () => { - vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(true); - const mockGoogleApiError = { - code: 429, - message: 'mock error', - details: [], - }; - const retryableQuotaError = new RetryableQuotaError( - 'Capacity error', - mockGoogleApiError, - 5, - ); - const result = await handleFallback( - mockConfig, - previewModel, - AUTH_OAUTH, - retryableQuotaError, - ); - - expect(result).toBe(true); - expect(mockHandler).not.toHaveBeenCalled(); - }); - - it('should activate Preview Model fallback mode when handler returns "retry_always" and is RetryableQuotaError', async () => { - mockHandler.mockResolvedValue('retry_always'); - const mockGoogleApiError = { - code: 429, - message: 'mock error', - details: [], - }; - const retryableQuotaError = new RetryableQuotaError( - 'Capacity error', - mockGoogleApiError, - 5, - ); - const result = await handleFallback( - mockConfig, - previewModel, - AUTH_OAUTH, - retryableQuotaError, - ); - - expect(result).toBe(true); - expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(true); - expect(mockConfig.setPreviewModelFallbackMode).toHaveBeenCalledWith(true); - }); - - it('should activate regular fallback when handler returns "retry_always" and is TerminalQuotaError', async () => { - mockHandler.mockResolvedValue('retry_always'); - const mockGoogleApiError = { - code: 503, - message: 'mock error', - details: [], - }; - const terminalError = new TerminalQuotaError( - 'Quota error', - mockGoogleApiError, - 5, - ); - const result = await handleFallback( - mockConfig, - previewModel, - AUTH_OAUTH, - terminalError, - ); - - expect(result).toBe(true); - expect(mockConfig.setPreviewModelFallbackMode).not.toBeCalled(); - expect(mockConfig.setFallbackMode).toHaveBeenCalledWith(true); - }); - - it('should NOT set fallback mode if user chooses "retry_once"', async () => { - const mockGoogleApiError = { - code: 429, - message: 'mock error', - details: [], - }; - const terminalQuotaError = new TerminalQuotaError( - 'quota error', - mockGoogleApiError, - 5, - ); - mockHandler.mockResolvedValue('retry_once'); - - const result = await handleFallback( - mockConfig, - PREVIEW_GEMINI_MODEL, - AuthType.LOGIN_WITH_GOOGLE, - terminalQuotaError, - ); - - expect(result).toBe(true); - expect(mockConfig.setPreviewModelBypassMode).not.toHaveBeenCalled(); - expect(mockConfig.setPreviewModelFallbackMode).not.toHaveBeenCalled(); - expect(mockConfig.setFallbackMode).not.toHaveBeenCalled(); - }); - - it('should pass DEFAULT_GEMINI_MODEL as fallback when Preview Model fails with Retryable Error', async () => { - const mockFallbackHandler = vi.fn().mockResolvedValue('stop'); - vi.mocked(mockConfig.fallbackModelHandler!).mockImplementation( - mockFallbackHandler, - ); - const mockGoogleApiError = { - code: 429, - message: 'mock error', - details: [], - }; - const retryableQuotaError = new RetryableQuotaError( - 'Capacity error', - mockGoogleApiError, - 5, - ); - - await handleFallback( - mockConfig, - PREVIEW_GEMINI_MODEL, - AuthType.LOGIN_WITH_GOOGLE, - retryableQuotaError, - ); - - expect(mockConfig.fallbackModelHandler).toHaveBeenCalledWith( - PREVIEW_GEMINI_MODEL, - DEFAULT_GEMINI_MODEL, - retryableQuotaError, - ); - }); - - it('should pass DEFAULT_GEMINI_MODEL as fallback when Preview Model fails with other error', async () => { - await handleFallback( - mockConfig, - PREVIEW_GEMINI_MODEL, - AuthType.LOGIN_WITH_GOOGLE, - ); - - expect(mockConfig.fallbackModelHandler).toHaveBeenCalledWith( - PREVIEW_GEMINI_MODEL, - DEFAULT_GEMINI_MODEL, - undefined, - ); - }); - - it('should pass DEFAULT_GEMINI_FLASH_MODEL as fallback when Preview Model fails with other error', async () => { - const mockGoogleApiError = { - code: 429, - message: 'mock error', - details: [], - }; - const terminalQuotaError = new TerminalQuotaError( - 'quota error', - mockGoogleApiError, - 5, - ); - await handleFallback( - mockConfig, - PREVIEW_GEMINI_MODEL, - AuthType.LOGIN_WITH_GOOGLE, - terminalQuotaError, - ); - - expect(mockConfig.fallbackModelHandler).toHaveBeenCalledWith( - PREVIEW_GEMINI_MODEL, - DEFAULT_GEMINI_FLASH_MODEL, - terminalQuotaError, - ); - }); - }); - - it('should return null if ModelNotFoundError occurs for a non-preview model', async () => { - const modelNotFoundError = new ModelNotFoundError('Not found'); - const result = await handleFallback( - mockConfig, - DEFAULT_GEMINI_MODEL, // Not preview model - AUTH_OAUTH, - modelNotFoundError, - ); - expect(result).toBeNull(); - expect(mockHandler).not.toHaveBeenCalled(); - }); - - it('should consult handler if ModelNotFoundError occurs for preview model', async () => { - const modelNotFoundError = new ModelNotFoundError('Not found'); - mockHandler.mockResolvedValue('retry_always'); - - const result = await handleFallback( - mockConfig, - PREVIEW_GEMINI_MODEL, - AUTH_OAUTH, - modelNotFoundError, - ); - - expect(result).toBe(true); - expect(mockHandler).toHaveBeenCalled(); - }); - describe('policy-driven flow', () => { let policyConfig: Config; let availability: ModelAvailabilityService; @@ -550,27 +126,38 @@ describe('handleFallback', () => { }); policyHandler = vi.fn().mockResolvedValue('retry_once'); policyConfig = createMockConfig(); - vi.spyOn( - policyConfig, - 'isModelAvailabilityServiceEnabled', - ).mockReturnValue(true); - vi.spyOn(policyConfig, 'getModelAvailabilityService').mockReturnValue( + + // Ensure we test the availability path + vi.mocked(policyConfig.isModelAvailabilityServiceEnabled).mockReturnValue( + true, + ); + vi.mocked(policyConfig.getModelAvailabilityService).mockReturnValue( availability, ); - vi.spyOn(policyConfig, 'getFallbackModelHandler').mockReturnValue( + vi.mocked(policyConfig.getFallbackModelHandler).mockReturnValue( policyHandler, ); }); + it('should return null immediately if authType is not OAuth', async () => { + const result = await handleFallback( + policyConfig, + MOCK_PRO_MODEL, + AUTH_API_KEY, + ); + expect(result).toBeNull(); + expect(policyHandler).not.toHaveBeenCalled(); + }); + it('uses availability selection with correct candidates when enabled', async () => { - vi.spyOn(policyConfig, 'getPreviewFeatures').mockReturnValue(true); - vi.spyOn(policyConfig, 'getModel').mockReturnValue(DEFAULT_GEMINI_MODEL); + // Direct mock manipulation since it's already a vi.fn() + vi.mocked(policyConfig.getPreviewFeatures).mockReturnValue(true); + vi.mocked(policyConfig.getModel).mockReturnValue(DEFAULT_GEMINI_MODEL); await handleFallback(policyConfig, DEFAULT_GEMINI_MODEL, AUTH_OAUTH); expect(availability.selectFirstAvailable).toHaveBeenCalledWith([ DEFAULT_GEMINI_FLASH_MODEL, - PREVIEW_GEMINI_MODEL, ]); }); @@ -634,9 +221,9 @@ describe('handleFallback', () => { } }); - it('wraps around to upgrade candidates if the current model was selected mid-chain (e.g. by router)', async () => { + it('does not wrap around to upgrade candidates if the current model was selected at the end (e.g. by router)', async () => { // Last-resort failure (Flash) in [Preview, Pro, Flash] checks Preview then Pro (all upstream). - vi.spyOn(policyConfig, 'getPreviewFeatures').mockReturnValue(true); + vi.mocked(policyConfig.getPreviewFeatures).mockReturnValue(true); availability.selectFirstAvailable = vi.fn().mockReturnValue({ selectedModel: MOCK_PRO_MODEL, @@ -650,43 +237,25 @@ describe('handleFallback', () => { AUTH_OAUTH, ); - expect(availability.selectFirstAvailable).toHaveBeenCalledWith([ - PREVIEW_GEMINI_MODEL, - MOCK_PRO_MODEL, - ]); + expect(availability.selectFirstAvailable).not.toHaveBeenCalled(); expect(policyHandler).toHaveBeenCalledWith( DEFAULT_GEMINI_FLASH_MODEL, - MOCK_PRO_MODEL, + DEFAULT_GEMINI_FLASH_MODEL, undefined, ); }); - it('logs and returns null when handler resolves to null', async () => { - policyHandler.mockResolvedValue(null); - const debugLoggerErrorSpy = vi.spyOn(debugLogger, 'error'); - const result = await handleFallback( - policyConfig, - MOCK_PRO_MODEL, - AUTH_OAUTH, - ); - - expect(result).toBeNull(); - expect(debugLoggerErrorSpy).toHaveBeenCalledWith( - 'Fallback handler failed:', - new Error( - 'Unexpected fallback intent received from fallbackModelHandler: "null"', - ), - ); - debugLoggerErrorSpy.mockRestore(); - }); - it('successfully follows expected availability response for Preview Chain', async () => { - availability.selectFirstAvailable = vi - .fn() - .mockReturnValue({ selectedModel: DEFAULT_GEMINI_MODEL, skipped: [] }); + availability.selectFirstAvailable = vi.fn().mockReturnValue({ + selectedModel: PREVIEW_GEMINI_FLASH_MODEL, + skipped: [], + }); policyHandler.mockResolvedValue('retry_once'); - vi.spyOn(policyConfig, 'getPreviewFeatures').mockReturnValue(true); - vi.spyOn(policyConfig, 'getModel').mockReturnValue(PREVIEW_GEMINI_MODEL); + vi.mocked(policyConfig.getPreviewFeatures).mockReturnValue(true); + vi.mocked(policyConfig.getActiveModel).mockReturnValue( + PREVIEW_GEMINI_MODEL, + ); + vi.mocked(policyConfig.getModel).mockReturnValue(PREVIEW_GEMINI_MODEL); const result = await handleFallback( policyConfig, @@ -696,17 +265,99 @@ describe('handleFallback', () => { expect(result).toBe(true); expect(availability.selectFirstAvailable).toHaveBeenCalledWith([ - DEFAULT_GEMINI_MODEL, - DEFAULT_GEMINI_FLASH_MODEL, + PREVIEW_GEMINI_FLASH_MODEL, ]); - expect(policyHandler).toHaveBeenCalledWith( - PREVIEW_GEMINI_MODEL, - DEFAULT_GEMINI_MODEL, - undefined, + }); + + it('should launch upgrade flow and avoid fallback mode when handler returns "upgrade"', async () => { + policyHandler.mockResolvedValue('upgrade'); + vi.mocked(openBrowserSecurely).mockResolvedValue(undefined); + + const result = await handleFallback( + policyConfig, + MOCK_PRO_MODEL, + AUTH_OAUTH, + ); + + expect(result).toBe(false); + expect(openBrowserSecurely).toHaveBeenCalledWith( + 'https://goo.gle/set-up-gemini-code-assist', + ); + expect(policyConfig.setActiveModel).not.toHaveBeenCalled(); + }); + + it('should catch errors from the handler, log an error, and return null', async () => { + const handlerError = new Error('UI interaction failed'); + policyHandler.mockRejectedValue(handlerError); + + const result = await handleFallback( + policyConfig, + MOCK_PRO_MODEL, + AUTH_OAUTH, + ); + + expect(result).toBeNull(); + expect(debugLogger.error).toHaveBeenCalledWith( + 'Fallback handler failed:', + handlerError, ); }); - it('short-circuits when the failed model is the last-resort policy AND candidates are unavailable', async () => { + it('should pass TerminalQuotaError (429) correctly to the handler', async () => { + const mockGoogleApiError = { + code: 429, + message: 'mock error', + details: [], + }; + const terminalError = new TerminalQuotaError( + 'Quota error', + mockGoogleApiError, + 5, + ); + policyHandler.mockResolvedValue('retry_always'); + + await handleFallback( + policyConfig, + MOCK_PRO_MODEL, + AUTH_OAUTH, + terminalError, + ); + + expect(policyHandler).toHaveBeenCalledWith( + MOCK_PRO_MODEL, + DEFAULT_GEMINI_FLASH_MODEL, + terminalError, + ); + }); + + it('should pass RetryableQuotaError correctly to the handler', async () => { + const mockGoogleApiError = { + code: 503, + message: 'mock error', + details: [], + }; + const retryableError = new RetryableQuotaError( + 'Service unavailable', + mockGoogleApiError, + 1000, + ); + policyHandler.mockResolvedValue('retry_once'); + + await handleFallback( + policyConfig, + MOCK_PRO_MODEL, + AUTH_OAUTH, + retryableError, + ); + + expect(policyHandler).toHaveBeenCalledWith( + MOCK_PRO_MODEL, + DEFAULT_GEMINI_FLASH_MODEL, + retryableError, + ); + }); + + it('Call the handler with fallback model same as the failed model when the failed model is the last-resort policy', async () => { // Ensure short-circuit when wrapping to an unavailable upstream model. availability.selectFirstAvailable = vi .fn() @@ -718,10 +369,14 @@ describe('handleFallback', () => { AUTH_OAUTH, ); - expect(result).toBeNull(); - // Service called to check upstream; no UI handler since nothing selected. - expect(policyConfig.getModelAvailabilityService).toHaveBeenCalled(); - expect(policyConfig.getFallbackModelHandler).not.toHaveBeenCalled(); + policyHandler.mockResolvedValue('retry_once'); + + expect(result).not.toBeNull(); + expect(policyHandler).toHaveBeenCalledWith( + DEFAULT_GEMINI_FLASH_MODEL, + DEFAULT_GEMINI_FLASH_MODEL, + undefined, + ); }); it('calls setActiveModel and logs telemetry when handler returns "retry_always"', async () => { @@ -739,7 +394,7 @@ describe('handleFallback', () => { // TODO: add logging expect statement }); - it('calls setActiveModel when handler returns "stop"', async () => { + it('does NOT call setActiveModel when handler returns "stop"', async () => { policyHandler.mockResolvedValue('stop'); const result = await handleFallback( @@ -749,8 +404,21 @@ describe('handleFallback', () => { ); expect(result).toBe(false); - expect(policyConfig.setActiveModel).toHaveBeenCalledWith(FALLBACK_MODEL); + expect(policyConfig.setActiveModel).not.toHaveBeenCalled(); // TODO: add logging expect statement }); + + it('does NOT call setActiveModel when handler returns "retry_once"', async () => { + policyHandler.mockResolvedValue('retry_once'); + + const result = await handleFallback( + policyConfig, + MOCK_PRO_MODEL, + AUTH_OAUTH, + ); + + expect(result).toBe(true); + expect(policyConfig.setActiveModel).not.toHaveBeenCalled(); + }); }); }); diff --git a/packages/core/src/fallback/handler.ts b/packages/core/src/fallback/handler.ts index 93cab502fb..2d2bd55174 100644 --- a/packages/core/src/fallback/handler.ts +++ b/packages/core/src/fallback/handler.ts @@ -6,24 +6,16 @@ import type { Config } from '../config/config.js'; import { AuthType } from '../core/contentGenerator.js'; -import { - DEFAULT_GEMINI_FLASH_MODEL, - DEFAULT_GEMINI_MODEL, - PREVIEW_GEMINI_MODEL, -} from '../config/models.js'; -import { logFlashFallback, FlashFallbackEvent } from '../telemetry/index.js'; import { openBrowserSecurely } from '../utils/secure-browser-launcher.js'; import { debugLogger } from '../utils/debugLogger.js'; import { getErrorMessage } from '../utils/errors.js'; -import { ModelNotFoundError } from '../utils/httpErrors.js'; -import { TerminalQuotaError } from '../utils/googleQuotaErrors.js'; -import { coreEvents } from '../utils/events.js'; import type { FallbackIntent, FallbackRecommendation } from './types.js'; import { classifyFailureKind } from '../availability/errorClassification.js'; import { buildFallbackPolicyContext, resolvePolicyChain, resolvePolicyAction, + applyAvailabilityTransition, } from '../availability/policyHelpers.js'; const UPGRADE_URL_PAGE = 'https://goo.gle/set-up-gemini-code-assist'; @@ -34,75 +26,7 @@ export async function handleFallback( authType?: string, error?: unknown, ): Promise { - if (config.isModelAvailabilityServiceEnabled()) { - return handlePolicyDrivenFallback(config, failedModel, authType, error); - } - return legacyHandleFallback(config, failedModel, authType, error); -} - -/** - * Old fallback logic relying on hard coded strings - */ -async function legacyHandleFallback( - config: Config, - failedModel: string, - authType?: string, - error?: unknown, -): Promise { - if (authType !== AuthType.LOGIN_WITH_GOOGLE) return null; - - // Guardrail: If it's a ModelNotFoundError but NOT the preview model, do not handle it. - if ( - error instanceof ModelNotFoundError && - failedModel !== PREVIEW_GEMINI_MODEL - ) { - return null; - } - const shouldActivatePreviewFallback = - failedModel === PREVIEW_GEMINI_MODEL && - !(error instanceof TerminalQuotaError); - // Preview Model Specific Logic - if (shouldActivatePreviewFallback) { - // Always set bypass mode for the immediate retry, for non-TerminalQuotaErrors. - // This ensures the next attempt uses 2.5 Pro. - config.setPreviewModelBypassMode(true); - - // If we are already in Preview Model fallback mode (user previously said "Always"), - // we silently retry (which will use 2.5 Pro due to bypass mode). - if (config.isPreviewModelFallbackMode()) { - return true; - } - } - - const fallbackModel = shouldActivatePreviewFallback - ? DEFAULT_GEMINI_MODEL - : DEFAULT_GEMINI_FLASH_MODEL; - - // Consult UI Handler for Intent - const fallbackModelHandler = config.fallbackModelHandler; - if (typeof fallbackModelHandler !== 'function') return null; - - try { - // Pass the specific failed model to the UI handler. - const intent = await fallbackModelHandler( - failedModel, - fallbackModel, - error, - ); - - // Process Intent and Update State - return await processIntent( - config, - intent, - failedModel, - fallbackModel, - authType, - error, - ); - } catch (handlerError) { - console.error('Fallback UI handler failed:', handlerError); - return null; - } + return handlePolicyDrivenFallback(config, failedModel, authType, error); } /** @@ -125,50 +49,56 @@ async function handlePolicyDrivenFallback( ); const failureKind = classifyFailureKind(error); - - if (!candidates.length) { - return null; - } - const availability = config.getModelAvailabilityService(); - const selection = availability.selectFirstAvailable( - candidates.map((policy) => policy.model), - ); - - const lastResortPolicy = candidates.find((policy) => policy.isLastResort); - const fallbackModel = selection.selectedModel ?? lastResortPolicy?.model; - const selectedPolicy = candidates.find( - (policy) => policy.model === fallbackModel, - ); - - if (!fallbackModel || fallbackModel === failedModel || !selectedPolicy) { - return null; - } - - // failureKind is already declared and calculated above - const action = resolvePolicyAction(failureKind, selectedPolicy); - - if (action === 'silent') { - return processIntent( - config, - 'retry_always', - failedModel, - fallbackModel, - authType, - error, - ); - } - - // This will be used in the future when FallbackRecommendation is passed through UI - const recommendation: FallbackRecommendation = { - ...selection, - selectedModel: fallbackModel, - action, - failureKind, - failedPolicy, - selectedPolicy, + const getAvailabilityContext = () => { + if (!failedPolicy) return undefined; + return { service: availability, policy: failedPolicy }; }; - void recommendation; + + let fallbackModel: string; + if (!candidates.length) { + fallbackModel = failedModel; + } else { + const selection = availability.selectFirstAvailable( + candidates.map((policy) => policy.model), + ); + + const lastResortPolicy = candidates.find((policy) => policy.isLastResort); + const selectedFallbackModel = + selection.selectedModel ?? lastResortPolicy?.model; + const selectedPolicy = candidates.find( + (policy) => policy.model === selectedFallbackModel, + ); + + if ( + !selectedFallbackModel || + selectedFallbackModel === failedModel || + !selectedPolicy + ) { + return null; + } + + fallbackModel = selectedFallbackModel; + + // failureKind is already declared and calculated above + const action = resolvePolicyAction(failureKind, selectedPolicy); + + if (action === 'silent') { + applyAvailabilityTransition(getAvailabilityContext, failureKind); + return processIntent(config, 'retry_always', fallbackModel); + } + + // This will be used in the future when FallbackRecommendation is passed through UI + const recommendation: FallbackRecommendation = { + ...selection, + selectedModel: fallbackModel, + action, + failureKind, + failedPolicy, + selectedPolicy, + }; + void recommendation; + } const handler = config.getFallbackModelHandler(); if (typeof handler !== 'function') { @@ -177,14 +107,16 @@ async function handlePolicyDrivenFallback( try { const intent = await handler(failedModel, fallbackModel, error); - return await processIntent( - config, - intent, - failedModel, - fallbackModel, - authType, - error, // Pass the error so processIntent can handle preview-specific logic - ); + + // If the user chose to switch/retry, we apply the availability transition + // to the failed model (e.g. marking it terminal if it had a quota error). + // We DO NOT apply it if the user chose 'stop' or 'retry_later', allowing + // them to try again later with the same model state. + if (intent === 'retry_always' || intent === 'retry_once') { + applyAvailabilityTransition(getAvailabilityContext, failureKind); + } + + return await processIntent(config, intent, fallbackModel); } catch (handlerError) { debugLogger.error('Fallback handler failed:', handlerError); return null; @@ -205,47 +137,23 @@ async function handleUpgrade() { async function processIntent( config: Config, intent: FallbackIntent | null, - failedModel: string, fallbackModel: string, - authType?: string, - error?: unknown, ): Promise { - const isAvailabilityEnabled = config.isModelAvailabilityServiceEnabled(); - switch (intent) { case 'retry_always': - if (isAvailabilityEnabled) { - // TODO(telemetry): Implement generic fallback event logging. Existing - // logFlashFallback is specific to a single Model. - config.setActiveModel(fallbackModel); - } else { - // If the error is non-retryable, e.g. TerminalQuota Error, trigger a regular fallback to flash. - // For all other errors, activate previewModel fallback. - if ( - failedModel === PREVIEW_GEMINI_MODEL && - !(error instanceof TerminalQuotaError) - ) { - activatePreviewModelFallbackMode(config); - } else { - activateFallbackMode(config, authType); - } - } + // TODO(telemetry): Implement generic fallback event logging. Existing + // logFlashFallback is specific to a single Model. + config.setActiveModel(fallbackModel); return true; case 'retry_once': - if (isAvailabilityEnabled) { - config.setActiveModel(fallbackModel); - } + // For distinct retry (retry_once), we do NOT set the active model permanently. + // The FallbackStrategy will handle routing to the available model for this turn + // based on the availability service state (which is updated before this). return true; case 'stop': - if (isAvailabilityEnabled) { - // TODO(telemetry): Implement generic fallback event logging. Existing - // logFlashFallback is specific to a single Model. - config.setActiveModel(fallbackModel); - } else { - activateFallbackMode(config, authType); - } + // Do not switch model on stop. User wants to stay on current model (and stop). return false; case 'retry_later': @@ -261,20 +169,3 @@ async function processIntent( ); } } - -function activateFallbackMode(config: Config, authType: string | undefined) { - if (!config.isInFallbackMode()) { - config.setFallbackMode(true); - coreEvents.emitFallbackModeChanged(true); - if (authType) { - logFlashFallback(config, new FlashFallbackEvent(authType)); - } - } -} - -function activatePreviewModelFallbackMode(config: Config) { - if (!config.isPreviewModelFallbackMode()) { - config.setPreviewModelFallbackMode(true); - // We might want a specific event for Preview Model fallback, but for now we just set the mode. - } -} diff --git a/packages/core/src/routing/modelRouterService.test.ts b/packages/core/src/routing/modelRouterService.test.ts index da1b659d7c..f6b9df8a23 100644 --- a/packages/core/src/routing/modelRouterService.test.ts +++ b/packages/core/src/routing/modelRouterService.test.ts @@ -7,10 +7,7 @@ import { describe, it, expect, vi, beforeEach } from 'vitest'; import { ModelRouterService } from './modelRouterService.js'; import { Config } from '../config/config.js'; -import { - PREVIEW_GEMINI_MODEL, - DEFAULT_GEMINI_MODEL, -} from '../config/models.js'; + import type { BaseLlmClient } from '../core/baseLlmClient.js'; import type { RoutingContext, RoutingDecision } from './routingStrategy.js'; import { DefaultStrategy } from './strategies/defaultStrategy.js'; @@ -151,81 +148,5 @@ describe('ModelRouterService', () => { expect.any(ModelRoutingEvent), ); }); - - it('should upgrade to Preview Model when preview features are enabled and model is 2.5 Pro', async () => { - vi.spyOn(mockCompositeStrategy, 'route').mockResolvedValue({ - model: DEFAULT_GEMINI_MODEL, - metadata: { source: 'test', latencyMs: 0, reasoning: 'test' }, - }); - vi.spyOn(mockConfig, 'getPreviewFeatures').mockReturnValue(true); - vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(false); - - const decision = await service.route(mockContext); - - expect(decision.model).toBe(PREVIEW_GEMINI_MODEL); - }); - - it('should NOT upgrade to Preview Model when preview features are disabled', async () => { - vi.spyOn(mockCompositeStrategy, 'route').mockResolvedValue({ - model: DEFAULT_GEMINI_MODEL, - metadata: { source: 'test', latencyMs: 0, reasoning: 'test' }, - }); - vi.spyOn(mockConfig, 'getPreviewFeatures').mockReturnValue(false); - - const decision = await service.route(mockContext); - - expect(decision.model).toBe(DEFAULT_GEMINI_MODEL); - }); - - it('should upgrade to Preview Model when preview features are enabled and model is explicitly set to Pro', async () => { - // Simulate OverrideStrategy returning Preview Model (as resolveModel would do for "pro") - vi.spyOn(mockCompositeStrategy, 'route').mockResolvedValue({ - model: PREVIEW_GEMINI_MODEL, - metadata: { - source: 'override', - latencyMs: 0, - reasoning: 'User selected', - }, - }); - vi.spyOn(mockConfig, 'getPreviewFeatures').mockReturnValue(true); - vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(false); - - const decision = await service.route(mockContext); - - expect(decision.model).toBe(PREVIEW_GEMINI_MODEL); - }); - - it('should NOT upgrade to Preview Model when preview features are enabled and model is explicitly set to a specific string', async () => { - // Simulate OverrideStrategy returning a specific model (e.g. "gemini-2.5-pro") - // This happens when user explicitly sets model to "gemini-2.5-pro" instead of "pro" - vi.spyOn(mockCompositeStrategy, 'route').mockResolvedValue({ - model: DEFAULT_GEMINI_MODEL, - metadata: { - source: 'override', - latencyMs: 0, - reasoning: 'User selected', - }, - }); - vi.spyOn(mockConfig, 'getPreviewFeatures').mockReturnValue(true); - vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(false); - - const decision = await service.route(mockContext); - - // Should NOT upgrade to Preview Model because source is 'override' and model is specific - expect(decision.model).toBe(DEFAULT_GEMINI_MODEL); - }); - - it('should upgrade to Preview Model even if fallback mode is active (probing behavior)', async () => { - vi.spyOn(mockCompositeStrategy, 'route').mockResolvedValue({ - model: DEFAULT_GEMINI_MODEL, - metadata: { source: 'default', latencyMs: 0, reasoning: 'Default' }, - }); - vi.spyOn(mockConfig, 'getPreviewFeatures').mockReturnValue(true); - vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(true); - - const decision = await service.route(mockContext); - - expect(decision.model).toBe(PREVIEW_GEMINI_MODEL); - }); }); }); diff --git a/packages/core/src/routing/modelRouterService.ts b/packages/core/src/routing/modelRouterService.ts index ae82517831..3898ff4100 100644 --- a/packages/core/src/routing/modelRouterService.ts +++ b/packages/core/src/routing/modelRouterService.ts @@ -5,10 +5,6 @@ */ import type { Config } from '../config/config.js'; -import { - PREVIEW_GEMINI_MODEL, - DEFAULT_GEMINI_MODEL, -} from '../config/models.js'; import type { RoutingContext, RoutingDecision, @@ -66,23 +62,6 @@ export class ModelRouterService { this.config.getBaseLlmClient(), ); - // Unified Preview Model Logic: - // If the decision is to use 'gemini-2.5-pro' and preview features are enabled, - // we attempt to upgrade to 'gemini-3.0-pro' (Preview Model). - if ( - decision.model === DEFAULT_GEMINI_MODEL && - this.config.getPreviewFeatures() && - !decision.metadata.source.includes('override') - ) { - // We ALWAYS attempt to upgrade to Preview Model here. - // If we are in fallback mode, the 'previewModelBypassMode' flag (handled in handler.ts/geminiChat.ts) - // will ensure we downgrade to 2.5 Pro for the actual API call if needed. - // This allows us to "probe" Preview Model periodically (i.e., every new request tries Preview Model first). - decision.model = PREVIEW_GEMINI_MODEL; - decision.metadata.source += ' (Preview Model)'; - decision.metadata.reasoning += ' (Upgraded to Preview Model)'; - } - const event = new ModelRoutingEvent( decision.model, decision.metadata.source, diff --git a/packages/core/src/routing/strategies/classifierStrategy.ts b/packages/core/src/routing/strategies/classifierStrategy.ts index 2c52ee644e..4747bc5352 100644 --- a/packages/core/src/routing/strategies/classifierStrategy.ts +++ b/packages/core/src/routing/strategies/classifierStrategy.ts @@ -12,7 +12,7 @@ import type { RoutingDecision, RoutingStrategy, } from '../routingStrategy.js'; -import { getEffectiveModel } from '../../config/models.js'; +import { resolveClassifierModel } from '../../config/models.js'; import { createUserContent, Type } from '@google/genai'; import type { Config } from '../../config/config.js'; import { @@ -167,26 +167,20 @@ export class ClassifierStrategy implements RoutingStrategy { const reasoning = routerResponse.reasoning; const latencyMs = Date.now() - startTime; + const selectedModel = resolveClassifierModel( + config.getModel(), + routerResponse.model_choice, + config.getPreviewFeatures(), + ); - if (routerResponse.model_choice === FLASH_MODEL) { - return { - model: getEffectiveModel(config.getModel(), true), - metadata: { - source: 'Classifier', - latencyMs, - reasoning, - }, - }; - } else { - return { - model: getEffectiveModel(config.getModel(), false), - metadata: { - source: 'Classifier', - reasoning, - latencyMs, - }, - }; - } + return { + model: selectedModel, + metadata: { + source: 'Classifier', + latencyMs, + reasoning, + }, + }; } catch (error) { // If the classifier fails for any reason (API error, parsing error, etc.), // we log it and return null to allow the composite strategy to proceed. diff --git a/packages/core/src/routing/strategies/fallbackStrategy.test.ts b/packages/core/src/routing/strategies/fallbackStrategy.test.ts index 0913a84fa3..c9a0aa2179 100644 --- a/packages/core/src/routing/strategies/fallbackStrategy.test.ts +++ b/packages/core/src/routing/strategies/fallbackStrategy.test.ts @@ -4,125 +4,118 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, expect } from 'vitest'; +import { describe, it, expect, vi, beforeEach } from 'vitest'; import { FallbackStrategy } from './fallbackStrategy.js'; import type { RoutingContext } from '../routingStrategy.js'; import type { BaseLlmClient } from '../../core/baseLlmClient.js'; import type { Config } from '../../config/config.js'; +import type { ModelAvailabilityService } from '../../availability/modelAvailabilityService.js'; import { DEFAULT_GEMINI_MODEL, DEFAULT_GEMINI_FLASH_MODEL, - DEFAULT_GEMINI_FLASH_LITE_MODEL, DEFAULT_GEMINI_MODEL_AUTO, - PREVIEW_GEMINI_MODEL_AUTO, - PREVIEW_GEMINI_FLASH_MODEL, } from '../../config/models.js'; +import { selectModelForAvailability } from '../../availability/policyHelpers.js'; + +vi.mock('../../availability/policyHelpers.js', () => ({ + selectModelForAvailability: vi.fn(), +})); + +const createMockConfig = (overrides: Partial = {}): Config => + ({ + isModelAvailabilityServiceEnabled: vi.fn().mockReturnValue(true), + getModelAvailabilityService: vi.fn(), + getModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL), + getPreviewFeatures: vi.fn().mockReturnValue(false), + ...overrides, + }) as unknown as Config; describe('FallbackStrategy', () => { const strategy = new FallbackStrategy(); const mockContext = {} as RoutingContext; const mockClient = {} as BaseLlmClient; + let mockService: ModelAvailabilityService; + let mockConfig: Config; - it('should return null when not in fallback mode', async () => { - const mockConfig = { - isInFallbackMode: () => false, - getModel: () => DEFAULT_GEMINI_MODEL, - getPreviewFeatures: () => false, - } as Config; + beforeEach(() => { + vi.resetAllMocks(); + + mockService = { + snapshot: vi.fn(), + } as unknown as ModelAvailabilityService; + + mockConfig = createMockConfig({ + getModelAvailabilityService: vi.fn().mockReturnValue(mockService), + }); + }); + + it('should return null if service is disabled', async () => { + vi.mocked(mockConfig.isModelAvailabilityServiceEnabled).mockReturnValue( + false, + ); const decision = await strategy.route(mockContext, mockConfig, mockClient); expect(decision).toBeNull(); }); - describe('when in fallback mode', () => { - it('should downgrade a default auto model to the flash model', async () => { - const mockConfig = { - isInFallbackMode: () => true, - getModel: () => DEFAULT_GEMINI_MODEL_AUTO, - } as Config; + it('should return null if the requested model is available', async () => { + // Mock snapshot to return available + vi.mocked(mockService.snapshot).mockReturnValue({ available: true }); - const decision = await strategy.route( - mockContext, - mockConfig, - mockClient, - ); + const decision = await strategy.route(mockContext, mockConfig, mockClient); + expect(decision).toBeNull(); + // Should check availability of the resolved model (DEFAULT_GEMINI_MODEL) + expect(mockService.snapshot).toHaveBeenCalledWith(DEFAULT_GEMINI_MODEL); + }); - expect(decision).not.toBeNull(); - expect(decision?.model).toBe(DEFAULT_GEMINI_FLASH_MODEL); - expect(decision?.metadata.source).toBe('fallback'); - expect(decision?.metadata.reasoning).toContain('In fallback mode'); + it('should return null if fallback selection is same as requested model', async () => { + // Mock snapshot to return unavailable + vi.mocked(mockService.snapshot).mockReturnValue({ + available: false, + reason: 'quota', + }); + // Mock selectModelForAvailability to return the SAME model (no fallback found) + vi.mocked(selectModelForAvailability).mockReturnValue({ + selectedModel: DEFAULT_GEMINI_MODEL, + skipped: [], }); - it('should downgrade a preview auto model to the preview flash model', async () => { - const mockConfig = { - isInFallbackMode: () => true, - getModel: () => PREVIEW_GEMINI_MODEL_AUTO, - } as Config; + const decision = await strategy.route(mockContext, mockConfig, mockClient); + expect(decision).toBeNull(); + }); - const decision = await strategy.route( - mockContext, - mockConfig, - mockClient, - ); - - expect(decision).not.toBeNull(); - expect(decision?.model).toBe(PREVIEW_GEMINI_FLASH_MODEL); - expect(decision?.metadata.source).toBe('fallback'); - expect(decision?.metadata.reasoning).toContain('In fallback mode'); + it('should return fallback decision if model is unavailable and fallback found', async () => { + // Mock snapshot to return unavailable + vi.mocked(mockService.snapshot).mockReturnValue({ + available: false, + reason: 'quota', }); - it('should not downgrade a pro model to the flash model', async () => { - const mockConfig = { - isInFallbackMode: () => true, - getModel: () => DEFAULT_GEMINI_MODEL, - } as Config; - - const decision = await strategy.route( - mockContext, - mockConfig, - mockClient, - ); - - expect(decision).not.toBeNull(); - expect(decision?.model).toBe(DEFAULT_GEMINI_MODEL); - expect(decision?.metadata.source).toBe('fallback'); - expect(decision?.metadata.reasoning).toContain('In fallback mode'); + // Mock selectModelForAvailability to find a fallback (Flash) + vi.mocked(selectModelForAvailability).mockReturnValue({ + selectedModel: DEFAULT_GEMINI_FLASH_MODEL, + skipped: [{ model: DEFAULT_GEMINI_MODEL, reason: 'quota' }], }); - it('should honor a lite model request', async () => { - const mockConfig = { - isInFallbackMode: () => true, - getModel: () => DEFAULT_GEMINI_FLASH_LITE_MODEL, - getPreviewFeatures: () => false, - } as Config; + const decision = await strategy.route(mockContext, mockConfig, mockClient); - const decision = await strategy.route( - mockContext, - mockConfig, - mockClient, - ); + expect(decision).not.toBeNull(); + expect(decision?.model).toBe(DEFAULT_GEMINI_FLASH_MODEL); + expect(decision?.metadata.source).toBe('fallback'); + expect(decision?.metadata.reasoning).toContain( + `Model ${DEFAULT_GEMINI_MODEL} is unavailable`, + ); + }); - expect(decision).not.toBeNull(); - expect(decision?.model).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL); - expect(decision?.metadata.source).toBe('fallback'); - }); + it('should correctly handle "auto" alias by resolving it before checking availability', async () => { + // Mock snapshot to return available for the RESOLVED model + vi.mocked(mockService.snapshot).mockReturnValue({ available: true }); + vi.mocked(mockConfig.getModel).mockReturnValue(DEFAULT_GEMINI_MODEL_AUTO); - it('should use the flash model if flash is requested', async () => { - const mockConfig = { - isInFallbackMode: () => true, - getModel: () => DEFAULT_GEMINI_FLASH_MODEL, - getPreviewFeatures: () => false, - } as Config; + const decision = await strategy.route(mockContext, mockConfig, mockClient); - const decision = await strategy.route( - mockContext, - mockConfig, - mockClient, - ); - - expect(decision).not.toBeNull(); - expect(decision?.model).toBe(DEFAULT_GEMINI_FLASH_MODEL); - expect(decision?.metadata.source).toBe('fallback'); - }); + expect(decision).toBeNull(); + // Important: check that it queried snapshot with the RESOLVED model, not 'auto' + expect(mockService.snapshot).toHaveBeenCalledWith(DEFAULT_GEMINI_MODEL); }); }); diff --git a/packages/core/src/routing/strategies/fallbackStrategy.ts b/packages/core/src/routing/strategies/fallbackStrategy.ts index 130a279eb4..1f89b2caa3 100644 --- a/packages/core/src/routing/strategies/fallbackStrategy.ts +++ b/packages/core/src/routing/strategies/fallbackStrategy.ts @@ -4,8 +4,9 @@ * SPDX-License-Identifier: Apache-2.0 */ +import { selectModelForAvailability } from '../../availability/policyHelpers.js'; import type { Config } from '../../config/config.js'; -import { getEffectiveModel } from '../../config/models.js'; +import { resolveModel } from '../../config/models.js'; import type { BaseLlmClient } from '../../core/baseLlmClient.js'; import type { RoutingContext, @@ -21,23 +22,38 @@ export class FallbackStrategy implements RoutingStrategy { config: Config, _baseLlmClient: BaseLlmClient, ): Promise { - const isInFallbackMode: boolean = config.isInFallbackMode(); - - if (!isInFallbackMode) { + if (!config.isModelAvailabilityServiceEnabled()) { return null; } - const effectiveModel = getEffectiveModel( - config.getModel(), - isInFallbackMode, + const requestedModel = config.getModel(); + const resolvedModel = resolveModel( + requestedModel, + config.getPreviewFeatures(), ); - return { - model: effectiveModel, - metadata: { - source: this.name, - latencyMs: 0, - reasoning: `In fallback mode. Using: ${effectiveModel}`, - }, - }; + const service = config.getModelAvailabilityService(); + const snapshot = service.snapshot(resolvedModel); + + if (snapshot.available) { + return null; + } + + const selection = selectModelForAvailability(config, requestedModel); + + if ( + selection?.selectedModel && + selection.selectedModel !== requestedModel + ) { + return { + model: selection.selectedModel, + metadata: { + source: this.name, + latencyMs: 0, + reasoning: `Model ${requestedModel} is unavailable (${snapshot.reason}). Using fallback: ${selection.selectedModel}`, + }, + }; + } + + return null; } } diff --git a/packages/core/src/routing/strategies/overrideStrategy.ts b/packages/core/src/routing/strategies/overrideStrategy.ts index bf7b7914ab..f16f8ca625 100644 --- a/packages/core/src/routing/strategies/overrideStrategy.ts +++ b/packages/core/src/routing/strategies/overrideStrategy.ts @@ -39,7 +39,7 @@ export class OverrideStrategy implements RoutingStrategy { // Return the overridden model name. return { - model: getEffectiveModel(overrideModel, false), + model: getEffectiveModel(overrideModel, config.getPreviewFeatures()), metadata: { source: this.name, latencyMs: 0, diff --git a/packages/core/src/utils/flashFallback.test.ts b/packages/core/src/utils/flashFallback.test.ts index 522f5230f1..ec95de94ef 100644 --- a/packages/core/src/utils/flashFallback.test.ts +++ b/packages/core/src/utils/flashFallback.test.ts @@ -124,7 +124,7 @@ describe('Retry Utility Fallback Integration', () => { }); await expect(promise).rejects.toThrow('Daily limit'); - expect(fallbackCallback).not.toHaveBeenCalled(); + expect(fallbackCallback).toHaveBeenCalledTimes(1); expect(mockApiCall).toHaveBeenCalledTimes(1); }); diff --git a/packages/core/src/utils/retry.test.ts b/packages/core/src/utils/retry.test.ts index 1b940ed38b..b4edf6a9ce 100644 --- a/packages/core/src/utils/retry.test.ts +++ b/packages/core/src/utils/retry.test.ts @@ -464,7 +464,7 @@ describe('retryWithBackoff', () => { }); it.each([[AuthType.USE_GEMINI], [AuthType.USE_VERTEX_AI], [undefined]])( - 'should not trigger fallback for non-Google auth users (authType: %s) on TerminalQuotaError', + 'should invoke onPersistent429 callback (delegating decision) for non-Google auth users (authType: %s) on TerminalQuotaError', async (authType) => { const fallbackCallback = vi.fn(); const mockFn = vi.fn().mockImplementation(async () => { @@ -478,7 +478,7 @@ describe('retryWithBackoff', () => { }); await expect(promise).rejects.toThrow('Daily limit reached'); - expect(fallbackCallback).not.toHaveBeenCalled(); + expect(fallbackCallback).toHaveBeenCalled(); expect(mockFn).toHaveBeenCalledTimes(1); }, ); @@ -629,20 +629,10 @@ describe('retryWithBackoff', () => { ).rejects.toThrow(TerminalQuotaError); // Verify failures - expect(mockService.markTerminal).toHaveBeenCalledWith('model-1', 'quota'); - expect(mockService.markTerminal).toHaveBeenCalledWith('model-2', 'quota'); + expect(mockService.markTerminal).not.toHaveBeenCalled(); + expect(mockService.markTerminal).not.toHaveBeenCalled(); // Verify sequences - expect(mockService.markTerminal).toHaveBeenNthCalledWith( - 1, - 'model-1', - 'quota', - ); - expect(mockService.markTerminal).toHaveBeenNthCalledWith( - 2, - 'model-2', - 'quota', - ); }); it('marks sticky_retry after retries are exhausted for transient failures', async () => { @@ -671,8 +661,8 @@ describe('retryWithBackoff', () => { expect(result).toBe(transientError); expect(fn).toHaveBeenCalledTimes(3); - expect(mockService.markRetryOncePerTurn).toHaveBeenCalledWith('model-1'); - expect(mockService.markRetryOncePerTurn).toHaveBeenCalledTimes(1); + expect(mockService.markRetryOncePerTurn).not.toHaveBeenCalled(); + expect(mockService.markRetryOncePerTurn).not.toHaveBeenCalled(); expect(mockService.markTerminal).not.toHaveBeenCalled(); }); @@ -710,29 +700,7 @@ describe('retryWithBackoff', () => { maxAttempts: 1, getAvailabilityContext: getContext, }).catch(() => {}); - expect(mockService.markTerminal).toHaveBeenCalledWith('model-1', 'quota'); - - // Run for notFoundError - await retryWithBackoff(fn, { - maxAttempts: 1, - getAvailabilityContext: getContext, - }).catch(() => {}); - expect(mockService.markTerminal).toHaveBeenCalledWith( - 'model-1', - 'capacity', - ); - - // Run for genericError - await retryWithBackoff(fn, { - maxAttempts: 1, - getAvailabilityContext: getContext, - }).catch(() => {}); - expect(mockService.markTerminal).toHaveBeenCalledWith( - 'model-1', - 'capacity', - ); - - expect(mockService.markTerminal).toHaveBeenCalledTimes(3); + expect(mockService.markTerminal).not.toHaveBeenCalled(); }); }); }); diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts index b9224fe304..fd91cbd2ff 100644 --- a/packages/core/src/utils/retry.ts +++ b/packages/core/src/utils/retry.ts @@ -6,7 +6,6 @@ import type { GenerateContentResponse } from '@google/genai'; import { ApiError } from '@google/genai'; -import { AuthType } from '../core/contentGenerator.js'; import { TerminalQuotaError, RetryableQuotaError, @@ -16,8 +15,6 @@ import { delay, createAbortError } from './delay.js'; import { debugLogger } from './debugLogger.js'; import { getErrorStatus, ModelNotFoundError } from './httpErrors.js'; import type { RetryAvailabilityContext } from '../availability/modelPolicy.js'; -import { classifyFailureKind } from '../availability/errorClassification.js'; -import { applyAvailabilityTransition } from '../availability/policyHelpers.js'; export type { RetryAvailabilityContext }; @@ -192,12 +189,6 @@ export async function retryWithBackoff( } const classifiedError = classifyGoogleError(error); - const failureKind = classifyFailureKind(classifiedError); - const appliedImmediate = - failureKind === 'terminal' || failureKind === 'not_found'; - if (appliedImmediate) { - applyAvailabilityTransition(getAvailabilityContext, failureKind); - } const errorCode = getErrorStatus(error); @@ -205,7 +196,7 @@ export async function retryWithBackoff( classifiedError instanceof TerminalQuotaError || classifiedError instanceof ModelNotFoundError ) { - if (onPersistent429 && authType === AuthType.LOGIN_WITH_GOOGLE) { + if (onPersistent429) { try { const fallbackModel = await onPersistent429( authType, @@ -229,7 +220,7 @@ export async function retryWithBackoff( if (classifiedError instanceof RetryableQuotaError || is500) { if (attempt >= maxAttempts) { - if (onPersistent429 && authType === AuthType.LOGIN_WITH_GOOGLE) { + if (onPersistent429) { try { const fallbackModel = await onPersistent429( authType, @@ -244,9 +235,6 @@ export async function retryWithBackoff( console.warn('Model fallback failed:', fallbackError); } } - if (!appliedImmediate) { - applyAvailabilityTransition(getAvailabilityContext, failureKind); - } throw classifiedError instanceof RetryableQuotaError ? classifiedError : error; @@ -276,9 +264,6 @@ export async function retryWithBackoff( attempt >= maxAttempts || !shouldRetryOnError(error as Error, retryFetchErrors) ) { - if (!appliedImmediate) { - applyAvailabilityTransition(getAvailabilityContext, failureKind); - } throw error; }