diff --git a/packages/core/src/availability/policyHelpers.test.ts b/packages/core/src/availability/policyHelpers.test.ts
index df20d28fad..53a7734b8b 100644
--- a/packages/core/src/availability/policyHelpers.test.ts
+++ b/packages/core/src/availability/policyHelpers.test.ts
@@ -132,11 +132,15 @@ describe('policyHelpers', () => {
    it('returns requested model if it is available', () => {
      const config = createExtendedMockConfig();
+      mockModelConfigService.getResolvedConfig.mockReturnValue({
+        model: 'gemini-pro',
+        generateContentConfig: {},
+      });
      mockAvailabilityService.selectFirstAvailable.mockReturnValue({
        selectedModel: 'gemini-pro',
      });

-      const result = applyModelSelection(config, 'gemini-pro');
+      const result = applyModelSelection(config, { model: 'gemini-pro' });
      expect(result.model).toBe('gemini-pro');
      expect(result.maxAttempts).toBeUndefined();
      expect(config.setActiveModel).toHaveBeenCalledWith('gemini-pro');
@@ -144,15 +148,20 @@ describe('policyHelpers', () => {
    it('switches to backup model and updates config if requested is unavailable', () => {
      const config = createExtendedMockConfig();
+      mockModelConfigService.getResolvedConfig
+        .mockReturnValueOnce({
+          model: 'gemini-pro',
+          generateContentConfig: { temperature: 0.9, topP: 1 },
+        })
+        .mockReturnValueOnce({
+          model: 'gemini-flash',
+          generateContentConfig: { temperature: 0.1, topP: 1 },
+        });
      mockAvailabilityService.selectFirstAvailable.mockReturnValue({
        selectedModel: 'gemini-flash',
      });
-      mockModelConfigService.getResolvedConfig.mockReturnValue({
-        generateContentConfig: { temperature: 0.1 },
-      });
-      const currentConfig = { temperature: 0.9, topP: 1 };

-      const result = applyModelSelection(config, 'gemini-pro', currentConfig);
+      const result = applyModelSelection(config, { model: 'gemini-pro' });
      expect(result.model).toBe('gemini-flash');
      expect(result.config).toEqual({
@@ -160,6 +169,9 @@ describe('policyHelpers', () => {
        topP: 1,
      });

+      expect(mockModelConfigService.getResolvedConfig).toHaveBeenCalledWith({
+        model: 'gemini-pro',
+      });
      expect(mockModelConfigService.getResolvedConfig).toHaveBeenCalledWith({
        model: 'gemini-flash',
      });
@@ -168,12 +180,16 @@ describe('policyHelpers', () => {
    it('consumes sticky attempt if indicated', () => {
      const config = createExtendedMockConfig();
+      mockModelConfigService.getResolvedConfig.mockReturnValue({
+        model: 'gemini-pro',
+        generateContentConfig: {},
+      });
      mockAvailabilityService.selectFirstAvailable.mockReturnValue({
        selectedModel: 'gemini-pro',
        attempts: 1,
      });

-      const result = applyModelSelection(config, 'gemini-pro');
+      const result = applyModelSelection(config, { model: 'gemini-pro' });
      expect(mockAvailabilityService.consumeStickyAttempt).toHaveBeenCalledWith(
        'gemini-pro',
      );
@@ -182,6 +198,10 @@ describe('policyHelpers', () => {
    it('does not consume sticky attempt if consumeAttempt is false', () => {
      const config = createExtendedMockConfig();
+      mockModelConfigService.getResolvedConfig.mockReturnValue({
+        model: 'gemini-pro',
+        generateContentConfig: {},
+      });
      mockAvailabilityService.selectFirstAvailable.mockReturnValue({
        selectedModel: 'gemini-pro',
        attempts: 1,
@@ -189,9 +209,7 @@ describe('policyHelpers', () => {

      const result = applyModelSelection(
        config,
-        'gemini-pro',
-        undefined,
-        undefined,
+        { model: 'gemini-pro' },
        {
          consumeAttempt: false,
        },
diff --git a/packages/core/src/availability/policyHelpers.ts b/packages/core/src/availability/policyHelpers.ts
index 484bec3215..02b83871dc 100644
--- a/packages/core/src/availability/policyHelpers.ts
+++ b/packages/core/src/availability/policyHelpers.ts
@@ -25,6 +25,7 @@ import {
  resolveModel,
} from '../config/models.js';
import type { ModelSelectionResult } from './modelAvailabilityService.js';
+import type { ModelConfigKey } from '../services/modelConfigService.js';

/**
 * Resolves the active policy chain for the given config, ensuring the
@@ -155,31 +156,26 @@ export function selectModelForAvailability(
 */
export function applyModelSelection(
  config: Config,
-  requestedModel: string,
-  currentConfig?: GenerateContentConfig,
-  overrideScope?: string,
+  modelConfigKey: ModelConfigKey,
  options: { consumeAttempt?: boolean } = {},
-): { model: string; config?: GenerateContentConfig; maxAttempts?: number } {
-  const selection = selectModelForAvailability(config, requestedModel);
+): { model: string; config: GenerateContentConfig; maxAttempts?: number } {
+  const resolved = config.modelConfigService.getResolvedConfig(modelConfigKey);
+  const model = resolved.model;
+  const selection = selectModelForAvailability(config, model);

-  if (!selection?.selectedModel) {
-    return { model: requestedModel, config: currentConfig };
+  if (!selection) {
+    return { model, config: resolved.generateContentConfig };
  }

-  const finalModel = selection.selectedModel;
-  let finalConfig = currentConfig;
+  const finalModel = selection.selectedModel ?? model;
+  let generateContentConfig = resolved.generateContentConfig;

-  // If model changed, re-resolve config
-  if (finalModel !== requestedModel) {
-    const { generateContentConfig } =
-      config.modelConfigService.getResolvedConfig({
-        overrideScope,
-        model: finalModel,
-      });
-
-    finalConfig = currentConfig
-      ? { ...currentConfig, ...generateContentConfig }
-      : generateContentConfig;
+  if (finalModel !== model) {
+    const fallbackResolved = config.modelConfigService.getResolvedConfig({
+      ...modelConfigKey,
+      model: finalModel,
+    });
+    generateContentConfig = fallbackResolved.generateContentConfig;
  }

  config.setActiveModel(finalModel);
@@ -190,7 +186,7 @@ export function applyModelSelection(

  return {
    model: finalModel,
-    config: finalConfig,
+    config: generateContentConfig,
    maxAttempts: selection.attempts,
  };
}
diff --git a/packages/core/src/core/baseLlmClient.test.ts b/packages/core/src/core/baseLlmClient.test.ts
index c3499f6e75..a14bdc8307 100644
--- a/packages/core/src/core/baseLlmClient.test.ts
+++ b/packages/core/src/core/baseLlmClient.test.ts
@@ -776,13 +776,15 @@ describe('BaseLlmClient', () => {
    const getResolvedConfigMock = vi.mocked(
      mockConfig.modelConfigService.getResolvedConfig,
    );
-    getResolvedConfigMock
-      .mockReturnValueOnce(
-        makeResolvedModelConfig(firstModel, { temperature: 0.1 }),
-      )
-      .mockReturnValueOnce(
-        makeResolvedModelConfig(fallbackModel, { temperature: 0.9 }),
-      );
+    getResolvedConfigMock.mockImplementation((key) => {
+      if (key.model === firstModel) {
+        return makeResolvedModelConfig(firstModel, { temperature: 0.1 });
+      }
+      if (key.model === fallbackModel) {
+        return makeResolvedModelConfig(fallbackModel, { temperature: 0.9 });
+      }
+      return makeResolvedModelConfig(key.model);
+    });

    // Availability selects the first model initially
    vi.mocked(mockAvailabilityService.selectFirstAvailable).mockReturnValue({
diff --git a/packages/core/src/core/baseLlmClient.ts b/packages/core/src/core/baseLlmClient.ts
index 22c406ef0f..a508cdd038 100644
--- a/packages/core/src/core/baseLlmClient.ts
+++ b/packages/core/src/core/baseLlmClient.ts
@@ -10,6 +10,7 @@ import type {
  EmbedContentParameters,
  GenerateContentResponse,
  GenerateContentParameters,
+  GenerateContentConfig,
} from '@google/genai';
import type { Config } from '../config/config.js';
import type { ContentGenerator } from './contentGenerator.js';
@@ -81,6 +82,19 @@
  maxAttempts?: number;
}

+interface _CommonGenerateOptions {
+  modelConfigKey: ModelConfigKey;
+  contents: Content[];
+  systemInstruction?: string | Part | Part[] | Content;
+  abortSignal: AbortSignal;
+  promptId: string;
+  maxAttempts?: number;
+  additionalProperties?: {
+    responseJsonSchema: Record<string, unknown>;
+    responseMimeType: string;
+  };
+}
+
/**
 * A client dedicated to stateless, utility-focused LLM calls.
 */
@@ -104,7 +118,7 @@
      maxAttempts,
    } = options;

-    const { model, generateContentConfig } =
+    const { model } =
      this.config.modelConfigService.getResolvedConfig(modelConfigKey);

    const shouldRetryOnContent = (response: GenerateContentResponse) => {
@@ -123,18 +137,17 @@
    const result = await this._generateWithRetry(
      {
-        model,
+        modelConfigKey,
        contents,
-        config: {
-          ...generateContentConfig,
-          ...(systemInstruction && { systemInstruction }),
+        abortSignal,
+        promptId,
+        maxAttempts,
+        systemInstruction,
+        additionalProperties: {
          responseJsonSchema: schema,
          responseMimeType: 'application/json',
-          abortSignal,
        },
      },
-      promptId,
-      maxAttempts,
      shouldRetryOnContent,
      'generateJson',
    );
@@ -205,9 +218,6 @@
      maxAttempts,
    } = options;

-    const { model, generateContentConfig } =
-      this.config.modelConfigService.getResolvedConfig(modelConfigKey);
-
    const shouldRetryOnContent = (response: GenerateContentResponse) => {
      const text = getResponseText(response)?.trim();
      return !text; // Retry on empty response
@@ -215,70 +225,74 @@
    return this._generateWithRetry(
      {
-        model,
+        modelConfigKey,
        contents,
-        config: {
-          ...generateContentConfig,
-          ...(systemInstruction && { systemInstruction }),
-          abortSignal,
-        },
+        systemInstruction,
+        abortSignal,
+        promptId,
+        maxAttempts,
      },
-      promptId,
-      maxAttempts,
      shouldRetryOnContent,
      'generateContent',
    );
  }

  private async _generateWithRetry(
-    requestParams: GenerateContentParameters,
-    promptId: string,
-    maxAttempts: number | undefined,
+    options: _CommonGenerateOptions,
    shouldRetryOnContent: (response: GenerateContentResponse) => boolean,
    errorContext: 'generateJson' | 'generateContent',
  ): Promise<GenerateContentResponse> {
-    const abortSignal = requestParams.config?.abortSignal;
+    const {
+      modelConfigKey,
+      contents,
+      systemInstruction,
+      abortSignal,
+      promptId,
+      maxAttempts,
+      additionalProperties,
+    } = options;
+
+    const {
+      model,
+      config: generateContentConfig,
+      maxAttempts: availabilityMaxAttempts,
+    } = applyModelSelection(this.config, modelConfigKey);
+
+    let currentModel = model;
+    let currentGenerateContentConfig = generateContentConfig;

    // Define callback to fetch context dynamically since active model may get updated during retry loop
    const getAvailabilityContext = createAvailabilityContextProvider(
      this.config,
-      () => requestParams.model,
+      () => currentModel,
    );

-    const {
-      model,
-      config: newConfig,
-      maxAttempts: availabilityMaxAttempts,
-    } = applyModelSelection(
-      this.config,
-      requestParams.model,
-      requestParams.config,
-    );
-    requestParams.model = model;
-    if (newConfig) {
-      requestParams.config = newConfig;
-    }
-    if (abortSignal) {
-      requestParams.config = { ...requestParams.config, abortSignal };
-    }
-
    try {
      const apiCall = () => {
        // Ensure we use the current active model
        // in case a fallback occurred in a previous attempt.
        const activeModel = this.config.getActiveModel();
-        if (activeModel !== requestParams.model) {
-          requestParams.model = activeModel;
+        if (activeModel !== currentModel) {
+          currentModel = activeModel;
          // Re-resolve config if model changed during retry
          const { generateContentConfig } =
            this.config.modelConfigService.getResolvedConfig({
+              ...modelConfigKey,
              model: activeModel,
            });
-          requestParams.config = {
-            ...requestParams.config,
-            ...generateContentConfig,
-          };
+          currentGenerateContentConfig = generateContentConfig;
        }
+        const finalConfig: GenerateContentConfig = {
+          ...currentGenerateContentConfig,
+          ...(systemInstruction && { systemInstruction }),
+          ...additionalProperties,
+          abortSignal,
+        };
+        const requestParams: GenerateContentParameters = {
+          model: currentModel,
+          config: finalConfig,
+          contents,
+        };
        return this.contentGenerator.generateContent(requestParams, promptId);
      };
@@ -289,7 +303,7 @@
        getAvailabilityContext,
        onPersistent429: this.config.isInteractive()
          ? (authType, error) =>
-              handleFallback(this.config, requestParams.model, authType, error)
+              handleFallback(this.config, currentModel, authType, error)
          : undefined,
        authType:
          this.authType ?? this.config.getContentGeneratorConfig()?.authType,
@@ -307,14 +321,14 @@
        await reportError(
          error,
          `API returned invalid content after all retries.`,
-          requestParams.contents as Content[],
+          contents,
          `${errorContext}-invalid-content`,
        );
      } else {
        await reportError(
          error,
          `Error generating content via API.`,
-          requestParams.contents as Content[],
+          contents,
          `${errorContext}-api`,
        );
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index 8d14c8cebd..fba1a5bd1a 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -539,11 +539,10 @@
    }

    // availability logic
+    const modelConfigKey: ModelConfigKey = { model: modelToUse };
    const { model: finalModel } = applyModelSelection(
      this.config,
-      modelToUse,
-      undefined,
-      undefined,
+      modelConfigKey,
      { consumeAttempt: false },
    );
    modelToUse = finalModel;
@@ -551,7 +550,7 @@
    this.currentSequenceModel = modelToUse;
    yield { type: GeminiEventType.ModelInfo, value: modelToUse };

-    const resultStream = turn.run({ model: modelToUse }, request, linkedSignal);
+    const resultStream = turn.run(modelConfigKey, request, linkedSignal);
    for await (const event of resultStream) {
      if (this.loopDetector.addAndCheck(event)) {
        yield { type: GeminiEventType.LoopDetected };
@@ -676,12 +675,7 @@
      model,
      config: newConfig,
      maxAttempts: availabilityMaxAttempts,
-    } = applyModelSelection(
-      this.config,
-      currentAttemptModel,
-      currentAttemptGenerateContentConfig,
-      modelConfigKey.overrideScope,
-    );
+    } = applyModelSelection(this.config, modelConfigKey);
    currentAttemptModel = model;
    if (newConfig) {
      currentAttemptGenerateContentConfig = newConfig;
diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts
index 0801c0747b..d7444c52bb 100644
--- a/packages/core/src/core/geminiChat.test.ts
+++ b/packages/core/src/core/geminiChat.test.ts
@@ -173,7 +173,7 @@ describe('GeminiChat', () => {
      return {
        model,
        generateContentConfig: {
-          temperature: 0,
+          temperature: modelConfigKey.isRetry ? 1 : 0,
          thinkingConfig,
        },
      };
@@ -2332,13 +2332,18 @@ describe('GeminiChat', () => {
      });

      // Different configs per model
-      vi.mocked(mockConfig.modelConfigService.getResolvedConfig)
-        .mockReturnValueOnce(
-          makeResolvedModelConfig('model-a', { temperature: 0.1 }),
-        )
-        .mockReturnValueOnce(
-          makeResolvedModelConfig('model-b', { temperature: 0.9 }),
-        );
+      vi.mocked(
+        mockConfig.modelConfigService.getResolvedConfig,
+      ).mockImplementation((key) => {
+        if (key.model === 'model-a') {
+          return makeResolvedModelConfig('model-a', { temperature: 0.1 });
+        }
+        if (key.model === 'model-b') {
+          return makeResolvedModelConfig('model-b', { temperature: 0.9 });
+        }
+        // Default for the initial requested model in this test
+        return makeResolvedModelConfig('model-a', { temperature: 0.1 });
+      });

      // First attempt uses model-a, then simulate availability switching to model-b
      mockRetryWithBackoff.mockImplementation(async (apiCall) => {
diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
index 43b5ad3a8e..300844e9c9 100644
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -16,13 +16,11 @@ import type {
  GenerateContentConfig,
  GenerateContentParameters,
} from '@google/genai';
-import { ThinkingLevel } from '@google/genai';
import { toParts } from '../code_assist/converter.js';
import { createUserContent, FinishReason } from '@google/genai';
import { retryWithBackoff, isRetryableError } from '../utils/retry.js';
import type { Config } from '../config/config.js';
import {
-  DEFAULT_THINKING_MODE,
  resolveModel,
  isGemini2Model,
  isPreviewModel,
@@ -276,9 +274,8 @@
    this.sendPromise = streamDonePromise;

    const userContent = createUserContent(message);
-    const { model, generateContentConfig } =
+    const { model } =
      this.config.modelConfigService.getResolvedConfig(modelConfigKey);
-    generateContentConfig.abortSignal = signal;

    // Record user input - capture complete message with all parts (text, files, images, etc.)
    // but skip recording function responses (tool call results) as they should be stored in tool call records
@@ -316,17 +313,18 @@
          yield { type: StreamEventType.RETRY };
        }

-        // If this is a retry, set temperature to 1 to encourage different output.
-        if (attempt > 0) {
-          generateContentConfig.temperature = 1;
-        }
+        // If this is a retry, update the key with the new context.
+        const currentConfigKey =
+          attempt > 0
+            ? { ...modelConfigKey, isRetry: true }
+            : modelConfigKey;

        isConnectionPhase = true;
        const stream = await this.makeApiCallAndProcessStream(
-          model,
-          generateContentConfig,
+          currentConfigKey,
          requestContents,
          prompt_id,
+          signal,
        );
        isConnectionPhase = false;
        for await (const chunk of stream) {
@@ -399,10 +397,10 @@
  }

  private async makeApiCallAndProcessStream(
-    model: string,
-    generateContentConfig: GenerateContentConfig,
+    modelConfigKey: ModelConfigKey,
    requestContents: Content[],
    prompt_id: string,
+    abortSignal: AbortSignal,
  ): Promise<AsyncGenerator<GenerateContentResponse>> {
    const contentsForPreviewModel =
      this.ensureActiveLoopHasThoughtSignatures(requestContents);
@@ -412,18 +410,11 @@
      model: availabilityFinalModel,
      config: newAvailabilityConfig,
      maxAttempts: availabilityMaxAttempts,
-    } = applyModelSelection(this.config, model, generateContentConfig);
+    } = applyModelSelection(this.config, modelConfigKey);

-    const abortSignal = generateContentConfig.abortSignal;
    let lastModelToUse = availabilityFinalModel;
    let currentGenerateContentConfig: GenerateContentConfig =
-      newAvailabilityConfig ?? generateContentConfig;
-    if (abortSignal) {
-      currentGenerateContentConfig = {
-        ...currentGenerateContentConfig,
-        abortSignal,
-      };
-    }
+      newAvailabilityConfig;

    let lastConfig: GenerateContentConfig = currentGenerateContentConfig;
    let lastContentsToUse: Content[] = requestContents;
@@ -448,47 +439,27 @@
          this.config.getActiveModel(),
          this.config.getPreviewFeatures(),
        );
+      }

-        if (modelToUse !== lastModelToUse) {
-          const { generateContentConfig: newConfig } =
-            this.config.modelConfigService.getResolvedConfig({
-              model: modelToUse,
-            });
-          currentGenerateContentConfig = {
-            ...currentGenerateContentConfig,
-            ...newConfig,
-          };
-          if (abortSignal) {
-            currentGenerateContentConfig.abortSignal = abortSignal;
-          }
-        }
+      if (modelToUse !== lastModelToUse) {
+        const { generateContentConfig: newConfig } =
+          this.config.modelConfigService.getResolvedConfig({
+            ...modelConfigKey,
+            model: modelToUse,
+          });
+        currentGenerateContentConfig = newConfig;
      }

      lastModelToUse = modelToUse;
-      const config = {
+      const config: GenerateContentConfig = {
        ...currentGenerateContentConfig,
        // TODO(12622): Ensure we don't overrwrite these when they are
        // passed via config.
        systemInstruction: this.systemInstruction,
        tools: this.tools,
+        abortSignal,
      };

-      // TODO(joshualitt): Clean this up with model configs.
-      if (modelToUse.startsWith('gemini-3')) {
-        config.thinkingConfig = {
-          ...config.thinkingConfig,
-          thinkingLevel: ThinkingLevel.HIGH,
-        };
-        delete config.thinkingConfig?.thinkingBudget;
-      } else {
-        // The `gemini-3` configs use thinkingLevel, so we have to invert the
-        // change above.
-        config.thinkingConfig = {
-          ...config.thinkingConfig,
-          thinkingBudget: DEFAULT_THINKING_MODE,
-        };
-        delete config.thinkingConfig?.thinkingLevel;
-      }

      let contentsToUse = isPreviewModel(modelToUse)
        ? contentsForPreviewModel
        : requestContents;
@@ -576,10 +547,11 @@
        onPersistent429: onPersistent429Callback,
        authType: this.config.getContentGeneratorConfig()?.authType,
        retryFetchErrors: this.config.getRetryFetchErrors(),
-        signal: generateContentConfig.abortSignal,
+        signal: abortSignal,
        maxAttempts:
          availabilityMaxAttempts ??
-          (this.config.isPreviewModelFallbackMode() && isPreviewModel(model)
+          (this.config.isPreviewModelFallbackMode() &&
+            isPreviewModel(lastModelToUse)
            ? 1
            : undefined),
        getAvailabilityContext,