feat(core): Late resolve GenerateContentConfigs and reduce mutation. (#14920)

This commit is contained in:
joshualitt
2025-12-17 12:29:26 -08:00
committed by GitHub
parent b465e12747
commit de7e1937f6
7 changed files with 160 additions and 159 deletions

View File

@@ -132,11 +132,15 @@ describe('policyHelpers', () => {
it('returns requested model if it is available', () => {
const config = createExtendedMockConfig();
mockModelConfigService.getResolvedConfig.mockReturnValue({
model: 'gemini-pro',
generateContentConfig: {},
});
mockAvailabilityService.selectFirstAvailable.mockReturnValue({
selectedModel: 'gemini-pro',
});
const result = applyModelSelection(config, 'gemini-pro');
const result = applyModelSelection(config, { model: 'gemini-pro' });
expect(result.model).toBe('gemini-pro');
expect(result.maxAttempts).toBeUndefined();
expect(config.setActiveModel).toHaveBeenCalledWith('gemini-pro');
@@ -144,15 +148,20 @@ describe('policyHelpers', () => {
it('switches to backup model and updates config if requested is unavailable', () => {
const config = createExtendedMockConfig();
mockModelConfigService.getResolvedConfig
.mockReturnValueOnce({
model: 'gemini-pro',
generateContentConfig: { temperature: 0.9, topP: 1 },
})
.mockReturnValueOnce({
model: 'gemini-flash',
generateContentConfig: { temperature: 0.1, topP: 1 },
});
mockAvailabilityService.selectFirstAvailable.mockReturnValue({
selectedModel: 'gemini-flash',
});
mockModelConfigService.getResolvedConfig.mockReturnValue({
generateContentConfig: { temperature: 0.1 },
});
const currentConfig = { temperature: 0.9, topP: 1 };
const result = applyModelSelection(config, 'gemini-pro', currentConfig);
const result = applyModelSelection(config, { model: 'gemini-pro' });
expect(result.model).toBe('gemini-flash');
expect(result.config).toEqual({
@@ -160,6 +169,9 @@ describe('policyHelpers', () => {
topP: 1,
});
expect(mockModelConfigService.getResolvedConfig).toHaveBeenCalledWith({
model: 'gemini-pro',
});
expect(mockModelConfigService.getResolvedConfig).toHaveBeenCalledWith({
model: 'gemini-flash',
});
@@ -168,12 +180,16 @@ describe('policyHelpers', () => {
it('consumes sticky attempt if indicated', () => {
const config = createExtendedMockConfig();
mockModelConfigService.getResolvedConfig.mockReturnValue({
model: 'gemini-pro',
generateContentConfig: {},
});
mockAvailabilityService.selectFirstAvailable.mockReturnValue({
selectedModel: 'gemini-pro',
attempts: 1,
});
const result = applyModelSelection(config, 'gemini-pro');
const result = applyModelSelection(config, { model: 'gemini-pro' });
expect(mockAvailabilityService.consumeStickyAttempt).toHaveBeenCalledWith(
'gemini-pro',
);
@@ -182,6 +198,10 @@ describe('policyHelpers', () => {
it('does not consume sticky attempt if consumeAttempt is false', () => {
const config = createExtendedMockConfig();
mockModelConfigService.getResolvedConfig.mockReturnValue({
model: 'gemini-pro',
generateContentConfig: {},
});
mockAvailabilityService.selectFirstAvailable.mockReturnValue({
selectedModel: 'gemini-pro',
attempts: 1,
@@ -189,9 +209,7 @@ describe('policyHelpers', () => {
const result = applyModelSelection(
config,
'gemini-pro',
undefined,
undefined,
{ model: 'gemini-pro' },
{
consumeAttempt: false,
},

View File

@@ -25,6 +25,7 @@ import {
resolveModel,
} from '../config/models.js';
import type { ModelSelectionResult } from './modelAvailabilityService.js';
import type { ModelConfigKey } from '../services/modelConfigService.js';
/**
* Resolves the active policy chain for the given config, ensuring the
@@ -155,31 +156,26 @@ export function selectModelForAvailability(
*/
export function applyModelSelection(
config: Config,
requestedModel: string,
currentConfig?: GenerateContentConfig,
overrideScope?: string,
modelConfigKey: ModelConfigKey,
options: { consumeAttempt?: boolean } = {},
): { model: string; config?: GenerateContentConfig; maxAttempts?: number } {
const selection = selectModelForAvailability(config, requestedModel);
): { model: string; config: GenerateContentConfig; maxAttempts?: number } {
const resolved = config.modelConfigService.getResolvedConfig(modelConfigKey);
const model = resolved.model;
const selection = selectModelForAvailability(config, model);
if (!selection?.selectedModel) {
return { model: requestedModel, config: currentConfig };
if (!selection) {
return { model, config: resolved.generateContentConfig };
}
const finalModel = selection.selectedModel;
let finalConfig = currentConfig;
const finalModel = selection.selectedModel ?? model;
let generateContentConfig = resolved.generateContentConfig;
// If model changed, re-resolve config
if (finalModel !== requestedModel) {
const { generateContentConfig } =
config.modelConfigService.getResolvedConfig({
overrideScope,
model: finalModel,
});
finalConfig = currentConfig
? { ...currentConfig, ...generateContentConfig }
: generateContentConfig;
if (finalModel !== model) {
const fallbackResolved = config.modelConfigService.getResolvedConfig({
...modelConfigKey,
model: finalModel,
});
generateContentConfig = fallbackResolved.generateContentConfig;
}
config.setActiveModel(finalModel);
@@ -190,7 +186,7 @@ export function applyModelSelection(
return {
model: finalModel,
config: finalConfig,
config: generateContentConfig,
maxAttempts: selection.attempts,
};
}

View File

@@ -776,13 +776,15 @@ describe('BaseLlmClient', () => {
const getResolvedConfigMock = vi.mocked(
mockConfig.modelConfigService.getResolvedConfig,
);
getResolvedConfigMock
.mockReturnValueOnce(
makeResolvedModelConfig(firstModel, { temperature: 0.1 }),
)
.mockReturnValueOnce(
makeResolvedModelConfig(fallbackModel, { temperature: 0.9 }),
);
getResolvedConfigMock.mockImplementation((key) => {
if (key.model === firstModel) {
return makeResolvedModelConfig(firstModel, { temperature: 0.1 });
}
if (key.model === fallbackModel) {
return makeResolvedModelConfig(fallbackModel, { temperature: 0.9 });
}
return makeResolvedModelConfig(key.model);
});
// Availability selects the first model initially
vi.mocked(mockAvailabilityService.selectFirstAvailable).mockReturnValue({

View File

@@ -10,6 +10,7 @@ import type {
EmbedContentParameters,
GenerateContentResponse,
GenerateContentParameters,
GenerateContentConfig,
} from '@google/genai';
import type { Config } from '../config/config.js';
import type { ContentGenerator } from './contentGenerator.js';
@@ -81,6 +82,19 @@ export interface GenerateContentOptions {
maxAttempts?: number;
}
interface _CommonGenerateOptions {
modelConfigKey: ModelConfigKey;
contents: Content[];
systemInstruction?: string | Part | Part[] | Content;
abortSignal: AbortSignal;
promptId: string;
maxAttempts?: number;
additionalProperties?: {
responseJsonSchema: Record<string, unknown>;
responseMimeType: string;
};
}
/**
* A client dedicated to stateless, utility-focused LLM calls.
*/
@@ -104,7 +118,7 @@ export class BaseLlmClient {
maxAttempts,
} = options;
const { model, generateContentConfig } =
const { model } =
this.config.modelConfigService.getResolvedConfig(modelConfigKey);
const shouldRetryOnContent = (response: GenerateContentResponse) => {
@@ -123,18 +137,17 @@ export class BaseLlmClient {
const result = await this._generateWithRetry(
{
model,
modelConfigKey,
contents,
config: {
...generateContentConfig,
...(systemInstruction && { systemInstruction }),
abortSignal,
promptId,
maxAttempts,
systemInstruction,
additionalProperties: {
responseJsonSchema: schema,
responseMimeType: 'application/json',
abortSignal,
},
},
promptId,
maxAttempts,
shouldRetryOnContent,
'generateJson',
);
@@ -205,9 +218,6 @@ export class BaseLlmClient {
maxAttempts,
} = options;
const { model, generateContentConfig } =
this.config.modelConfigService.getResolvedConfig(modelConfigKey);
const shouldRetryOnContent = (response: GenerateContentResponse) => {
const text = getResponseText(response)?.trim();
return !text; // Retry on empty response
@@ -215,70 +225,74 @@ export class BaseLlmClient {
return this._generateWithRetry(
{
model,
modelConfigKey,
contents,
config: {
...generateContentConfig,
...(systemInstruction && { systemInstruction }),
abortSignal,
},
systemInstruction,
abortSignal,
promptId,
maxAttempts,
},
promptId,
maxAttempts,
shouldRetryOnContent,
'generateContent',
);
}
private async _generateWithRetry(
requestParams: GenerateContentParameters,
promptId: string,
maxAttempts: number | undefined,
options: _CommonGenerateOptions,
shouldRetryOnContent: (response: GenerateContentResponse) => boolean,
errorContext: 'generateJson' | 'generateContent',
): Promise<GenerateContentResponse> {
const abortSignal = requestParams.config?.abortSignal;
const {
modelConfigKey,
contents,
systemInstruction,
abortSignal,
promptId,
maxAttempts,
additionalProperties,
} = options;
const {
model,
config: generateContentConfig,
maxAttempts: availabilityMaxAttempts,
} = applyModelSelection(this.config, modelConfigKey);
let currentModel = model;
let currentGenerateContentConfig = generateContentConfig;
// Define callback to fetch context dynamically since active model may get updated during retry loop
const getAvailabilityContext = createAvailabilityContextProvider(
this.config,
() => requestParams.model,
() => currentModel,
);
const {
model,
config: newConfig,
maxAttempts: availabilityMaxAttempts,
} = applyModelSelection(
this.config,
requestParams.model,
requestParams.config,
);
requestParams.model = model;
if (newConfig) {
requestParams.config = newConfig;
}
if (abortSignal) {
requestParams.config = { ...requestParams.config, abortSignal };
}
try {
const apiCall = () => {
// Ensure we use the current active model
// in case a fallback occurred in a previous attempt.
const activeModel = this.config.getActiveModel();
if (activeModel !== requestParams.model) {
requestParams.model = activeModel;
if (activeModel !== currentModel) {
currentModel = activeModel;
// Re-resolve config if model changed during retry
const { generateContentConfig } =
this.config.modelConfigService.getResolvedConfig({
...modelConfigKey,
model: activeModel,
});
requestParams.config = {
...requestParams.config,
...generateContentConfig,
};
currentGenerateContentConfig = generateContentConfig;
}
const finalConfig: GenerateContentConfig = {
...currentGenerateContentConfig,
...(systemInstruction && { systemInstruction }),
...additionalProperties,
abortSignal,
};
const requestParams: GenerateContentParameters = {
model: currentModel,
config: finalConfig,
contents,
};
return this.contentGenerator.generateContent(requestParams, promptId);
};
@@ -289,7 +303,7 @@ export class BaseLlmClient {
getAvailabilityContext,
onPersistent429: this.config.isInteractive()
? (authType, error) =>
handleFallback(this.config, requestParams.model, authType, error)
handleFallback(this.config, currentModel, authType, error)
: undefined,
authType:
this.authType ?? this.config.getContentGeneratorConfig()?.authType,
@@ -307,14 +321,14 @@ export class BaseLlmClient {
await reportError(
error,
`API returned invalid content after all retries.`,
requestParams.contents as Content[],
contents,
`${errorContext}-invalid-content`,
);
} else {
await reportError(
error,
`Error generating content via API.`,
requestParams.contents as Content[],
contents,
`${errorContext}-api`,
);
}

View File

@@ -539,11 +539,10 @@ export class GeminiClient {
}
// availability logic
const modelConfigKey: ModelConfigKey = { model: modelToUse };
const { model: finalModel } = applyModelSelection(
this.config,
modelToUse,
undefined,
undefined,
modelConfigKey,
{ consumeAttempt: false },
);
modelToUse = finalModel;
@@ -551,7 +550,7 @@ export class GeminiClient {
this.currentSequenceModel = modelToUse;
yield { type: GeminiEventType.ModelInfo, value: modelToUse };
const resultStream = turn.run({ model: modelToUse }, request, linkedSignal);
const resultStream = turn.run(modelConfigKey, request, linkedSignal);
for await (const event of resultStream) {
if (this.loopDetector.addAndCheck(event)) {
yield { type: GeminiEventType.LoopDetected };
@@ -676,12 +675,7 @@ export class GeminiClient {
model,
config: newConfig,
maxAttempts: availabilityMaxAttempts,
} = applyModelSelection(
this.config,
currentAttemptModel,
currentAttemptGenerateContentConfig,
modelConfigKey.overrideScope,
);
} = applyModelSelection(this.config, modelConfigKey);
currentAttemptModel = model;
if (newConfig) {
currentAttemptGenerateContentConfig = newConfig;

View File

@@ -173,7 +173,7 @@ describe('GeminiChat', () => {
return {
model,
generateContentConfig: {
temperature: 0,
temperature: modelConfigKey.isRetry ? 1 : 0,
thinkingConfig,
},
};
@@ -2332,13 +2332,18 @@ describe('GeminiChat', () => {
});
// Different configs per model
vi.mocked(mockConfig.modelConfigService.getResolvedConfig)
.mockReturnValueOnce(
makeResolvedModelConfig('model-a', { temperature: 0.1 }),
)
.mockReturnValueOnce(
makeResolvedModelConfig('model-b', { temperature: 0.9 }),
);
vi.mocked(
mockConfig.modelConfigService.getResolvedConfig,
).mockImplementation((key) => {
if (key.model === 'model-a') {
return makeResolvedModelConfig('model-a', { temperature: 0.1 });
}
if (key.model === 'model-b') {
return makeResolvedModelConfig('model-b', { temperature: 0.9 });
}
// Default for the initial requested model in this test
return makeResolvedModelConfig('model-a', { temperature: 0.1 });
});
// First attempt uses model-a, then simulate availability switching to model-b
mockRetryWithBackoff.mockImplementation(async (apiCall) => {

View File

@@ -16,13 +16,11 @@ import type {
GenerateContentConfig,
GenerateContentParameters,
} from '@google/genai';
import { ThinkingLevel } from '@google/genai';
import { toParts } from '../code_assist/converter.js';
import { createUserContent, FinishReason } from '@google/genai';
import { retryWithBackoff, isRetryableError } from '../utils/retry.js';
import type { Config } from '../config/config.js';
import {
DEFAULT_THINKING_MODE,
resolveModel,
isGemini2Model,
isPreviewModel,
@@ -276,9 +274,8 @@ export class GeminiChat {
this.sendPromise = streamDonePromise;
const userContent = createUserContent(message);
const { model, generateContentConfig } =
const { model } =
this.config.modelConfigService.getResolvedConfig(modelConfigKey);
generateContentConfig.abortSignal = signal;
// Record user input - capture complete message with all parts (text, files, images, etc.)
// but skip recording function responses (tool call results) as they should be stored in tool call records
@@ -316,17 +313,18 @@ export class GeminiChat {
yield { type: StreamEventType.RETRY };
}
// If this is a retry, set temperature to 1 to encourage different output.
if (attempt > 0) {
generateContentConfig.temperature = 1;
}
// If this is a retry, update the key with the new context.
const currentConfigKey =
attempt > 0
? { ...modelConfigKey, isRetry: true }
: modelConfigKey;
isConnectionPhase = true;
const stream = await this.makeApiCallAndProcessStream(
model,
generateContentConfig,
currentConfigKey,
requestContents,
prompt_id,
signal,
);
isConnectionPhase = false;
for await (const chunk of stream) {
@@ -399,10 +397,10 @@ export class GeminiChat {
}
private async makeApiCallAndProcessStream(
model: string,
generateContentConfig: GenerateContentConfig,
modelConfigKey: ModelConfigKey,
requestContents: Content[],
prompt_id: string,
abortSignal: AbortSignal,
): Promise<AsyncGenerator<GenerateContentResponse>> {
const contentsForPreviewModel =
this.ensureActiveLoopHasThoughtSignatures(requestContents);
@@ -412,18 +410,11 @@ export class GeminiChat {
model: availabilityFinalModel,
config: newAvailabilityConfig,
maxAttempts: availabilityMaxAttempts,
} = applyModelSelection(this.config, model, generateContentConfig);
} = applyModelSelection(this.config, modelConfigKey);
const abortSignal = generateContentConfig.abortSignal;
let lastModelToUse = availabilityFinalModel;
let currentGenerateContentConfig: GenerateContentConfig =
newAvailabilityConfig ?? generateContentConfig;
if (abortSignal) {
currentGenerateContentConfig = {
...currentGenerateContentConfig,
abortSignal,
};
}
newAvailabilityConfig;
let lastConfig: GenerateContentConfig = currentGenerateContentConfig;
let lastContentsToUse: Content[] = requestContents;
@@ -448,47 +439,27 @@ export class GeminiChat {
this.config.getActiveModel(),
this.config.getPreviewFeatures(),
);
}
if (modelToUse !== lastModelToUse) {
const { generateContentConfig: newConfig } =
this.config.modelConfigService.getResolvedConfig({
model: modelToUse,
});
currentGenerateContentConfig = {
...currentGenerateContentConfig,
...newConfig,
};
if (abortSignal) {
currentGenerateContentConfig.abortSignal = abortSignal;
}
}
if (modelToUse !== lastModelToUse) {
const { generateContentConfig: newConfig } =
this.config.modelConfigService.getResolvedConfig({
...modelConfigKey,
model: modelToUse,
});
currentGenerateContentConfig = newConfig;
}
lastModelToUse = modelToUse;
const config = {
const config: GenerateContentConfig = {
...currentGenerateContentConfig,
// TODO(12622): Ensure we don't overwrite these when they are
// passed via config.
systemInstruction: this.systemInstruction,
tools: this.tools,
abortSignal,
};
// TODO(joshualitt): Clean this up with model configs.
if (modelToUse.startsWith('gemini-3')) {
config.thinkingConfig = {
...config.thinkingConfig,
thinkingLevel: ThinkingLevel.HIGH,
};
delete config.thinkingConfig?.thinkingBudget;
} else {
// The `gemini-3` configs use thinkingLevel, so we have to invert the
// change above.
config.thinkingConfig = {
...config.thinkingConfig,
thinkingBudget: DEFAULT_THINKING_MODE,
};
delete config.thinkingConfig?.thinkingLevel;
}
let contentsToUse = isPreviewModel(modelToUse)
? contentsForPreviewModel
: requestContents;
@@ -576,10 +547,11 @@ export class GeminiChat {
onPersistent429: onPersistent429Callback,
authType: this.config.getContentGeneratorConfig()?.authType,
retryFetchErrors: this.config.getRetryFetchErrors(),
signal: generateContentConfig.abortSignal,
signal: abortSignal,
maxAttempts:
availabilityMaxAttempts ??
(this.config.isPreviewModelFallbackMode() && isPreviewModel(model)
(this.config.isPreviewModelFallbackMode() &&
isPreviewModel(lastModelToUse)
? 1
: undefined),
getAvailabilityContext,