mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-22 20:14:58 -07:00
feat: launch Gemini 3 Flash in Gemini CLI ⚡️⚡️⚡️ (#15196)
Co-authored-by: gemini-cli-robot <gemini-cli-robot@google.com>
Co-authored-by: joshualitt <joshualitt@google.com>
Co-authored-by: Sehoon Shon <sshon@google.com>
Co-authored-by: Adam Weidman <65992621+adamfweidman@users.noreply.github.com>
Co-authored-by: Adib234 <30782825+Adib234@users.noreply.github.com>
Co-authored-by: Jenna Inouye <jinouye@google.com>
This commit is contained in:
committed by
GitHub
parent
18698d6929
commit
bf90b59935
@@ -7,10 +7,7 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { ModelRouterService } from './modelRouterService.js';
|
||||
import { Config } from '../config/config.js';
|
||||
import {
|
||||
PREVIEW_GEMINI_MODEL,
|
||||
DEFAULT_GEMINI_MODEL,
|
||||
} from '../config/models.js';
|
||||
|
||||
import type { BaseLlmClient } from '../core/baseLlmClient.js';
|
||||
import type { RoutingContext, RoutingDecision } from './routingStrategy.js';
|
||||
import { DefaultStrategy } from './strategies/defaultStrategy.js';
|
||||
@@ -151,81 +148,5 @@ describe('ModelRouterService', () => {
|
||||
expect.any(ModelRoutingEvent),
|
||||
);
|
||||
});
|
||||
|
||||
it('should upgrade to Preview Model when preview features are enabled and model is 2.5 Pro', async () => {
|
||||
vi.spyOn(mockCompositeStrategy, 'route').mockResolvedValue({
|
||||
model: DEFAULT_GEMINI_MODEL,
|
||||
metadata: { source: 'test', latencyMs: 0, reasoning: 'test' },
|
||||
});
|
||||
vi.spyOn(mockConfig, 'getPreviewFeatures').mockReturnValue(true);
|
||||
vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(false);
|
||||
|
||||
const decision = await service.route(mockContext);
|
||||
|
||||
expect(decision.model).toBe(PREVIEW_GEMINI_MODEL);
|
||||
});
|
||||
|
||||
it('should NOT upgrade to Preview Model when preview features are disabled', async () => {
|
||||
vi.spyOn(mockCompositeStrategy, 'route').mockResolvedValue({
|
||||
model: DEFAULT_GEMINI_MODEL,
|
||||
metadata: { source: 'test', latencyMs: 0, reasoning: 'test' },
|
||||
});
|
||||
vi.spyOn(mockConfig, 'getPreviewFeatures').mockReturnValue(false);
|
||||
|
||||
const decision = await service.route(mockContext);
|
||||
|
||||
expect(decision.model).toBe(DEFAULT_GEMINI_MODEL);
|
||||
});
|
||||
|
||||
it('should upgrade to Preview Model when preview features are enabled and model is explicitly set to Pro', async () => {
|
||||
// Simulate OverrideStrategy returning Preview Model (as resolveModel would do for "pro")
|
||||
vi.spyOn(mockCompositeStrategy, 'route').mockResolvedValue({
|
||||
model: PREVIEW_GEMINI_MODEL,
|
||||
metadata: {
|
||||
source: 'override',
|
||||
latencyMs: 0,
|
||||
reasoning: 'User selected',
|
||||
},
|
||||
});
|
||||
vi.spyOn(mockConfig, 'getPreviewFeatures').mockReturnValue(true);
|
||||
vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(false);
|
||||
|
||||
const decision = await service.route(mockContext);
|
||||
|
||||
expect(decision.model).toBe(PREVIEW_GEMINI_MODEL);
|
||||
});
|
||||
|
||||
it('should NOT upgrade to Preview Model when preview features are enabled and model is explicitly set to a specific string', async () => {
|
||||
// Simulate OverrideStrategy returning a specific model (e.g. "gemini-2.5-pro")
|
||||
// This happens when user explicitly sets model to "gemini-2.5-pro" instead of "pro"
|
||||
vi.spyOn(mockCompositeStrategy, 'route').mockResolvedValue({
|
||||
model: DEFAULT_GEMINI_MODEL,
|
||||
metadata: {
|
||||
source: 'override',
|
||||
latencyMs: 0,
|
||||
reasoning: 'User selected',
|
||||
},
|
||||
});
|
||||
vi.spyOn(mockConfig, 'getPreviewFeatures').mockReturnValue(true);
|
||||
vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(false);
|
||||
|
||||
const decision = await service.route(mockContext);
|
||||
|
||||
// Should NOT upgrade to Preview Model because source is 'override' and model is specific
|
||||
expect(decision.model).toBe(DEFAULT_GEMINI_MODEL);
|
||||
});
|
||||
|
||||
it('should upgrade to Preview Model even if fallback mode is active (probing behavior)', async () => {
|
||||
vi.spyOn(mockCompositeStrategy, 'route').mockResolvedValue({
|
||||
model: DEFAULT_GEMINI_MODEL,
|
||||
metadata: { source: 'default', latencyMs: 0, reasoning: 'Default' },
|
||||
});
|
||||
vi.spyOn(mockConfig, 'getPreviewFeatures').mockReturnValue(true);
|
||||
vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(true);
|
||||
|
||||
const decision = await service.route(mockContext);
|
||||
|
||||
expect(decision.model).toBe(PREVIEW_GEMINI_MODEL);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -5,10 +5,6 @@
|
||||
*/
|
||||
|
||||
import type { Config } from '../config/config.js';
|
||||
import {
|
||||
PREVIEW_GEMINI_MODEL,
|
||||
DEFAULT_GEMINI_MODEL,
|
||||
} from '../config/models.js';
|
||||
import type {
|
||||
RoutingContext,
|
||||
RoutingDecision,
|
||||
@@ -66,23 +62,6 @@ export class ModelRouterService {
|
||||
this.config.getBaseLlmClient(),
|
||||
);
|
||||
|
||||
// Unified Preview Model Logic:
|
||||
// If the decision is to use 'gemini-2.5-pro' and preview features are enabled,
|
||||
// we attempt to upgrade to 'gemini-3.0-pro' (Preview Model).
|
||||
if (
|
||||
decision.model === DEFAULT_GEMINI_MODEL &&
|
||||
this.config.getPreviewFeatures() &&
|
||||
!decision.metadata.source.includes('override')
|
||||
) {
|
||||
// We ALWAYS attempt to upgrade to Preview Model here.
|
||||
// If we are in fallback mode, the 'previewModelBypassMode' flag (handled in handler.ts/geminiChat.ts)
|
||||
// will ensure we downgrade to 2.5 Pro for the actual API call if needed.
|
||||
// This allows us to "probe" Preview Model periodically (i.e., every new request tries Preview Model first).
|
||||
decision.model = PREVIEW_GEMINI_MODEL;
|
||||
decision.metadata.source += ' (Preview Model)';
|
||||
decision.metadata.reasoning += ' (Upgraded to Preview Model)';
|
||||
}
|
||||
|
||||
const event = new ModelRoutingEvent(
|
||||
decision.model,
|
||||
decision.metadata.source,
|
||||
|
||||
@@ -16,6 +16,7 @@ import {
|
||||
import {
|
||||
DEFAULT_GEMINI_FLASH_MODEL,
|
||||
DEFAULT_GEMINI_MODEL,
|
||||
DEFAULT_GEMINI_MODEL_AUTO,
|
||||
} from '../../config/models.js';
|
||||
import { promptIdContext } from '../../utils/promptIdContext.js';
|
||||
import type { Content } from '@google/genai';
|
||||
@@ -50,6 +51,7 @@ describe('ClassifierStrategy', () => {
|
||||
modelConfigService: {
|
||||
getResolvedConfig: vi.fn().mockReturnValue(mockResolvedConfig),
|
||||
},
|
||||
getModel: () => DEFAULT_GEMINI_MODEL_AUTO,
|
||||
getPreviewFeatures: () => false,
|
||||
} as unknown as Config;
|
||||
mockBaseLlmClient = {
|
||||
|
||||
@@ -12,11 +12,7 @@ import type {
|
||||
RoutingDecision,
|
||||
RoutingStrategy,
|
||||
} from '../routingStrategy.js';
|
||||
import {
|
||||
GEMINI_MODEL_ALIAS_FLASH,
|
||||
GEMINI_MODEL_ALIAS_PRO,
|
||||
resolveModel,
|
||||
} from '../../config/models.js';
|
||||
import { resolveClassifierModel } from '../../config/models.js';
|
||||
import { createUserContent, Type } from '@google/genai';
|
||||
import type { Config } from '../../config/config.js';
|
||||
import {
|
||||
@@ -171,32 +167,20 @@ export class ClassifierStrategy implements RoutingStrategy {
|
||||
|
||||
const reasoning = routerResponse.reasoning;
|
||||
const latencyMs = Date.now() - startTime;
|
||||
const selectedModel = resolveClassifierModel(
|
||||
config.getModel(),
|
||||
routerResponse.model_choice,
|
||||
config.getPreviewFeatures(),
|
||||
);
|
||||
|
||||
if (routerResponse.model_choice === FLASH_MODEL) {
|
||||
return {
|
||||
model: resolveModel(
|
||||
GEMINI_MODEL_ALIAS_FLASH,
|
||||
config.getPreviewFeatures(),
|
||||
),
|
||||
metadata: {
|
||||
source: 'Classifier',
|
||||
latencyMs,
|
||||
reasoning,
|
||||
},
|
||||
};
|
||||
} else {
|
||||
return {
|
||||
model: resolveModel(
|
||||
GEMINI_MODEL_ALIAS_PRO,
|
||||
config.getPreviewFeatures(),
|
||||
),
|
||||
metadata: {
|
||||
source: 'Classifier',
|
||||
reasoning,
|
||||
latencyMs,
|
||||
},
|
||||
};
|
||||
}
|
||||
return {
|
||||
model: selectedModel,
|
||||
metadata: {
|
||||
source: 'Classifier',
|
||||
latencyMs,
|
||||
reasoning,
|
||||
},
|
||||
};
|
||||
} catch (error) {
|
||||
// If the classifier fails for any reason (API error, parsing error, etc.),
|
||||
// we log it and return null to allow the composite strategy to proceed.
|
||||
|
||||
@@ -4,87 +4,118 @@
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { FallbackStrategy } from './fallbackStrategy.js';
|
||||
import type { RoutingContext } from '../routingStrategy.js';
|
||||
import type { BaseLlmClient } from '../../core/baseLlmClient.js';
|
||||
import type { Config } from '../../config/config.js';
|
||||
import type { ModelAvailabilityService } from '../../availability/modelAvailabilityService.js';
|
||||
import {
|
||||
DEFAULT_GEMINI_MODEL,
|
||||
DEFAULT_GEMINI_FLASH_MODEL,
|
||||
DEFAULT_GEMINI_FLASH_LITE_MODEL,
|
||||
DEFAULT_GEMINI_MODEL_AUTO,
|
||||
} from '../../config/models.js';
|
||||
import { selectModelForAvailability } from '../../availability/policyHelpers.js';
|
||||
|
||||
vi.mock('../../availability/policyHelpers.js', () => ({
|
||||
selectModelForAvailability: vi.fn(),
|
||||
}));
|
||||
|
||||
const createMockConfig = (overrides: Partial<Config> = {}): Config =>
|
||||
({
|
||||
isModelAvailabilityServiceEnabled: vi.fn().mockReturnValue(true),
|
||||
getModelAvailabilityService: vi.fn(),
|
||||
getModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL),
|
||||
getPreviewFeatures: vi.fn().mockReturnValue(false),
|
||||
...overrides,
|
||||
}) as unknown as Config;
|
||||
|
||||
describe('FallbackStrategy', () => {
|
||||
const strategy = new FallbackStrategy();
|
||||
const mockContext = {} as RoutingContext;
|
||||
const mockClient = {} as BaseLlmClient;
|
||||
let mockService: ModelAvailabilityService;
|
||||
let mockConfig: Config;
|
||||
|
||||
it('should return null when not in fallback mode', async () => {
|
||||
const mockConfig = {
|
||||
isInFallbackMode: () => false,
|
||||
getModel: () => DEFAULT_GEMINI_MODEL,
|
||||
getPreviewFeatures: () => false,
|
||||
} as Config;
|
||||
beforeEach(() => {
|
||||
vi.resetAllMocks();
|
||||
|
||||
mockService = {
|
||||
snapshot: vi.fn(),
|
||||
} as unknown as ModelAvailabilityService;
|
||||
|
||||
mockConfig = createMockConfig({
|
||||
getModelAvailabilityService: vi.fn().mockReturnValue(mockService),
|
||||
});
|
||||
});
|
||||
|
||||
it('should return null if service is disabled', async () => {
|
||||
vi.mocked(mockConfig.isModelAvailabilityServiceEnabled).mockReturnValue(
|
||||
false,
|
||||
);
|
||||
|
||||
const decision = await strategy.route(mockContext, mockConfig, mockClient);
|
||||
expect(decision).toBeNull();
|
||||
});
|
||||
|
||||
describe('when in fallback mode', () => {
|
||||
it('should downgrade a pro model to the flash model', async () => {
|
||||
const mockConfig = {
|
||||
isInFallbackMode: () => true,
|
||||
getModel: () => DEFAULT_GEMINI_MODEL,
|
||||
getPreviewFeatures: () => false,
|
||||
} as Config;
|
||||
it('should return null if the requested model is available', async () => {
|
||||
// Mock snapshot to return available
|
||||
vi.mocked(mockService.snapshot).mockReturnValue({ available: true });
|
||||
|
||||
const decision = await strategy.route(
|
||||
mockContext,
|
||||
mockConfig,
|
||||
mockClient,
|
||||
);
|
||||
const decision = await strategy.route(mockContext, mockConfig, mockClient);
|
||||
expect(decision).toBeNull();
|
||||
// Should check availability of the resolved model (DEFAULT_GEMINI_MODEL)
|
||||
expect(mockService.snapshot).toHaveBeenCalledWith(DEFAULT_GEMINI_MODEL);
|
||||
});
|
||||
|
||||
expect(decision).not.toBeNull();
|
||||
expect(decision?.model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
|
||||
expect(decision?.metadata.source).toBe('fallback');
|
||||
expect(decision?.metadata.reasoning).toContain('In fallback mode');
|
||||
it('should return null if fallback selection is same as requested model', async () => {
|
||||
// Mock snapshot to return unavailable
|
||||
vi.mocked(mockService.snapshot).mockReturnValue({
|
||||
available: false,
|
||||
reason: 'quota',
|
||||
});
|
||||
// Mock selectModelForAvailability to return the SAME model (no fallback found)
|
||||
vi.mocked(selectModelForAvailability).mockReturnValue({
|
||||
selectedModel: DEFAULT_GEMINI_MODEL,
|
||||
skipped: [],
|
||||
});
|
||||
|
||||
it('should honor a lite model request', async () => {
|
||||
const mockConfig = {
|
||||
isInFallbackMode: () => true,
|
||||
getModel: () => DEFAULT_GEMINI_FLASH_LITE_MODEL,
|
||||
getPreviewFeatures: () => false,
|
||||
} as Config;
|
||||
const decision = await strategy.route(mockContext, mockConfig, mockClient);
|
||||
expect(decision).toBeNull();
|
||||
});
|
||||
|
||||
const decision = await strategy.route(
|
||||
mockContext,
|
||||
mockConfig,
|
||||
mockClient,
|
||||
);
|
||||
|
||||
expect(decision).not.toBeNull();
|
||||
expect(decision?.model).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL);
|
||||
expect(decision?.metadata.source).toBe('fallback');
|
||||
it('should return fallback decision if model is unavailable and fallback found', async () => {
|
||||
// Mock snapshot to return unavailable
|
||||
vi.mocked(mockService.snapshot).mockReturnValue({
|
||||
available: false,
|
||||
reason: 'quota',
|
||||
});
|
||||
|
||||
it('should use the flash model if flash is requested', async () => {
|
||||
const mockConfig = {
|
||||
isInFallbackMode: () => true,
|
||||
getModel: () => DEFAULT_GEMINI_FLASH_MODEL,
|
||||
getPreviewFeatures: () => false,
|
||||
} as Config;
|
||||
|
||||
const decision = await strategy.route(
|
||||
mockContext,
|
||||
mockConfig,
|
||||
mockClient,
|
||||
);
|
||||
|
||||
expect(decision).not.toBeNull();
|
||||
expect(decision?.model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
|
||||
expect(decision?.metadata.source).toBe('fallback');
|
||||
// Mock selectModelForAvailability to find a fallback (Flash)
|
||||
vi.mocked(selectModelForAvailability).mockReturnValue({
|
||||
selectedModel: DEFAULT_GEMINI_FLASH_MODEL,
|
||||
skipped: [{ model: DEFAULT_GEMINI_MODEL, reason: 'quota' }],
|
||||
});
|
||||
|
||||
const decision = await strategy.route(mockContext, mockConfig, mockClient);
|
||||
|
||||
expect(decision).not.toBeNull();
|
||||
expect(decision?.model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
|
||||
expect(decision?.metadata.source).toBe('fallback');
|
||||
expect(decision?.metadata.reasoning).toContain(
|
||||
`Model ${DEFAULT_GEMINI_MODEL} is unavailable`,
|
||||
);
|
||||
});
|
||||
|
||||
it('should correctly handle "auto" alias by resolving it before checking availability', async () => {
|
||||
// Mock snapshot to return available for the RESOLVED model
|
||||
vi.mocked(mockService.snapshot).mockReturnValue({ available: true });
|
||||
vi.mocked(mockConfig.getModel).mockReturnValue(DEFAULT_GEMINI_MODEL_AUTO);
|
||||
|
||||
const decision = await strategy.route(mockContext, mockConfig, mockClient);
|
||||
|
||||
expect(decision).toBeNull();
|
||||
// Important: check that it queried snapshot with the RESOLVED model, not 'auto'
|
||||
expect(mockService.snapshot).toHaveBeenCalledWith(DEFAULT_GEMINI_MODEL);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -4,8 +4,9 @@
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { selectModelForAvailability } from '../../availability/policyHelpers.js';
|
||||
import type { Config } from '../../config/config.js';
|
||||
import { getEffectiveModel } from '../../config/models.js';
|
||||
import { resolveModel } from '../../config/models.js';
|
||||
import type { BaseLlmClient } from '../../core/baseLlmClient.js';
|
||||
import type {
|
||||
RoutingContext,
|
||||
@@ -21,24 +22,38 @@ export class FallbackStrategy implements RoutingStrategy {
|
||||
config: Config,
|
||||
_baseLlmClient: BaseLlmClient,
|
||||
): Promise<RoutingDecision | null> {
|
||||
const isInFallbackMode: boolean = config.isInFallbackMode();
|
||||
|
||||
if (!isInFallbackMode) {
|
||||
if (!config.isModelAvailabilityServiceEnabled()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const effectiveModel = getEffectiveModel(
|
||||
isInFallbackMode,
|
||||
config.getModel(),
|
||||
const requestedModel = config.getModel();
|
||||
const resolvedModel = resolveModel(
|
||||
requestedModel,
|
||||
config.getPreviewFeatures(),
|
||||
);
|
||||
return {
|
||||
model: effectiveModel,
|
||||
metadata: {
|
||||
source: this.name,
|
||||
latencyMs: 0,
|
||||
reasoning: `In fallback mode. Using: ${effectiveModel}`,
|
||||
},
|
||||
};
|
||||
const service = config.getModelAvailabilityService();
|
||||
const snapshot = service.snapshot(resolvedModel);
|
||||
|
||||
if (snapshot.available) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const selection = selectModelForAvailability(config, requestedModel);
|
||||
|
||||
if (
|
||||
selection?.selectedModel &&
|
||||
selection.selectedModel !== requestedModel
|
||||
) {
|
||||
return {
|
||||
model: selection.selectedModel,
|
||||
metadata: {
|
||||
source: this.name,
|
||||
latencyMs: 0,
|
||||
reasoning: `Model ${requestedModel} is unavailable (${snapshot.reason}). Using fallback: ${selection.selectedModel}`,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,7 +7,8 @@
|
||||
import type { Config } from '../../config/config.js';
|
||||
import {
|
||||
DEFAULT_GEMINI_MODEL_AUTO,
|
||||
resolveModel,
|
||||
getEffectiveModel,
|
||||
PREVIEW_GEMINI_MODEL_AUTO,
|
||||
} from '../../config/models.js';
|
||||
import type { BaseLlmClient } from '../../core/baseLlmClient.js';
|
||||
import type {
|
||||
@@ -30,11 +31,15 @@ export class OverrideStrategy implements RoutingStrategy {
|
||||
const overrideModel = config.getModel();
|
||||
|
||||
// If the model is 'auto' we should pass to the next strategy.
|
||||
if (overrideModel === DEFAULT_GEMINI_MODEL_AUTO) return null;
|
||||
if (
|
||||
overrideModel === DEFAULT_GEMINI_MODEL_AUTO ||
|
||||
overrideModel === PREVIEW_GEMINI_MODEL_AUTO
|
||||
)
|
||||
return null;
|
||||
|
||||
// Return the overridden model name.
|
||||
return {
|
||||
model: resolveModel(overrideModel, config.getPreviewFeatures()),
|
||||
model: getEffectiveModel(overrideModel, config.getPreviewFeatures()),
|
||||
metadata: {
|
||||
source: this.name,
|
||||
latencyMs: 0,
|
||||
|
||||
Reference in New Issue
Block a user