mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-06-12 12:26:57 -07:00
perf(core): skip model routing classification when redundant
This commit is contained in:
@@ -7,5 +7,8 @@
|
||||
},
|
||||
"general": {
|
||||
"devtools": true
|
||||
},
|
||||
"model": {
|
||||
"gemma4Variant": "gemma-4-31b-it"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -383,6 +383,56 @@ describe('ClassifierStrategy', () => {
|
||||
expect(decision?.model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
|
||||
});
|
||||
|
||||
it('should skip classification if both pro and flash resolve to the same model', async () => {
|
||||
// We mock the config to trigger the fast path by returning a specific model
|
||||
// that the router will see as identical for both 'pro' and 'flash' tiers.
|
||||
vi.mocked(mockConfig.getModel).mockReturnValue(DEFAULT_GEMINI_MODEL_AUTO);
|
||||
|
||||
// By overriding the modelConfigService, we can simulate gemma4Variant
|
||||
// or any other scenario where both tiers resolve to the same target model.
|
||||
const mockResolveClassifierModelId = vi
|
||||
.fn()
|
||||
.mockReturnValue('gemma-4-31b-it');
|
||||
Object.defineProperty(
|
||||
mockConfig.modelConfigService,
|
||||
'resolveClassifierModelId',
|
||||
{
|
||||
value: mockResolveClassifierModelId,
|
||||
writable: true,
|
||||
},
|
||||
);
|
||||
|
||||
// We also need to mock config.getExperimentalDynamicModelConfiguration()
|
||||
// if that is what resolveClassifierModel uses. Since resolveClassifierModel
|
||||
// is a standalone function, we can mock its behavior indirectly via config.
|
||||
Object.defineProperty(
|
||||
mockConfig,
|
||||
'getExperimentalDynamicModelConfiguration',
|
||||
{
|
||||
value: vi.fn().mockReturnValue(true),
|
||||
writable: true,
|
||||
},
|
||||
);
|
||||
|
||||
const decision = await strategy.route(
|
||||
mockContext,
|
||||
mockConfig,
|
||||
mockBaseLlmClient,
|
||||
mockLocalLiteRtLmClient,
|
||||
);
|
||||
|
||||
expect(decision).toEqual({
|
||||
model: 'gemma-4-31b-it',
|
||||
metadata: {
|
||||
source: 'classifier',
|
||||
latencyMs: 0,
|
||||
reasoning:
|
||||
'Skipped classification because both tiers resolve to the same model: gemma-4-31b-it',
|
||||
},
|
||||
});
|
||||
expect(mockBaseLlmClient.generateJson).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
describe('Gemini 3.1 and Custom Tools Routing', () => {
|
||||
it('should route to PREVIEW_GEMINI_3_1_MODEL when Gemini 3.1 is launched', async () => {
|
||||
vi.mocked(mockConfig.getGemini31Launched).mockResolvedValue(true);
|
||||
|
||||
@@ -137,6 +137,47 @@ export class ClassifierStrategy implements RoutingStrategy {
|
||||
const startTime = Date.now();
|
||||
try {
|
||||
const model = context.requestedModel ?? config.getModel();
|
||||
|
||||
const [useGemini3_1, useGemini3_1FlashLite, useCustomToolModel] =
|
||||
await Promise.all([
|
||||
config.getGemini31Launched(),
|
||||
config.getGemini31FlashLiteLaunched(),
|
||||
config.getUseCustomToolModel(),
|
||||
]);
|
||||
|
||||
const hasAccessToPreview = config.getHasAccessToPreviewModel?.() ?? true;
|
||||
|
||||
// Check if classification is redundant (i.e., both tiers resolve to the same model)
|
||||
const proModel = resolveClassifierModel(
|
||||
model,
|
||||
'pro',
|
||||
useGemini3_1,
|
||||
useGemini3_1FlashLite,
|
||||
useCustomToolModel,
|
||||
hasAccessToPreview,
|
||||
config,
|
||||
);
|
||||
const flashModel = resolveClassifierModel(
|
||||
model,
|
||||
'flash',
|
||||
useGemini3_1,
|
||||
useGemini3_1FlashLite,
|
||||
useCustomToolModel,
|
||||
hasAccessToPreview,
|
||||
config,
|
||||
);
|
||||
|
||||
if (proModel === flashModel) {
|
||||
return {
|
||||
model: proModel,
|
||||
metadata: {
|
||||
source: this.name,
|
||||
latencyMs: 0,
|
||||
reasoning: `Skipped classification because both tiers resolve to the same model: ${proModel}`,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
if (
|
||||
(await config.getNumericalRoutingEnabled()) &&
|
||||
isGemini3Model(model, config)
|
||||
@@ -171,19 +212,13 @@ export class ClassifierStrategy implements RoutingStrategy {
|
||||
|
||||
const reasoning = routerResponse.reasoning;
|
||||
const latencyMs = Date.now() - startTime;
|
||||
const [useGemini3_1, useGemini3_1FlashLite, useCustomToolModel] =
|
||||
await Promise.all([
|
||||
config.getGemini31Launched(),
|
||||
config.getGemini31FlashLiteLaunched(),
|
||||
config.getUseCustomToolModel(),
|
||||
]);
|
||||
const selectedModel = resolveClassifierModel(
|
||||
model,
|
||||
routerResponse.model_choice,
|
||||
useGemini3_1,
|
||||
useGemini3_1FlashLite,
|
||||
useCustomToolModel,
|
||||
config.getHasAccessToPreviewModel?.() ?? true,
|
||||
hasAccessToPreview,
|
||||
config,
|
||||
);
|
||||
|
||||
|
||||
@@ -32,6 +32,9 @@ describe('GemmaClassifierStrategy', () => {
|
||||
mockGenerateJson = vi.fn();
|
||||
|
||||
mockConfig = {
|
||||
modelConfigService: {
|
||||
resolveClassifierModelId: vi.fn(),
|
||||
},
|
||||
getGemmaModelRouterSettings: vi.fn().mockReturnValue({
|
||||
enabled: true,
|
||||
classifier: { model: 'gemma3-1b-gpu-custom' },
|
||||
@@ -83,6 +86,44 @@ describe('GemmaClassifierStrategy', () => {
|
||||
).rejects.toThrow('Only gemma3-1b-gpu-custom has been tested');
|
||||
});
|
||||
|
||||
it('should skip classification if both pro and flash resolve to the same model', async () => {
|
||||
// Setup the mock config to use the dynamic model config service.
|
||||
Object.defineProperty(
|
||||
mockConfig,
|
||||
'getExperimentalDynamicModelConfiguration',
|
||||
{
|
||||
value: vi.fn().mockReturnValue(true),
|
||||
writable: true,
|
||||
},
|
||||
);
|
||||
Object.defineProperty(
|
||||
mockConfig.modelConfigService,
|
||||
'resolveClassifierModelId',
|
||||
{
|
||||
value: vi.fn().mockReturnValue('gemma-4-31b-it'),
|
||||
writable: true,
|
||||
},
|
||||
);
|
||||
|
||||
const decision = await strategy.route(
|
||||
mockContext,
|
||||
mockConfig,
|
||||
mockBaseLlmClient,
|
||||
mockLocalLiteRtLmClient,
|
||||
);
|
||||
|
||||
expect(decision).toEqual({
|
||||
model: 'gemma-4-31b-it',
|
||||
metadata: {
|
||||
source: 'gemma-classifier',
|
||||
latencyMs: 0,
|
||||
reasoning:
|
||||
'Skipped classification because both tiers resolve to the same model: gemma-4-31b-it',
|
||||
},
|
||||
});
|
||||
expect(mockGenerateJson).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should call generateJson with the correct parameters', async () => {
|
||||
const mockApiResponse = {
|
||||
reasoning: 'Simple task',
|
||||
|
||||
@@ -178,6 +178,39 @@ ${formattedHistory}
|
||||
return null;
|
||||
}
|
||||
|
||||
const model = context.requestedModel ?? config.getModel();
|
||||
|
||||
// Check if classification is redundant (i.e., both tiers resolve to the same model)
|
||||
const proModel = resolveClassifierModel(
|
||||
model,
|
||||
'pro',
|
||||
false, // useGemini3_1
|
||||
false, // useGemini3_1FlashLite
|
||||
false, // useCustomToolModel
|
||||
true, // hasAccessToPreview
|
||||
config,
|
||||
);
|
||||
const flashModel = resolveClassifierModel(
|
||||
model,
|
||||
'flash',
|
||||
false, // useGemini3_1
|
||||
false, // useGemini3_1FlashLite
|
||||
false, // useCustomToolModel
|
||||
true, // hasAccessToPreview
|
||||
config,
|
||||
);
|
||||
|
||||
if (proModel === flashModel) {
|
||||
return {
|
||||
model: proModel,
|
||||
metadata: {
|
||||
source: this.name,
|
||||
latencyMs: 0,
|
||||
reasoning: `Skipped classification because both tiers resolve to the same model: ${proModel}`,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// Only the gemma3-1b-gpu-custom model has been tested and verified.
|
||||
if (gemmaRouterSettings.classifier?.model !== 'gemma3-1b-gpu-custom') {
|
||||
throw new Error('Only gemma3-1b-gpu-custom has been tested');
|
||||
@@ -210,8 +243,13 @@ ${formattedHistory}
|
||||
const reasoning = routerResponse.reasoning;
|
||||
const latencyMs = Date.now() - startTime;
|
||||
const selectedModel = resolveClassifierModel(
|
||||
context.requestedModel ?? config.getModel(),
|
||||
model,
|
||||
routerResponse.model_choice,
|
||||
false, // useGemini3_1
|
||||
false, // useGemini3_1FlashLite
|
||||
false, // useCustomToolModel
|
||||
true, // hasAccessToPreview
|
||||
config,
|
||||
);
|
||||
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user