feat(routing): availability-aware auto-routing with best-effort pro

Adds settings and logic to detect slow or hanging Pro model requests, mark the Pro model as temporarily unavailable, and automatically fall back to Flash. Introduces the bestEffortPro routing strategy along with the proTimeoutMinutes and proTimeoutFallbackDurationMinutes settings.
This commit is contained in:
jacob314
2026-04-23 13:50:24 -07:00
parent ff28d55100
commit 19601955fd
15 changed files with 269 additions and 44 deletions
+3 -32
View File
@@ -449,8 +449,7 @@
"version": "2.11.0",
"resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.11.0.tgz",
"integrity": "sha512-sBXGT13cpmPR5BMgHE6UEEfEaShh5Ror6rfN3yEK5si7QVrtZg8LEPQb0VVhiLRUslD2yLnXtnRzG035J/mZXQ==",
"license": "(Apache-2.0 AND BSD-3-Clause)",
"peer": true
"license": "(Apache-2.0 AND BSD-3-Clause)"
},
"node_modules/@bundled-es-modules/cookie": {
"version": "2.0.1",
@@ -1474,7 +1473,6 @@
"resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.13.4.tgz",
"integrity": "sha512-GsFaMXCkMqkKIvwCQjCrwH+GHbPKBjhwo/8ZuUkWHqbI73Kky9I+pQltrlT0+MWpedCoosda53lgjYfyEPgxBg==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@grpc/proto-loader": "^0.7.13",
"@js-sdsl/ordered-map": "^4.4.2"
@@ -2152,7 +2150,6 @@
"integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@octokit/auth-token": "^6.0.0",
"@octokit/graphql": "^9.0.2",
@@ -2333,7 +2330,6 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz",
"integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==",
"license": "Apache-2.0",
"peer": true,
"engines": {
"node": ">=8.0.0"
}
@@ -2383,7 +2379,6 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.5.0.tgz",
"integrity": "sha512-ka4H8OM6+DlUhSAZpONu0cPBtPPTQKxbxVzC4CzVx5+K4JnroJVBtDzLAMx4/3CDTJXRvVFhpFjtl4SaiTNoyQ==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@opentelemetry/semantic-conventions": "^1.29.0"
},
@@ -2758,7 +2753,6 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.5.0.tgz",
"integrity": "sha512-F8W52ApePshpoSrfsSk1H2yJn9aKjCrbpQF1M9Qii0GHzbfVeFUB+rc3X4aggyZD8x9Gu3Slua+s6krmq6Dt8g==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@opentelemetry/core": "2.5.0",
"@opentelemetry/semantic-conventions": "^1.29.0"
@@ -2792,7 +2786,6 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.5.0.tgz",
"integrity": "sha512-BeJLtU+f5Gf905cJX9vXFQorAr6TAfK3SPvTFqP+scfIpDQEJfRaGJWta7sJgP+m4dNtBf9y3yvBKVAZZtJQVA==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@opentelemetry/core": "2.5.0",
"@opentelemetry/resources": "2.5.0"
@@ -2847,7 +2840,6 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.5.0.tgz",
"integrity": "sha512-VzRf8LzotASEyNDUxTdaJ9IRJ1/h692WyArDBInf5puLCjxbICD6XkHgpuudis56EndyS7LYFmtTMny6UABNdQ==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@opentelemetry/core": "2.5.0",
"@opentelemetry/resources": "2.5.0",
@@ -4054,7 +4046,6 @@
"integrity": "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA==",
"devOptional": true,
"license": "MIT",
"peer": true,
"dependencies": {
"csstype": "^3.0.2"
}
@@ -4328,7 +4319,6 @@
"integrity": "sha512-/Zb/xaIDfxeJnvishjGdcR4jmr7S+bda8PKNhRGdljDM+elXhlvN0FyPSsMnLmJUrVG9aPO6dof80wjMawsASg==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@typescript-eslint/scope-manager": "8.58.2",
"@typescript-eslint/types": "8.58.2",
@@ -5073,7 +5063,6 @@
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
"license": "MIT",
"peer": true,
"bin": {
"acorn": "bin/acorn"
},
@@ -7151,8 +7140,7 @@
"version": "0.0.1581282",
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz",
"integrity": "sha512-nv7iKtNZQshSW2hKzYNr46nM/Cfh5SEvE2oV0/SEGgc9XupIY5ggf84Cz8eJIkBce7S3bmTAauFD6aysMpnqsQ==",
"license": "BSD-3-Clause",
"peer": true
"license": "BSD-3-Clause"
},
"node_modules/dezalgo": {
"version": "1.0.4",
@@ -7737,7 +7725,6 @@
"integrity": "sha512-GsGizj2Y1rCWDu6XoEekL3RLilp0voSePurjZIkxL3wlm5o5EC9VpgaP7lrCvjnkuLvzFBQWB3vWB3K5KQTveQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@eslint-community/eslint-utils": "^4.2.0",
"@eslint-community/regexpp": "^4.12.1",
@@ -8255,7 +8242,6 @@
"resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz",
"integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==",
"license": "MIT",
"peer": true,
"dependencies": {
"accepts": "^2.0.0",
"body-parser": "^2.2.1",
@@ -9522,7 +9508,6 @@
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.12.tgz",
"integrity": "sha512-p1JfQMKaceuCbpJKAPKVqyqviZdS0eUxH9v82oWo1kb9xjQ5wA6iP3FNVAPDFlz5/p7d45lO+BpSk1tuSZMF4Q==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=16.9.0"
}
@@ -9782,7 +9767,6 @@
"resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.6.9.tgz",
"integrity": "sha512-RL9sSiLQZECnjbmBwjIHOp8yVGdWF7C/uifg7ISv/e+F3nLNsfl7FdUFQs8iZARFMJAYxMFpxW6OW+HSt9drwQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"ansi-escapes": "^7.0.0",
"ansi-styles": "^6.2.3",
@@ -13496,7 +13480,6 @@
"resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz",
"integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=0.10.0"
}
@@ -13507,7 +13490,6 @@
"integrity": "sha512-ePrwPfxAnB+7hgnEr8vpKxL9cmnp7F322t8oqcPshbIQQhDKgFDW4tjhF2wjVbdXF9O/nyuy3sQWd9JGpiLPvA==",
"devOptional": true,
"license": "MIT",
"peer": true,
"dependencies": {
"shell-quote": "^1.6.1",
"ws": "^7"
@@ -15627,7 +15609,6 @@
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
"integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
@@ -15850,8 +15831,7 @@
"version": "2.8.1",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
"license": "0BSD",
"peer": true
"license": "0BSD"
},
"node_modules/tsx": {
"version": "4.20.3",
@@ -15859,7 +15839,6 @@
"integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==",
"devOptional": true,
"license": "MIT",
"peer": true,
"dependencies": {
"esbuild": "~0.25.0",
"get-tsconfig": "^4.7.5"
@@ -16025,7 +16004,6 @@
"integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
"devOptional": true,
"license": "Apache-2.0",
"peer": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
@@ -16093,7 +16071,6 @@
"integrity": "sha512-6sMvZePQrnZH2/cJkwRpkT7DxoAWh+g6+GFRK6bV3YQo7ogi3SX5rgF6099r5Q53Ma5qeT7LGmOmuIutF4t3lA==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@typescript-eslint/scope-manager": "8.35.0",
"@typescript-eslint/types": "8.35.0",
@@ -16480,7 +16457,6 @@
"resolved": "https://registry.npmjs.org/vite/-/vite-7.3.2.tgz",
"integrity": "sha512-Bby3NOsna2jsjfLVOHKes8sGwgl4TT0E6vvpYgnAYDIF/tie7MRaFthmKuHx1NSXjiTueXH3do80FMQgvEktRg==",
"license": "MIT",
"peer": true,
"dependencies": {
"esbuild": "^0.27.0",
"fdir": "^6.5.0",
@@ -17051,7 +17027,6 @@
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
"integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
@@ -17064,7 +17039,6 @@
"resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz",
"integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==",
"license": "MIT",
"peer": true,
"dependencies": {
"@types/chai": "^5.2.2",
"@vitest/expect": "3.2.4",
@@ -17703,7 +17677,6 @@
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
"integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
"license": "MIT",
"peer": true,
"funding": {
"url": "https://github.com/sponsors/colinhacks"
}
@@ -18139,7 +18112,6 @@
"resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.14.3.tgz",
"integrity": "sha512-Iq8QQQ/7X3Sac15oB6p0FmUg/klxQvXLeileoqrTRGJYLV+/9tubbr9ipz0GKHjmXVsgFPo/+W+2cA8eNcR+XA==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@grpc/proto-loader": "^0.8.0",
"@js-sdsl/ordered-map": "^4.4.2"
@@ -18258,7 +18230,6 @@
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
"integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
+1
View File
@@ -1044,6 +1044,7 @@ export async function loadCliConfig(
format: (argv.outputFormat ?? settings.output?.format) as OutputFormat,
},
gemmaModelRouter: settings.experimental?.gemmaModelRouter,
autoRouting: settings.model?.autoRouting,
adk: settings.experimental?.adk,
fakeResponses: argv.fakeResponses,
recordResponses: argv.recordResponses,
+40
View File
@@ -1112,6 +1112,46 @@ const SETTINGS_SCHEMA = {
description: 'Skip the next speaker check.',
showInDialog: true,
},
autoRouting: {
type: 'object',
label: 'Auto Routing',
category: 'Model',
requiresRestart: false,
default: {},
description: 'Settings for automatic model routing.',
showInDialog: false,
properties: {
bestEffortPro: {
type: 'boolean',
label: 'Best Effort Pro',
category: 'Model',
requiresRestart: false,
default: false,
description:
'Always prefer the Pro model unless it is unavailable (e.g., due to timeouts or quota), ignoring other routing hints.',
showInDialog: true,
},
proTimeoutMinutes: {
type: 'number',
label: 'Pro Timeout (Minutes)',
category: 'Model',
requiresRestart: false,
default: 5,
description:
'If a Pro request takes longer than this many minutes, it will be marked as temporarily unavailable and fallback to Flash.',
showInDialog: true,
},
proTimeoutFallbackDurationMinutes: {
type: 'number',
label: 'Pro Timeout Fallback Duration (Minutes)',
category: 'Model',
requiresRestart: false,
default: 60,
description: 'How long to route to Flash after Pro times out.',
showInDialog: true,
},
},
},
},
},
@@ -124,6 +124,9 @@ export const createMockConfig = (overrides: Partial<Config> = {}): Config =>
getCompressionThreshold: vi.fn().mockResolvedValue(undefined),
getUserCaching: vi.fn().mockResolvedValue(false),
getNumericalRoutingEnabled: vi.fn().mockResolvedValue(false),
getBestEffortProEnabled: vi.fn().mockResolvedValue(false),
getProTimeoutMinutes: vi.fn().mockResolvedValue(5),
getProTimeoutFallbackDurationMinutes: vi.fn().mockResolvedValue(60),
getClassifierThreshold: vi.fn().mockResolvedValue(undefined),
getBannerTextNoCapacityIssues: vi.fn().mockResolvedValue(''),
getBannerTextCapacityIssues: vi.fn().mockResolvedValue(''),
@@ -8,13 +8,15 @@ export type ModelId = string;
type TerminalUnavailabilityReason = 'quota' | 'capacity';
export type TurnUnavailabilityReason = 'retry_once_per_turn';
export type TemporaryUnavailabilityReason = 'timeout';
export type UnavailabilityReason =
| TerminalUnavailabilityReason
| TurnUnavailabilityReason
| TemporaryUnavailabilityReason
| 'unknown';
export type ModelHealthStatus = 'terminal' | 'sticky_retry';
export type ModelHealthStatus = 'terminal' | 'sticky_retry' | 'temporary';
type HealthState =
| { status: 'terminal'; reason: TerminalUnavailabilityReason }
@@ -22,6 +24,11 @@ type HealthState =
status: 'sticky_retry';
reason: TurnUnavailabilityReason;
consumed: boolean;
}
| {
status: 'temporary';
reason: TemporaryUnavailabilityReason;
untilMs: number;
};
export interface ModelAvailabilitySnapshot {
@@ -48,6 +55,18 @@ export class ModelAvailabilityService {
});
}
/**
 * Flags `model` as unavailable for a limited window.
 *
 * Records a `temporary` health state whose `untilMs` deadline is the current
 * wall-clock time (`Date.now()`) plus `durationMs`.
 *
 * @param model - Model to mark.
 * @param reason - Why the model is being sidelined.
 * @param durationMs - Length of the unavailability window, in milliseconds.
 */
markTemporarilyUnavailable(
  model: ModelId,
  reason: TemporaryUnavailabilityReason,
  durationMs: number,
) {
  // Absolute deadline, so later checks only need to compare against the clock.
  const untilMs = Date.now() + durationMs;
  this.setState(model, { status: 'temporary', reason, untilMs });
}
/**
 * Marks `model` as healthy by delegating to `clearState`, which drops
 * whatever health state is currently stored for it.
 *
 * @param model - Model whose stored state should be cleared.
 */
markHealthy(model: ModelId) {
this.clearState(model);
}
@@ -95,6 +114,15 @@ export class ModelAvailabilityService {
return { available: false, reason: state.reason };
}
if (state.status === 'temporary') {
if (Date.now() < state.untilMs) {
return { available: false, reason: state.reason };
} else {
this.clearState(model);
return { available: true };
}
}
return { available: true };
}
+25
View File
@@ -679,6 +679,11 @@ export interface ConfigParameters {
policyUpdateConfirmationRequest?: PolicyUpdateConfirmationRequest;
output?: OutputSettings;
gemmaModelRouter?: GemmaModelRouterSettings;
autoRouting?: {
bestEffortPro?: boolean;
proTimeoutMinutes?: number;
proTimeoutFallbackDurationMinutes?: number;
};
adk?: ADKSettings;
disableModelRouterForAuth?: AuthType[];
continueOnFailedApiCall?: boolean;
@@ -963,6 +968,9 @@ export class Config implements McpContext, AgentLoopContext {
private readonly planEnabled: boolean;
private readonly trackerEnabled: boolean;
private readonly planModeRoutingEnabled: boolean;
private readonly autoRoutingBestEffortPro: boolean;
private readonly autoRoutingProTimeoutMinutes: number;
private readonly autoRoutingProTimeoutFallbackDurationMinutes: number;
private readonly modelSteering: boolean;
private memoryContextManager?: MemoryContextManager;
private readonly contextManagement: ContextManagementConfig;
@@ -1117,6 +1125,11 @@ export class Config implements McpContext, AgentLoopContext {
this.planEnabled = params.plan ?? true;
this.trackerEnabled = params.tracker ?? false;
this.planModeRoutingEnabled = params.planSettings?.modelRouting ?? true;
this.autoRoutingBestEffortPro = params.autoRouting?.bestEffortPro ?? false;
this.autoRoutingProTimeoutMinutes =
params.autoRouting?.proTimeoutMinutes ?? 5;
this.autoRoutingProTimeoutFallbackDurationMinutes =
params.autoRouting?.proTimeoutFallbackDurationMinutes ?? 60;
this.enableEventDrivenScheduler = params.enableEventDrivenScheduler ?? true;
this.skillsSupport = params.skillsSupport ?? true;
this.disabledSkills = params.disabledSkills ?? [];
@@ -3144,6 +3157,18 @@ export class Config implements McpContext, AgentLoopContext {
return flag?.boolValue ?? true;
}
/**
 * Reports whether the "Best Effort Pro" auto-routing option is on.
 *
 * The value is fixed at construction from `autoRouting.bestEffortPro`
 * (defaulting to `false`).
 *
 * @returns The configured flag.
 */
async getBestEffortProEnabled(): Promise<boolean> {
return this.autoRoutingBestEffortPro;
}
/**
 * Returns the Pro request timeout, in minutes.
 *
 * The value is fixed at construction from `autoRouting.proTimeoutMinutes`
 * (defaulting to `5`). Callers convert it to milliseconds themselves.
 *
 * @returns The configured timeout in minutes.
 */
async getProTimeoutMinutes(): Promise<number> {
return this.autoRoutingProTimeoutMinutes;
}
/**
 * Returns how long, in minutes, the fallback should stay in effect after a
 * Pro timeout.
 *
 * The value is fixed at construction from
 * `autoRouting.proTimeoutFallbackDurationMinutes` (defaulting to `60`).
 *
 * @returns The configured fallback duration in minutes.
 */
async getProTimeoutFallbackDurationMinutes(): Promise<number> {
return this.autoRoutingProTimeoutFallbackDurationMinutes;
}
/**
* Returns the resolved complexity threshold for routing.
* If a remote threshold is provided and within range (0-100), it is returned.
@@ -109,6 +109,8 @@ describe('BaseLlmClient', () => {
.mockReturnValue({ authType: AuthType.USE_GEMINI }),
getEmbeddingModel: vi.fn().mockReturnValue('test-embedding-model'),
isInteractive: vi.fn().mockReturnValue(false),
getProTimeoutMinutes: vi.fn().mockResolvedValue(5),
getProTimeoutFallbackDurationMinutes: vi.fn().mockResolvedValue(60),
modelConfigService: {
getResolvedConfig: vi
.fn()
+8
View File
@@ -325,11 +325,19 @@ export class BaseLlmClient {
);
};
const proTimeoutMinutes = await this.config.getProTimeoutMinutes();
const proTimeoutFallbackDurationMinutes =
await this.config.getProTimeoutFallbackDurationMinutes();
return await retryWithBackoff(apiCall, {
shouldRetryOnContent,
maxAttempts:
availabilityMaxAttempts ?? maxAttempts ?? DEFAULT_MAX_ATTEMPTS,
getAvailabilityContext,
timeoutFallback: {
timeoutMs: proTimeoutMinutes * 60 * 1000,
fallbackDurationMs: proTimeoutFallbackDurationMinutes * 60 * 1000,
},
onPersistent429: this.config.isInteractive()
? (authType, error) =>
handleFallback(this.config, currentModel, authType, error)
@@ -160,6 +160,8 @@ describe('GeminiChat', () => {
authType: 'oauth-personal',
model: currentModel,
})),
getProTimeoutMinutes: vi.fn().mockResolvedValue(5),
getProTimeoutFallbackDurationMinutes: vi.fn().mockResolvedValue(60),
getModel: vi.fn().mockImplementation(() => currentModel),
setModel: vi.fn().mockImplementation((m: string) => {
currentModel = m;
+8
View File
@@ -687,6 +687,10 @@ export class GeminiChat {
);
};
const proTimeoutMinutes = await this.context.config.getProTimeoutMinutes();
const proTimeoutFallbackDurationMinutes =
await this.context.config.getProTimeoutFallbackDurationMinutes();
const streamResponse = await retryWithBackoff(apiCall, {
onPersistent429: onPersistent429Callback,
onValidationRequired: onValidationRequiredCallback,
@@ -696,6 +700,10 @@ export class GeminiChat {
maxAttempts:
availabilityMaxAttempts ?? this.context.config.getMaxAttempts(),
getAvailabilityContext,
timeoutFallback: {
timeoutMs: proTimeoutMinutes * 60 * 1000,
fallbackDurationMs: proTimeoutFallbackDurationMinutes * 60 * 1000,
},
onRetry: (attempt, error, delayMs) => {
coreEvents.emitRetryAttempt({
attempt,
@@ -103,6 +103,8 @@ describe('GeminiChat Network Retries', () => {
authType: 'oauth-personal',
model: 'test-model',
}),
getProTimeoutMinutes: vi.fn().mockResolvedValue(5),
getProTimeoutFallbackDurationMinutes: vi.fn().mockResolvedValue(60),
getModel: vi.fn().mockReturnValue('gemini-pro'),
getActiveModel: vi.fn().mockReturnValue('gemini-pro'),
setActiveModel: vi.fn(),
@@ -32,6 +32,9 @@ vi.mock('./strategies/overrideStrategy.js');
vi.mock('./strategies/approvalModeStrategy.js');
vi.mock('./strategies/classifierStrategy.js');
vi.mock('./strategies/numericalClassifierStrategy.js');
import { BestEffortProStrategy } from './strategies/bestEffortProStrategy.js';
vi.mock('./strategies/bestEffortProStrategy.js');
vi.mock('./strategies/gemmaClassifierStrategy.js');
vi.mock('../telemetry/loggers.js');
vi.mock('../telemetry/types.js');
@@ -74,6 +77,7 @@ describe('ModelRouterService', () => {
[
new FallbackStrategy(),
new OverrideStrategy(),
new BestEffortProStrategy(),
new ApprovalModeStrategy(),
new ClassifierStrategy(),
new NumericalClassifierStrategy(),
@@ -104,13 +108,14 @@ describe('ModelRouterService', () => {
const compositeStrategyArgs = vi.mocked(CompositeStrategy).mock.calls[0];
const childStrategies = compositeStrategyArgs[0];
expect(childStrategies.length).toBe(6);
expect(childStrategies.length).toBe(7);
expect(childStrategies[0]).toBeInstanceOf(FallbackStrategy);
expect(childStrategies[1]).toBeInstanceOf(OverrideStrategy);
expect(childStrategies[2]).toBeInstanceOf(ApprovalModeStrategy);
expect(childStrategies[3]).toBeInstanceOf(ClassifierStrategy);
expect(childStrategies[4]).toBeInstanceOf(NumericalClassifierStrategy);
expect(childStrategies[5]).toBeInstanceOf(DefaultStrategy);
expect(childStrategies[2]).toBeInstanceOf(BestEffortProStrategy);
expect(childStrategies[3]).toBeInstanceOf(ApprovalModeStrategy);
expect(childStrategies[4]).toBeInstanceOf(ClassifierStrategy);
expect(childStrategies[5]).toBeInstanceOf(NumericalClassifierStrategy);
expect(childStrategies[6]).toBeInstanceOf(DefaultStrategy);
expect(compositeStrategyArgs[1]).toBe('agent-router');
});
@@ -133,14 +138,15 @@ describe('ModelRouterService', () => {
const compositeStrategyArgs = vi.mocked(CompositeStrategy).mock.calls[0];
const childStrategies = compositeStrategyArgs[0];
expect(childStrategies.length).toBe(7);
expect(childStrategies.length).toBe(8);
expect(childStrategies[0]).toBeInstanceOf(FallbackStrategy);
expect(childStrategies[1]).toBeInstanceOf(OverrideStrategy);
expect(childStrategies[2]).toBeInstanceOf(ApprovalModeStrategy);
expect(childStrategies[3]).toBeInstanceOf(GemmaClassifierStrategy);
expect(childStrategies[4]).toBeInstanceOf(ClassifierStrategy);
expect(childStrategies[5]).toBeInstanceOf(NumericalClassifierStrategy);
expect(childStrategies[6]).toBeInstanceOf(DefaultStrategy);
expect(childStrategies[2]).toBeInstanceOf(BestEffortProStrategy);
expect(childStrategies[3]).toBeInstanceOf(ApprovalModeStrategy);
expect(childStrategies[4]).toBeInstanceOf(GemmaClassifierStrategy);
expect(childStrategies[5]).toBeInstanceOf(ClassifierStrategy);
expect(childStrategies[6]).toBeInstanceOf(NumericalClassifierStrategy);
expect(childStrategies[7]).toBeInstanceOf(DefaultStrategy);
expect(compositeStrategyArgs[1]).toBe('agent-router');
});
@@ -18,6 +18,7 @@ import { NumericalClassifierStrategy } from './strategies/numericalClassifierStr
import { CompositeStrategy } from './strategies/compositeStrategy.js';
import { FallbackStrategy } from './strategies/fallbackStrategy.js';
import { OverrideStrategy } from './strategies/overrideStrategy.js';
import { BestEffortProStrategy } from './strategies/bestEffortProStrategy.js';
import { ApprovalModeStrategy } from './strategies/approvalModeStrategy.js';
import { logModelRouting } from '../telemetry/loggers.js';
@@ -43,6 +44,9 @@ export class ModelRouterService {
strategies.push(new FallbackStrategy());
strategies.push(new OverrideStrategy());
// Best Effort Pro is next.
strategies.push(new BestEffortProStrategy());
// Approval mode is next.
strategies.push(new ApprovalModeStrategy());
@@ -0,0 +1,83 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { Config } from '../../config/config.js';
import { isAutoModel, resolveModel } from '../../config/models.js';
import type {
RoutingStrategy,
RoutingDecision,
RoutingContext,
} from '../routingStrategy.js';
/**
 * Routing strategy backing the "Best Effort Pro" setting.
 *
 * It only takes part when the configured model is an auto model and the
 * setting is enabled; in every other case it yields `null` so that the next
 * strategy can decide. Once it takes part it always produces a decision:
 * the resolved Pro model when the availability service reports it as
 * available, otherwise the resolved Flash model.
 */
export class BestEffortProStrategy implements RoutingStrategy {
  name = 'best-effort-pro';

  /**
   * Picks Pro or Flash according to Pro's current availability snapshot.
   *
   * @param context - Routing context (not consulted by this strategy).
   * @param config - Source of the requested model, feature flags and the
   *   model availability service.
   * @returns A decision tagged with this strategy's name, or `null` when the
   *   strategy does not apply.
   */
  async route(
    context: RoutingContext,
    config: Config,
  ): Promise<RoutingDecision | null> {
    // An explicitly selected (non-auto) model is never overridden here.
    if (!isAutoModel(config.getModel())) {
      return null;
    }
    if (!(await config.getBestEffortProEnabled())) {
      return null;
    }

    const gemini31Launched = (await config.getGemini31Launched?.()) ?? false;
    const gemini31FlashLiteLaunched =
      (await config.getGemini31FlashLiteLaunched?.()) ?? false;
    const previewAccess = config.getHasAccessToPreviewModel?.() ?? true;
    const availability = config.getModelAvailabilityService();

    // Both models are resolved with identical flags; only the requested id differs.
    const resolve = (requested: Parameters<typeof resolveModel>[0]) =>
      resolveModel(
        requested,
        gemini31Launched,
        gemini31FlashLiteLaunched,
        false,
        previewAccess,
        config,
      );

    // NOTE(review): confirm these identifiers are ones resolveModel recognizes.
    const proModel = resolve('gemini-3.1-pro');
    const flashModel = resolve('gemini-3.1-flash');

    const proSnapshot = availability.snapshot(proModel);

    return {
      model: proSnapshot.available ? proModel : flashModel,
      metadata: {
        source: this.name,
        latencyMs: 0,
        reasoning: proSnapshot.available
          ? 'Best Effort Pro is enabled and the Pro model is available.'
          : `Best Effort Pro is enabled, but Pro is unavailable (${proSnapshot.reason}). Falling back to Flash.`,
      },
    };
  }
}
+42
View File
@@ -37,6 +37,10 @@ export interface RetryOptions {
signal?: AbortSignal;
getAvailabilityContext?: () => RetryAvailabilityContext | undefined;
onRetry?: (attempt: number, error: unknown, delayMs: number) => void;
timeoutFallback?: {
timeoutMs: number;
fallbackDurationMs: number;
};
}
const DEFAULT_RETRY_OPTIONS: RetryOptions = {
@@ -240,6 +244,7 @@ export async function retryWithBackoff<T>(
signal,
getAvailabilityContext,
onRetry,
timeoutFallback,
} = {
...DEFAULT_RETRY_OPTIONS,
shouldRetryOnError: isRetryableError,
@@ -248,6 +253,7 @@ export async function retryWithBackoff<T>(
let attempt = 0;
let currentDelay = initialDelayMs;
let startTime = Date.now();
const throwIfAborted = () => {
if (signal?.aborted) {
throw createAbortError();
@@ -294,6 +300,42 @@ export async function retryWithBackoff<T>(
const errorCode = getErrorStatus(error);
const isTimeout =
(error instanceof Error &&
error.message.toLowerCase().includes('timeout')) ||
getRetryErrorType(error) === 'ETIMEDOUT' ||
getRetryErrorType(error) === 'FETCH_FAILED';
if (isTimeout && timeoutFallback) {
if (Date.now() - startTime >= timeoutFallback.timeoutMs) {
const successContext = getAvailabilityContext?.();
if (successContext) {
successContext.service.markTemporarilyUnavailable(
successContext.policy.model,
'timeout',
timeoutFallback.fallbackDurationMs,
);
}
if (onPersistent429) {
try {
const fallbackModel = await onPersistent429(
authType,
new Error('Request timed out'),
);
if (fallbackModel) {
attempt = 0;
currentDelay = initialDelayMs;
startTime = Date.now();
continue;
}
} catch (fallbackError) {
debugLogger.warn('Model fallback failed:', fallbackError);
}
}
throw error;
}
}
if (
classifiedError instanceof TerminalQuotaError ||
classifiedError instanceof ModelNotFoundError