mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-15 06:12:50 -07:00
feat(routing): availability-aware auto-routing with best-effort pro
Adds settings and logic to detect slow or hanging Pro model requests, mark the Pro model as temporarily unavailable, and automatically fall back to Flash. Introduces the bestEffortPro routing strategy along with the proTimeoutMinutes and proTimeoutFallbackDurationMinutes settings.
This commit is contained in:
Generated
+3
-32
@@ -449,8 +449,7 @@
|
||||
"version": "2.11.0",
|
||||
"resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.11.0.tgz",
|
||||
"integrity": "sha512-sBXGT13cpmPR5BMgHE6UEEfEaShh5Ror6rfN3yEK5si7QVrtZg8LEPQb0VVhiLRUslD2yLnXtnRzG035J/mZXQ==",
|
||||
"license": "(Apache-2.0 AND BSD-3-Clause)",
|
||||
"peer": true
|
||||
"license": "(Apache-2.0 AND BSD-3-Clause)"
|
||||
},
|
||||
"node_modules/@bundled-es-modules/cookie": {
|
||||
"version": "2.0.1",
|
||||
@@ -1474,7 +1473,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.13.4.tgz",
|
||||
"integrity": "sha512-GsFaMXCkMqkKIvwCQjCrwH+GHbPKBjhwo/8ZuUkWHqbI73Kky9I+pQltrlT0+MWpedCoosda53lgjYfyEPgxBg==",
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@grpc/proto-loader": "^0.7.13",
|
||||
"@js-sdsl/ordered-map": "^4.4.2"
|
||||
@@ -2152,7 +2150,6 @@
|
||||
"integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@octokit/auth-token": "^6.0.0",
|
||||
"@octokit/graphql": "^9.0.2",
|
||||
@@ -2333,7 +2330,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz",
|
||||
"integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==",
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=8.0.0"
|
||||
}
|
||||
@@ -2383,7 +2379,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.5.0.tgz",
|
||||
"integrity": "sha512-ka4H8OM6+DlUhSAZpONu0cPBtPPTQKxbxVzC4CzVx5+K4JnroJVBtDzLAMx4/3CDTJXRvVFhpFjtl4SaiTNoyQ==",
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@opentelemetry/semantic-conventions": "^1.29.0"
|
||||
},
|
||||
@@ -2758,7 +2753,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.5.0.tgz",
|
||||
"integrity": "sha512-F8W52ApePshpoSrfsSk1H2yJn9aKjCrbpQF1M9Qii0GHzbfVeFUB+rc3X4aggyZD8x9Gu3Slua+s6krmq6Dt8g==",
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@opentelemetry/core": "2.5.0",
|
||||
"@opentelemetry/semantic-conventions": "^1.29.0"
|
||||
@@ -2792,7 +2786,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.5.0.tgz",
|
||||
"integrity": "sha512-BeJLtU+f5Gf905cJX9vXFQorAr6TAfK3SPvTFqP+scfIpDQEJfRaGJWta7sJgP+m4dNtBf9y3yvBKVAZZtJQVA==",
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@opentelemetry/core": "2.5.0",
|
||||
"@opentelemetry/resources": "2.5.0"
|
||||
@@ -2847,7 +2840,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.5.0.tgz",
|
||||
"integrity": "sha512-VzRf8LzotASEyNDUxTdaJ9IRJ1/h692WyArDBInf5puLCjxbICD6XkHgpuudis56EndyS7LYFmtTMny6UABNdQ==",
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@opentelemetry/core": "2.5.0",
|
||||
"@opentelemetry/resources": "2.5.0",
|
||||
@@ -4054,7 +4046,6 @@
|
||||
"integrity": "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA==",
|
||||
"devOptional": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"csstype": "^3.0.2"
|
||||
}
|
||||
@@ -4328,7 +4319,6 @@
|
||||
"integrity": "sha512-/Zb/xaIDfxeJnvishjGdcR4jmr7S+bda8PKNhRGdljDM+elXhlvN0FyPSsMnLmJUrVG9aPO6dof80wjMawsASg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@typescript-eslint/scope-manager": "8.58.2",
|
||||
"@typescript-eslint/types": "8.58.2",
|
||||
@@ -5073,7 +5063,6 @@
|
||||
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
|
||||
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"acorn": "bin/acorn"
|
||||
},
|
||||
@@ -7151,8 +7140,7 @@
|
||||
"version": "0.0.1581282",
|
||||
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz",
|
||||
"integrity": "sha512-nv7iKtNZQshSW2hKzYNr46nM/Cfh5SEvE2oV0/SEGgc9XupIY5ggf84Cz8eJIkBce7S3bmTAauFD6aysMpnqsQ==",
|
||||
"license": "BSD-3-Clause",
|
||||
"peer": true
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/dezalgo": {
|
||||
"version": "1.0.4",
|
||||
@@ -7737,7 +7725,6 @@
|
||||
"integrity": "sha512-GsGizj2Y1rCWDu6XoEekL3RLilp0voSePurjZIkxL3wlm5o5EC9VpgaP7lrCvjnkuLvzFBQWB3vWB3K5KQTveQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@eslint-community/eslint-utils": "^4.2.0",
|
||||
"@eslint-community/regexpp": "^4.12.1",
|
||||
@@ -8255,7 +8242,6 @@
|
||||
"resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz",
|
||||
"integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"accepts": "^2.0.0",
|
||||
"body-parser": "^2.2.1",
|
||||
@@ -9522,7 +9508,6 @@
|
||||
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.12.tgz",
|
||||
"integrity": "sha512-p1JfQMKaceuCbpJKAPKVqyqviZdS0eUxH9v82oWo1kb9xjQ5wA6iP3FNVAPDFlz5/p7d45lO+BpSk1tuSZMF4Q==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=16.9.0"
|
||||
}
|
||||
@@ -9782,7 +9767,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.6.9.tgz",
|
||||
"integrity": "sha512-RL9sSiLQZECnjbmBwjIHOp8yVGdWF7C/uifg7ISv/e+F3nLNsfl7FdUFQs8iZARFMJAYxMFpxW6OW+HSt9drwQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"ansi-escapes": "^7.0.0",
|
||||
"ansi-styles": "^6.2.3",
|
||||
@@ -13496,7 +13480,6 @@
|
||||
"resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz",
|
||||
"integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
@@ -13507,7 +13490,6 @@
|
||||
"integrity": "sha512-ePrwPfxAnB+7hgnEr8vpKxL9cmnp7F322t8oqcPshbIQQhDKgFDW4tjhF2wjVbdXF9O/nyuy3sQWd9JGpiLPvA==",
|
||||
"devOptional": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"shell-quote": "^1.6.1",
|
||||
"ws": "^7"
|
||||
@@ -15627,7 +15609,6 @@
|
||||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
|
||||
"integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
@@ -15850,8 +15831,7 @@
|
||||
"version": "2.8.1",
|
||||
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
|
||||
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
|
||||
"license": "0BSD",
|
||||
"peer": true
|
||||
"license": "0BSD"
|
||||
},
|
||||
"node_modules/tsx": {
|
||||
"version": "4.20.3",
|
||||
@@ -15859,7 +15839,6 @@
|
||||
"integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==",
|
||||
"devOptional": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"esbuild": "~0.25.0",
|
||||
"get-tsconfig": "^4.7.5"
|
||||
@@ -16025,7 +16004,6 @@
|
||||
"integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
|
||||
"devOptional": true,
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
@@ -16093,7 +16071,6 @@
|
||||
"integrity": "sha512-6sMvZePQrnZH2/cJkwRpkT7DxoAWh+g6+GFRK6bV3YQo7ogi3SX5rgF6099r5Q53Ma5qeT7LGmOmuIutF4t3lA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@typescript-eslint/scope-manager": "8.35.0",
|
||||
"@typescript-eslint/types": "8.35.0",
|
||||
@@ -16480,7 +16457,6 @@
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-7.3.2.tgz",
|
||||
"integrity": "sha512-Bby3NOsna2jsjfLVOHKes8sGwgl4TT0E6vvpYgnAYDIF/tie7MRaFthmKuHx1NSXjiTueXH3do80FMQgvEktRg==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"esbuild": "^0.27.0",
|
||||
"fdir": "^6.5.0",
|
||||
@@ -17051,7 +17027,6 @@
|
||||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
|
||||
"integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
@@ -17064,7 +17039,6 @@
|
||||
"resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz",
|
||||
"integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@types/chai": "^5.2.2",
|
||||
"@vitest/expect": "3.2.4",
|
||||
@@ -17703,7 +17677,6 @@
|
||||
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
|
||||
"integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/colinhacks"
|
||||
}
|
||||
@@ -18139,7 +18112,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.14.3.tgz",
|
||||
"integrity": "sha512-Iq8QQQ/7X3Sac15oB6p0FmUg/klxQvXLeileoqrTRGJYLV+/9tubbr9ipz0GKHjmXVsgFPo/+W+2cA8eNcR+XA==",
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@grpc/proto-loader": "^0.8.0",
|
||||
"@js-sdsl/ordered-map": "^4.4.2"
|
||||
@@ -18258,7 +18230,6 @@
|
||||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
|
||||
"integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
|
||||
@@ -1044,6 +1044,7 @@ export async function loadCliConfig(
|
||||
format: (argv.outputFormat ?? settings.output?.format) as OutputFormat,
|
||||
},
|
||||
gemmaModelRouter: settings.experimental?.gemmaModelRouter,
|
||||
autoRouting: settings.model?.autoRouting,
|
||||
adk: settings.experimental?.adk,
|
||||
fakeResponses: argv.fakeResponses,
|
||||
recordResponses: argv.recordResponses,
|
||||
|
||||
@@ -1112,6 +1112,46 @@ const SETTINGS_SCHEMA = {
|
||||
description: 'Skip the next speaker check.',
|
||||
showInDialog: true,
|
||||
},
|
||||
autoRouting: {
|
||||
type: 'object',
|
||||
label: 'Auto Routing',
|
||||
category: 'Model',
|
||||
requiresRestart: false,
|
||||
default: {},
|
||||
description: 'Settings for automatic model routing.',
|
||||
showInDialog: false,
|
||||
properties: {
|
||||
bestEffortPro: {
|
||||
type: 'boolean',
|
||||
label: 'Best Effort Pro',
|
||||
category: 'Model',
|
||||
requiresRestart: false,
|
||||
default: false,
|
||||
description:
|
||||
'Always prefer the Pro model unless it is unavailable (e.g., due to timeouts or quota), ignoring other routing hints.',
|
||||
showInDialog: true,
|
||||
},
|
||||
proTimeoutMinutes: {
|
||||
type: 'number',
|
||||
label: 'Pro Timeout (Minutes)',
|
||||
category: 'Model',
|
||||
requiresRestart: false,
|
||||
default: 5,
|
||||
description:
|
||||
'If a Pro request takes longer than this many minutes, it will be marked as temporarily unavailable and fallback to Flash.',
|
||||
showInDialog: true,
|
||||
},
|
||||
proTimeoutFallbackDurationMinutes: {
|
||||
type: 'number',
|
||||
label: 'Pro Timeout Fallback Duration (Minutes)',
|
||||
category: 'Model',
|
||||
requiresRestart: false,
|
||||
default: 60,
|
||||
description: 'How long to route to Flash after Pro times out.',
|
||||
showInDialog: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
|
||||
@@ -124,6 +124,9 @@ export const createMockConfig = (overrides: Partial<Config> = {}): Config =>
|
||||
getCompressionThreshold: vi.fn().mockResolvedValue(undefined),
|
||||
getUserCaching: vi.fn().mockResolvedValue(false),
|
||||
getNumericalRoutingEnabled: vi.fn().mockResolvedValue(false),
|
||||
getBestEffortProEnabled: vi.fn().mockResolvedValue(false),
|
||||
getProTimeoutMinutes: vi.fn().mockResolvedValue(5),
|
||||
getProTimeoutFallbackDurationMinutes: vi.fn().mockResolvedValue(60),
|
||||
getClassifierThreshold: vi.fn().mockResolvedValue(undefined),
|
||||
getBannerTextNoCapacityIssues: vi.fn().mockResolvedValue(''),
|
||||
getBannerTextCapacityIssues: vi.fn().mockResolvedValue(''),
|
||||
|
||||
@@ -8,13 +8,15 @@ export type ModelId = string;
|
||||
|
||||
type TerminalUnavailabilityReason = 'quota' | 'capacity';
|
||||
export type TurnUnavailabilityReason = 'retry_once_per_turn';
|
||||
export type TemporaryUnavailabilityReason = 'timeout';
|
||||
|
||||
export type UnavailabilityReason =
|
||||
| TerminalUnavailabilityReason
|
||||
| TurnUnavailabilityReason
|
||||
| TemporaryUnavailabilityReason
|
||||
| 'unknown';
|
||||
|
||||
export type ModelHealthStatus = 'terminal' | 'sticky_retry';
|
||||
export type ModelHealthStatus = 'terminal' | 'sticky_retry' | 'temporary';
|
||||
|
||||
type HealthState =
|
||||
| { status: 'terminal'; reason: TerminalUnavailabilityReason }
|
||||
@@ -22,6 +24,11 @@ type HealthState =
|
||||
status: 'sticky_retry';
|
||||
reason: TurnUnavailabilityReason;
|
||||
consumed: boolean;
|
||||
}
|
||||
| {
|
||||
status: 'temporary';
|
||||
reason: TemporaryUnavailabilityReason;
|
||||
untilMs: number;
|
||||
};
|
||||
|
||||
export interface ModelAvailabilitySnapshot {
|
||||
@@ -48,6 +55,18 @@ export class ModelAvailabilityService {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Records `model` as unavailable for a bounded period of time.
 *
 * The stored state has status `temporary` and carries the given `reason`
 * together with an `untilMs` deadline computed as the current wall-clock
 * time plus `durationMs`.
 *
 * @param model Identifier of the model to flag.
 * @param reason Why the model is being flagged.
 * @param durationMs Length of the unavailability window, in milliseconds.
 */
markTemporarilyUnavailable(
  model: ModelId,
  reason: TemporaryUnavailabilityReason,
  durationMs: number,
) {
  const untilMs = Date.now() + durationMs;
  this.setState(model, { status: 'temporary', reason, untilMs });
}
|
||||
|
||||
markHealthy(model: ModelId) {
|
||||
this.clearState(model);
|
||||
}
|
||||
@@ -95,6 +114,15 @@ export class ModelAvailabilityService {
|
||||
return { available: false, reason: state.reason };
|
||||
}
|
||||
|
||||
if (state.status === 'temporary') {
|
||||
if (Date.now() < state.untilMs) {
|
||||
return { available: false, reason: state.reason };
|
||||
} else {
|
||||
this.clearState(model);
|
||||
return { available: true };
|
||||
}
|
||||
}
|
||||
|
||||
return { available: true };
|
||||
}
|
||||
|
||||
|
||||
@@ -679,6 +679,11 @@ export interface ConfigParameters {
|
||||
policyUpdateConfirmationRequest?: PolicyUpdateConfirmationRequest;
|
||||
output?: OutputSettings;
|
||||
gemmaModelRouter?: GemmaModelRouterSettings;
|
||||
autoRouting?: {
|
||||
bestEffortPro?: boolean;
|
||||
proTimeoutMinutes?: number;
|
||||
proTimeoutFallbackDurationMinutes?: number;
|
||||
};
|
||||
adk?: ADKSettings;
|
||||
disableModelRouterForAuth?: AuthType[];
|
||||
continueOnFailedApiCall?: boolean;
|
||||
@@ -963,6 +968,9 @@ export class Config implements McpContext, AgentLoopContext {
|
||||
private readonly planEnabled: boolean;
|
||||
private readonly trackerEnabled: boolean;
|
||||
private readonly planModeRoutingEnabled: boolean;
|
||||
private readonly autoRoutingBestEffortPro: boolean;
|
||||
private readonly autoRoutingProTimeoutMinutes: number;
|
||||
private readonly autoRoutingProTimeoutFallbackDurationMinutes: number;
|
||||
private readonly modelSteering: boolean;
|
||||
private memoryContextManager?: MemoryContextManager;
|
||||
private readonly contextManagement: ContextManagementConfig;
|
||||
@@ -1117,6 +1125,11 @@ export class Config implements McpContext, AgentLoopContext {
|
||||
this.planEnabled = params.plan ?? true;
|
||||
this.trackerEnabled = params.tracker ?? false;
|
||||
this.planModeRoutingEnabled = params.planSettings?.modelRouting ?? true;
|
||||
this.autoRoutingBestEffortPro = params.autoRouting?.bestEffortPro ?? false;
|
||||
this.autoRoutingProTimeoutMinutes =
|
||||
params.autoRouting?.proTimeoutMinutes ?? 5;
|
||||
this.autoRoutingProTimeoutFallbackDurationMinutes =
|
||||
params.autoRouting?.proTimeoutFallbackDurationMinutes ?? 60;
|
||||
this.enableEventDrivenScheduler = params.enableEventDrivenScheduler ?? true;
|
||||
this.skillsSupport = params.skillsSupport ?? true;
|
||||
this.disabledSkills = params.disabledSkills ?? [];
|
||||
@@ -3144,6 +3157,18 @@ export class Config implements McpContext, AgentLoopContext {
|
||||
return flag?.boolValue ?? true;
|
||||
}
|
||||
|
||||
/**
 * Reports whether the "Best Effort Pro" auto-routing option is enabled.
 *
 * @returns The value held in the read-only `autoRoutingBestEffortPro` field.
 */
async getBestEffortProEnabled(): Promise<boolean> {
  const bestEffortPro = this.autoRoutingBestEffortPro;
  return bestEffortPro;
}
|
||||
|
||||
/**
 * Returns the configured Pro request timeout.
 *
 * @returns The value held in the read-only `autoRoutingProTimeoutMinutes`
 *   field, expressed in minutes.
 */
async getProTimeoutMinutes(): Promise<number> {
  const timeoutMinutes = this.autoRoutingProTimeoutMinutes;
  return timeoutMinutes;
}
|
||||
|
||||
/**
 * Returns the configured fallback duration that applies after a Pro timeout.
 *
 * @returns The value held in the read-only
 *   `autoRoutingProTimeoutFallbackDurationMinutes` field, expressed in minutes.
 */
async getProTimeoutFallbackDurationMinutes(): Promise<number> {
  const fallbackMinutes = this.autoRoutingProTimeoutFallbackDurationMinutes;
  return fallbackMinutes;
}
|
||||
|
||||
/**
|
||||
* Returns the resolved complexity threshold for routing.
|
||||
* If a remote threshold is provided and within range (0-100), it is returned.
|
||||
|
||||
@@ -109,6 +109,8 @@ describe('BaseLlmClient', () => {
|
||||
.mockReturnValue({ authType: AuthType.USE_GEMINI }),
|
||||
getEmbeddingModel: vi.fn().mockReturnValue('test-embedding-model'),
|
||||
isInteractive: vi.fn().mockReturnValue(false),
|
||||
getProTimeoutMinutes: vi.fn().mockResolvedValue(5),
|
||||
getProTimeoutFallbackDurationMinutes: vi.fn().mockResolvedValue(60),
|
||||
modelConfigService: {
|
||||
getResolvedConfig: vi
|
||||
.fn()
|
||||
|
||||
@@ -325,11 +325,19 @@ export class BaseLlmClient {
|
||||
);
|
||||
};
|
||||
|
||||
const proTimeoutMinutes = await this.config.getProTimeoutMinutes();
|
||||
const proTimeoutFallbackDurationMinutes =
|
||||
await this.config.getProTimeoutFallbackDurationMinutes();
|
||||
|
||||
return await retryWithBackoff(apiCall, {
|
||||
shouldRetryOnContent,
|
||||
maxAttempts:
|
||||
availabilityMaxAttempts ?? maxAttempts ?? DEFAULT_MAX_ATTEMPTS,
|
||||
getAvailabilityContext,
|
||||
timeoutFallback: {
|
||||
timeoutMs: proTimeoutMinutes * 60 * 1000,
|
||||
fallbackDurationMs: proTimeoutFallbackDurationMinutes * 60 * 1000,
|
||||
},
|
||||
onPersistent429: this.config.isInteractive()
|
||||
? (authType, error) =>
|
||||
handleFallback(this.config, currentModel, authType, error)
|
||||
|
||||
@@ -160,6 +160,8 @@ describe('GeminiChat', () => {
|
||||
authType: 'oauth-personal',
|
||||
model: currentModel,
|
||||
})),
|
||||
getProTimeoutMinutes: vi.fn().mockResolvedValue(5),
|
||||
getProTimeoutFallbackDurationMinutes: vi.fn().mockResolvedValue(60),
|
||||
getModel: vi.fn().mockImplementation(() => currentModel),
|
||||
setModel: vi.fn().mockImplementation((m: string) => {
|
||||
currentModel = m;
|
||||
|
||||
@@ -687,6 +687,10 @@ export class GeminiChat {
|
||||
);
|
||||
};
|
||||
|
||||
const proTimeoutMinutes = await this.context.config.getProTimeoutMinutes();
|
||||
const proTimeoutFallbackDurationMinutes =
|
||||
await this.context.config.getProTimeoutFallbackDurationMinutes();
|
||||
|
||||
const streamResponse = await retryWithBackoff(apiCall, {
|
||||
onPersistent429: onPersistent429Callback,
|
||||
onValidationRequired: onValidationRequiredCallback,
|
||||
@@ -696,6 +700,10 @@ export class GeminiChat {
|
||||
maxAttempts:
|
||||
availabilityMaxAttempts ?? this.context.config.getMaxAttempts(),
|
||||
getAvailabilityContext,
|
||||
timeoutFallback: {
|
||||
timeoutMs: proTimeoutMinutes * 60 * 1000,
|
||||
fallbackDurationMs: proTimeoutFallbackDurationMinutes * 60 * 1000,
|
||||
},
|
||||
onRetry: (attempt, error, delayMs) => {
|
||||
coreEvents.emitRetryAttempt({
|
||||
attempt,
|
||||
|
||||
@@ -103,6 +103,8 @@ describe('GeminiChat Network Retries', () => {
|
||||
authType: 'oauth-personal',
|
||||
model: 'test-model',
|
||||
}),
|
||||
getProTimeoutMinutes: vi.fn().mockResolvedValue(5),
|
||||
getProTimeoutFallbackDurationMinutes: vi.fn().mockResolvedValue(60),
|
||||
getModel: vi.fn().mockReturnValue('gemini-pro'),
|
||||
getActiveModel: vi.fn().mockReturnValue('gemini-pro'),
|
||||
setActiveModel: vi.fn(),
|
||||
|
||||
@@ -32,6 +32,9 @@ vi.mock('./strategies/overrideStrategy.js');
|
||||
vi.mock('./strategies/approvalModeStrategy.js');
|
||||
vi.mock('./strategies/classifierStrategy.js');
|
||||
vi.mock('./strategies/numericalClassifierStrategy.js');
|
||||
import { BestEffortProStrategy } from './strategies/bestEffortProStrategy.js';
|
||||
|
||||
vi.mock('./strategies/bestEffortProStrategy.js');
|
||||
vi.mock('./strategies/gemmaClassifierStrategy.js');
|
||||
vi.mock('../telemetry/loggers.js');
|
||||
vi.mock('../telemetry/types.js');
|
||||
@@ -74,6 +77,7 @@ describe('ModelRouterService', () => {
|
||||
[
|
||||
new FallbackStrategy(),
|
||||
new OverrideStrategy(),
|
||||
new BestEffortProStrategy(),
|
||||
new ApprovalModeStrategy(),
|
||||
new ClassifierStrategy(),
|
||||
new NumericalClassifierStrategy(),
|
||||
@@ -104,13 +108,14 @@ describe('ModelRouterService', () => {
|
||||
const compositeStrategyArgs = vi.mocked(CompositeStrategy).mock.calls[0];
|
||||
const childStrategies = compositeStrategyArgs[0];
|
||||
|
||||
expect(childStrategies.length).toBe(6);
|
||||
expect(childStrategies.length).toBe(7);
|
||||
expect(childStrategies[0]).toBeInstanceOf(FallbackStrategy);
|
||||
expect(childStrategies[1]).toBeInstanceOf(OverrideStrategy);
|
||||
expect(childStrategies[2]).toBeInstanceOf(ApprovalModeStrategy);
|
||||
expect(childStrategies[3]).toBeInstanceOf(ClassifierStrategy);
|
||||
expect(childStrategies[4]).toBeInstanceOf(NumericalClassifierStrategy);
|
||||
expect(childStrategies[5]).toBeInstanceOf(DefaultStrategy);
|
||||
expect(childStrategies[2]).toBeInstanceOf(BestEffortProStrategy);
|
||||
expect(childStrategies[3]).toBeInstanceOf(ApprovalModeStrategy);
|
||||
expect(childStrategies[4]).toBeInstanceOf(ClassifierStrategy);
|
||||
expect(childStrategies[5]).toBeInstanceOf(NumericalClassifierStrategy);
|
||||
expect(childStrategies[6]).toBeInstanceOf(DefaultStrategy);
|
||||
expect(compositeStrategyArgs[1]).toBe('agent-router');
|
||||
});
|
||||
|
||||
@@ -133,14 +138,15 @@ describe('ModelRouterService', () => {
|
||||
const compositeStrategyArgs = vi.mocked(CompositeStrategy).mock.calls[0];
|
||||
const childStrategies = compositeStrategyArgs[0];
|
||||
|
||||
expect(childStrategies.length).toBe(7);
|
||||
expect(childStrategies.length).toBe(8);
|
||||
expect(childStrategies[0]).toBeInstanceOf(FallbackStrategy);
|
||||
expect(childStrategies[1]).toBeInstanceOf(OverrideStrategy);
|
||||
expect(childStrategies[2]).toBeInstanceOf(ApprovalModeStrategy);
|
||||
expect(childStrategies[3]).toBeInstanceOf(GemmaClassifierStrategy);
|
||||
expect(childStrategies[4]).toBeInstanceOf(ClassifierStrategy);
|
||||
expect(childStrategies[5]).toBeInstanceOf(NumericalClassifierStrategy);
|
||||
expect(childStrategies[6]).toBeInstanceOf(DefaultStrategy);
|
||||
expect(childStrategies[2]).toBeInstanceOf(BestEffortProStrategy);
|
||||
expect(childStrategies[3]).toBeInstanceOf(ApprovalModeStrategy);
|
||||
expect(childStrategies[4]).toBeInstanceOf(GemmaClassifierStrategy);
|
||||
expect(childStrategies[5]).toBeInstanceOf(ClassifierStrategy);
|
||||
expect(childStrategies[6]).toBeInstanceOf(NumericalClassifierStrategy);
|
||||
expect(childStrategies[7]).toBeInstanceOf(DefaultStrategy);
|
||||
expect(compositeStrategyArgs[1]).toBe('agent-router');
|
||||
});
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ import { NumericalClassifierStrategy } from './strategies/numericalClassifierStr
|
||||
import { CompositeStrategy } from './strategies/compositeStrategy.js';
|
||||
import { FallbackStrategy } from './strategies/fallbackStrategy.js';
|
||||
import { OverrideStrategy } from './strategies/overrideStrategy.js';
|
||||
import { BestEffortProStrategy } from './strategies/bestEffortProStrategy.js';
|
||||
import { ApprovalModeStrategy } from './strategies/approvalModeStrategy.js';
|
||||
|
||||
import { logModelRouting } from '../telemetry/loggers.js';
|
||||
@@ -43,6 +44,9 @@ export class ModelRouterService {
|
||||
strategies.push(new FallbackStrategy());
|
||||
strategies.push(new OverrideStrategy());
|
||||
|
||||
// Best Effort Pro is next.
|
||||
strategies.push(new BestEffortProStrategy());
|
||||
|
||||
// Approval mode is next.
|
||||
strategies.push(new ApprovalModeStrategy());
|
||||
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import type { Config } from '../../config/config.js';
|
||||
import { isAutoModel, resolveModel } from '../../config/models.js';
|
||||
import type {
|
||||
RoutingStrategy,
|
||||
RoutingDecision,
|
||||
RoutingContext,
|
||||
} from '../routingStrategy.js';
|
||||
|
||||
/**
 * Routing strategy backing the "Best Effort Pro" setting.
 *
 * The strategy abstains (returns `null`) unless the configured model is an
 * auto model and the setting is enabled. When it does apply, it selects the
 * resolved Pro model if the availability service reports it as available,
 * and the resolved Flash model otherwise.
 */
export class BestEffortProStrategy implements RoutingStrategy {
  name = 'best-effort-pro';

  /**
   * Decides between the Pro and Flash models for the current request.
   *
   * @param context Routing context; not consulted by this strategy.
   * @param config Source of the requested model, the feature flags and the
   *   model availability service.
   * @returns A routing decision, or `null` when this strategy does not apply.
   */
  async route(
    context: RoutingContext,
    config: Config,
  ): Promise<RoutingDecision | null> {
    // Guard clauses: only auto models with the setting switched on are handled.
    if (!isAutoModel(config.getModel())) {
      return null;
    }
    if (!(await config.getBestEffortProEnabled())) {
      return null;
    }

    // The optional getters may be absent on the config object, hence the
    // `?.()` calls with explicit defaults.
    const gemini31Launched = (await config.getGemini31Launched?.()) ?? false;
    const gemini31FlashLiteLaunched =
      (await config.getGemini31FlashLiteLaunched?.()) ?? false;
    const previewAccess = config.getHasAccessToPreviewModel?.() ?? true;

    const availability = config.getModelAvailabilityService();

    // Both candidates are resolved with the same flag set; only the requested
    // identifier differs.
    // NOTE(review): these identifiers are handed to resolveModel verbatim --
    // confirm that it recognises them.
    const resolveCandidate = (requested: string) =>
      resolveModel(
        requested,
        gemini31Launched,
        gemini31FlashLiteLaunched,
        false,
        previewAccess,
        config,
      );
    const proModel = resolveCandidate('gemini-3.1-pro');
    const flashModel = resolveCandidate('gemini-3.1-flash');

    const proStatus = availability.snapshot(proModel);
    const proIsAvailable = proStatus.available;

    return {
      model: proIsAvailable ? proModel : flashModel,
      metadata: {
        source: this.name,
        latencyMs: 0,
        reasoning: proIsAvailable
          ? 'Best Effort Pro is enabled and the Pro model is available.'
          : `Best Effort Pro is enabled, but Pro is unavailable (${proStatus.reason}). Falling back to Flash.`,
      },
    };
  }
}
|
||||
@@ -37,6 +37,10 @@ export interface RetryOptions {
|
||||
signal?: AbortSignal;
|
||||
getAvailabilityContext?: () => RetryAvailabilityContext | undefined;
|
||||
onRetry?: (attempt: number, error: unknown, delayMs: number) => void;
|
||||
timeoutFallback?: {
|
||||
timeoutMs: number;
|
||||
fallbackDurationMs: number;
|
||||
};
|
||||
}
|
||||
|
||||
const DEFAULT_RETRY_OPTIONS: RetryOptions = {
|
||||
@@ -240,6 +244,7 @@ export async function retryWithBackoff<T>(
|
||||
signal,
|
||||
getAvailabilityContext,
|
||||
onRetry,
|
||||
timeoutFallback,
|
||||
} = {
|
||||
...DEFAULT_RETRY_OPTIONS,
|
||||
shouldRetryOnError: isRetryableError,
|
||||
@@ -248,6 +253,7 @@ export async function retryWithBackoff<T>(
|
||||
|
||||
let attempt = 0;
|
||||
let currentDelay = initialDelayMs;
|
||||
let startTime = Date.now();
|
||||
const throwIfAborted = () => {
|
||||
if (signal?.aborted) {
|
||||
throw createAbortError();
|
||||
@@ -294,6 +300,42 @@ export async function retryWithBackoff<T>(
|
||||
|
||||
const errorCode = getErrorStatus(error);
|
||||
|
||||
const isTimeout =
|
||||
(error instanceof Error &&
|
||||
error.message.toLowerCase().includes('timeout')) ||
|
||||
getRetryErrorType(error) === 'ETIMEDOUT' ||
|
||||
getRetryErrorType(error) === 'FETCH_FAILED';
|
||||
|
||||
if (isTimeout && timeoutFallback) {
|
||||
if (Date.now() - startTime >= timeoutFallback.timeoutMs) {
|
||||
const successContext = getAvailabilityContext?.();
|
||||
if (successContext) {
|
||||
successContext.service.markTemporarilyUnavailable(
|
||||
successContext.policy.model,
|
||||
'timeout',
|
||||
timeoutFallback.fallbackDurationMs,
|
||||
);
|
||||
}
|
||||
if (onPersistent429) {
|
||||
try {
|
||||
const fallbackModel = await onPersistent429(
|
||||
authType,
|
||||
new Error('Request timed out'),
|
||||
);
|
||||
if (fallbackModel) {
|
||||
attempt = 0;
|
||||
currentDelay = initialDelayMs;
|
||||
startTime = Date.now();
|
||||
continue;
|
||||
}
|
||||
} catch (fallbackError) {
|
||||
debugLogger.warn('Model fallback failed:', fallbackError);
|
||||
}
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
classifiedError instanceof TerminalQuotaError ||
|
||||
classifiedError instanceof ModelNotFoundError
|
||||
|
||||
Reference in New Issue
Block a user