From 19601955fdec686541f471a810c7b3671ac7e092 Mon Sep 17 00:00:00 2001 From: jacob314 Date: Thu, 23 Apr 2026 13:50:24 -0700 Subject: [PATCH] feat(routing): availability-aware auto-routing with best-effort pro Adds settings and logic to detect slow/hanging Pro model requests, marking them as temporarily unavailable and automatically triggering a fallback to Flash. Introduces a proTimeoutMinutes and bestEffortPro strategy configuration. --- package-lock.json | 35 +------- packages/cli/src/config/config.ts | 1 + packages/cli/src/config/settingsSchema.ts | 40 +++++++++ packages/cli/src/test-utils/mockConfig.ts | 3 + .../availability/modelAvailabilityService.ts | 30 ++++++- packages/core/src/config/config.ts | 25 ++++++ packages/core/src/core/baseLlmClient.test.ts | 2 + packages/core/src/core/baseLlmClient.ts | 8 ++ packages/core/src/core/geminiChat.test.ts | 2 + packages/core/src/core/geminiChat.ts | 8 ++ .../src/core/geminiChat_network_retry.test.ts | 2 + .../src/routing/modelRouterService.test.ts | 28 ++++--- .../core/src/routing/modelRouterService.ts | 4 + .../strategies/bestEffortProStrategy.ts | 83 +++++++++++++++++++ packages/core/src/utils/retry.ts | 42 ++++++++++ 15 files changed, 269 insertions(+), 44 deletions(-) create mode 100644 packages/core/src/routing/strategies/bestEffortProStrategy.ts diff --git a/package-lock.json b/package-lock.json index 71af158b14..96073430ab 100644 --- a/package-lock.json +++ b/package-lock.json @@ -449,8 +449,7 @@ "version": "2.11.0", "resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.11.0.tgz", "integrity": "sha512-sBXGT13cpmPR5BMgHE6UEEfEaShh5Ror6rfN3yEK5si7QVrtZg8LEPQb0VVhiLRUslD2yLnXtnRzG035J/mZXQ==", - "license": "(Apache-2.0 AND BSD-3-Clause)", - "peer": true + "license": "(Apache-2.0 AND BSD-3-Clause)" }, "node_modules/@bundled-es-modules/cookie": { "version": "2.0.1", @@ -1474,7 +1473,6 @@ "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.13.4.tgz", "integrity": "sha512-GsFaMXCkMqkKIvwCQjCrwH+GHbPKBjhwo/8ZuUkWHqbI73Kky9I+pQltrlT0+MWpedCoosda53lgjYfyEPgxBg==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@grpc/proto-loader": "^0.7.13", "@js-sdsl/ordered-map": "^4.4.2" @@ -2152,7 +2150,6 @@ "integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@octokit/auth-token": "^6.0.0", "@octokit/graphql": "^9.0.2", @@ -2333,7 +2330,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz", "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", "license": "Apache-2.0", - "peer": true, "engines": { "node": ">=8.0.0" } @@ -2383,7 +2379,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.5.0.tgz", "integrity": "sha512-ka4H8OM6+DlUhSAZpONu0cPBtPPTQKxbxVzC4CzVx5+K4JnroJVBtDzLAMx4/3CDTJXRvVFhpFjtl4SaiTNoyQ==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@opentelemetry/semantic-conventions": "^1.29.0" }, @@ -2758,7 +2753,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.5.0.tgz", "integrity": "sha512-F8W52ApePshpoSrfsSk1H2yJn9aKjCrbpQF1M9Qii0GHzbfVeFUB+rc3X4aggyZD8x9Gu3Slua+s6krmq6Dt8g==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/semantic-conventions": "^1.29.0" @@ -2792,7 +2786,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.5.0.tgz", "integrity": "sha512-BeJLtU+f5Gf905cJX9vXFQorAr6TAfK3SPvTFqP+scfIpDQEJfRaGJWta7sJgP+m4dNtBf9y3yvBKVAZZtJQVA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/resources": "2.5.0" @@ -2847,7 +2840,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.5.0.tgz", "integrity": "sha512-VzRf8LzotASEyNDUxTdaJ9IRJ1/h692WyArDBInf5puLCjxbICD6XkHgpuudis56EndyS7LYFmtTMny6UABNdQ==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/resources": "2.5.0", @@ -4054,7 +4046,6 @@ "integrity": "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "csstype": "^3.0.2" } @@ -4328,7 +4319,6 @@ "integrity": "sha512-/Zb/xaIDfxeJnvishjGdcR4jmr7S+bda8PKNhRGdljDM+elXhlvN0FyPSsMnLmJUrVG9aPO6dof80wjMawsASg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.58.2", "@typescript-eslint/types": "8.58.2", @@ -5073,7 +5063,6 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -7151,8 +7140,7 @@ "version": "0.0.1581282", "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz", "integrity": "sha512-nv7iKtNZQshSW2hKzYNr46nM/Cfh5SEvE2oV0/SEGgc9XupIY5ggf84Cz8eJIkBce7S3bmTAauFD6aysMpnqsQ==", - "license": "BSD-3-Clause", - "peer": true + "license": "BSD-3-Clause" }, "node_modules/dezalgo": { "version": "1.0.4", @@ -7737,7 +7725,6 @@ "integrity": "sha512-GsGizj2Y1rCWDu6XoEekL3RLilp0voSePurjZIkxL3wlm5o5EC9VpgaP7lrCvjnkuLvzFBQWB3vWB3K5KQTveQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.12.1", @@ -8255,7 +8242,6 @@ "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", "license": "MIT", - "peer": true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", @@ -9522,7 +9508,6 @@ "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.12.tgz", "integrity": "sha512-p1JfQMKaceuCbpJKAPKVqyqviZdS0eUxH9v82oWo1kb9xjQ5wA6iP3FNVAPDFlz5/p7d45lO+BpSk1tuSZMF4Q==", "license": "MIT", - "peer": true, "engines": { "node": ">=16.9.0" } @@ -9782,7 +9767,6 @@ "resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.6.9.tgz", "integrity": "sha512-RL9sSiLQZECnjbmBwjIHOp8yVGdWF7C/uifg7ISv/e+F3nLNsfl7FdUFQs8iZARFMJAYxMFpxW6OW+HSt9drwQ==", "license": "MIT", - "peer": true, "dependencies": { "ansi-escapes": "^7.0.0", "ansi-styles": "^6.2.3", @@ -13496,7 +13480,6 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", "license": "MIT", - "peer": true, "engines": { "node": ">=0.10.0" } @@ -13507,7 +13490,6 @@ "integrity": "sha512-ePrwPfxAnB+7hgnEr8vpKxL9cmnp7F322t8oqcPshbIQQhDKgFDW4tjhF2wjVbdXF9O/nyuy3sQWd9JGpiLPvA==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "shell-quote": "^1.6.1", "ws": "^7" @@ -15627,7 +15609,6 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -15850,8 +15831,7 @@ "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD", - "peer": true + "license": "0BSD" }, "node_modules/tsx": { "version": "4.20.3", @@ -15859,7 +15839,6 @@ "integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "~0.25.0", "get-tsconfig": "^4.7.5" @@ -16025,7 +16004,6 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "devOptional": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -16093,7 +16071,6 @@ "integrity": "sha512-6sMvZePQrnZH2/cJkwRpkT7DxoAWh+g6+GFRK6bV3YQo7ogi3SX5rgF6099r5Q53Ma5qeT7LGmOmuIutF4t3lA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.35.0", "@typescript-eslint/types": "8.35.0", @@ -16480,7 +16457,6 @@ "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.2.tgz", "integrity": "sha512-Bby3NOsna2jsjfLVOHKes8sGwgl4TT0E6vvpYgnAYDIF/tie7MRaFthmKuHx1NSXjiTueXH3do80FMQgvEktRg==", "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.27.0", "fdir": "^6.5.0", @@ -17051,7 +17027,6 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -17064,7 +17039,6 @@ "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz", "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==", "license": "MIT", - "peer": true, "dependencies": { "@types/chai": "^5.2.2", "@vitest/expect": "3.2.4", @@ -17703,7 +17677,6 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", "license": "MIT", - "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } @@ -18139,7 +18112,6 @@ "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.14.3.tgz", "integrity": "sha512-Iq8QQQ/7X3Sac15oB6p0FmUg/klxQvXLeileoqrTRGJYLV+/9tubbr9ipz0GKHjmXVsgFPo/+W+2cA8eNcR+XA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@grpc/proto-loader": "^0.8.0", "@js-sdsl/ordered-map": "^4.4.2" @@ -18258,7 +18230,6 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index f7e7c5086b..4f0adc938d 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -1044,6 +1044,7 @@ export async function loadCliConfig( format: (argv.outputFormat ?? settings.output?.format) as OutputFormat, }, gemmaModelRouter: settings.experimental?.gemmaModelRouter, + autoRouting: settings.model?.autoRouting, adk: settings.experimental?.adk, fakeResponses: argv.fakeResponses, recordResponses: argv.recordResponses, diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index f5da86b60a..727530bcc4 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -1112,6 +1112,46 @@ const SETTINGS_SCHEMA = { description: 'Skip the next speaker check.', showInDialog: true, }, + autoRouting: { + type: 'object', + label: 'Auto Routing', + category: 'Model', + requiresRestart: false, + default: {}, + description: 'Settings for automatic model routing.', + showInDialog: false, + properties: { + bestEffortPro: { + type: 'boolean', + label: 'Best Effort Pro', + category: 'Model', + requiresRestart: false, + default: false, + description: + 'Always prefer the Pro model unless it is unavailable (e.g., due to timeouts or quota), ignoring other routing hints.', + showInDialog: true, + }, + proTimeoutMinutes: { + type: 'number', + label: 'Pro Timeout (Minutes)', + category: 'Model', + requiresRestart: false, + default: 5, + description: + 'If a Pro request takes longer than this many minutes, it will be marked as temporarily unavailable and fallback to Flash.', + showInDialog: true, + }, + proTimeoutFallbackDurationMinutes: { + type: 'number', + label: 'Pro Timeout Fallback Duration (Minutes)', + category: 'Model', + requiresRestart: false, + default: 60, + description: 'How long to route to Flash after Pro times out.', + showInDialog: true, + }, + }, + }, }, }, diff --git a/packages/cli/src/test-utils/mockConfig.ts b/packages/cli/src/test-utils/mockConfig.ts index ffcafb37b2..cb05263c32 100644 --- a/packages/cli/src/test-utils/mockConfig.ts +++ b/packages/cli/src/test-utils/mockConfig.ts @@ -124,6 +124,9 @@ export const createMockConfig = (overrides: Partial = {}): Config => getCompressionThreshold: vi.fn().mockResolvedValue(undefined), getUserCaching: vi.fn().mockResolvedValue(false), getNumericalRoutingEnabled: vi.fn().mockResolvedValue(false), + getBestEffortProEnabled: vi.fn().mockResolvedValue(false), + getProTimeoutMinutes: vi.fn().mockResolvedValue(5), + getProTimeoutFallbackDurationMinutes: vi.fn().mockResolvedValue(60), getClassifierThreshold: vi.fn().mockResolvedValue(undefined), getBannerTextNoCapacityIssues: vi.fn().mockResolvedValue(''), getBannerTextCapacityIssues: vi.fn().mockResolvedValue(''), diff --git a/packages/core/src/availability/modelAvailabilityService.ts b/packages/core/src/availability/modelAvailabilityService.ts index 051003f667..9b7569ef98 100644 --- a/packages/core/src/availability/modelAvailabilityService.ts +++ b/packages/core/src/availability/modelAvailabilityService.ts @@ -8,13 +8,15 @@ export type ModelId = string; type TerminalUnavailabilityReason = 'quota' | 'capacity'; export type TurnUnavailabilityReason = 'retry_once_per_turn'; +export type TemporaryUnavailabilityReason = 'timeout'; export type UnavailabilityReason = | TerminalUnavailabilityReason | TurnUnavailabilityReason + | TemporaryUnavailabilityReason | 'unknown'; -export type ModelHealthStatus = 'terminal' | 'sticky_retry'; +export type ModelHealthStatus = 'terminal' | 'sticky_retry' | 'temporary'; type HealthState = | { status: 'terminal'; reason: TerminalUnavailabilityReason } @@ -22,6 +24,11 @@ type HealthState = status: 'sticky_retry'; reason: TurnUnavailabilityReason; consumed: boolean; + } + | { + status: 'temporary'; + reason: TemporaryUnavailabilityReason; + untilMs: number; }; export interface ModelAvailabilitySnapshot { @@ -48,6 +55,18 @@ export class ModelAvailabilityService { }); } + markTemporarilyUnavailable( + model: ModelId, + reason: TemporaryUnavailabilityReason, + durationMs: number, + ) { + this.setState(model, { + status: 'temporary', + reason, + untilMs: Date.now() + durationMs, + }); + } + markHealthy(model: ModelId) { this.clearState(model); } @@ -95,6 +114,15 @@ export class ModelAvailabilityService { return { available: false, reason: state.reason }; } + if (state.status === 'temporary') { + if (Date.now() < state.untilMs) { + return { available: false, reason: state.reason }; + } else { + this.clearState(model); + return { available: true }; + } + } + return { available: true }; } diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index a6ca91d7b5..2684adbb34 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -679,6 +679,11 @@ export interface ConfigParameters { policyUpdateConfirmationRequest?: PolicyUpdateConfirmationRequest; output?: OutputSettings; gemmaModelRouter?: GemmaModelRouterSettings; + autoRouting?: { + bestEffortPro?: boolean; + proTimeoutMinutes?: number; + proTimeoutFallbackDurationMinutes?: number; + }; adk?: ADKSettings; disableModelRouterForAuth?: AuthType[]; continueOnFailedApiCall?: boolean; @@ -963,6 +968,9 @@ export class Config implements McpContext, AgentLoopContext { private readonly planEnabled: boolean; private readonly trackerEnabled: boolean; private readonly planModeRoutingEnabled: boolean; + private readonly autoRoutingBestEffortPro: boolean; + private readonly autoRoutingProTimeoutMinutes: number; + private readonly autoRoutingProTimeoutFallbackDurationMinutes: number; private readonly modelSteering: boolean; private memoryContextManager?: MemoryContextManager; private readonly contextManagement: ContextManagementConfig; @@ -1117,6 +1125,11 @@ export class Config implements McpContext, AgentLoopContext { this.planEnabled = params.plan ?? true; this.trackerEnabled = params.tracker ?? false; this.planModeRoutingEnabled = params.planSettings?.modelRouting ?? true; + this.autoRoutingBestEffortPro = params.autoRouting?.bestEffortPro ?? false; + this.autoRoutingProTimeoutMinutes = + params.autoRouting?.proTimeoutMinutes ?? 5; + this.autoRoutingProTimeoutFallbackDurationMinutes = + params.autoRouting?.proTimeoutFallbackDurationMinutes ?? 60; this.enableEventDrivenScheduler = params.enableEventDrivenScheduler ?? true; this.skillsSupport = params.skillsSupport ?? true; this.disabledSkills = params.disabledSkills ?? []; @@ -3144,6 +3157,18 @@ export class Config implements McpContext, AgentLoopContext { return flag?.boolValue ?? true; } + async getBestEffortProEnabled(): Promise { + return this.autoRoutingBestEffortPro; + } + + async getProTimeoutMinutes(): Promise { + return this.autoRoutingProTimeoutMinutes; + } + + async getProTimeoutFallbackDurationMinutes(): Promise { + return this.autoRoutingProTimeoutFallbackDurationMinutes; + } + /** * Returns the resolved complexity threshold for routing. * If a remote threshold is provided and within range (0-100), it is returned. diff --git a/packages/core/src/core/baseLlmClient.test.ts b/packages/core/src/core/baseLlmClient.test.ts index 5bfefa6665..8304d63d0f 100644 --- a/packages/core/src/core/baseLlmClient.test.ts +++ b/packages/core/src/core/baseLlmClient.test.ts @@ -109,6 +109,8 @@ describe('BaseLlmClient', () => { .mockReturnValue({ authType: AuthType.USE_GEMINI }), getEmbeddingModel: vi.fn().mockReturnValue('test-embedding-model'), isInteractive: vi.fn().mockReturnValue(false), + getProTimeoutMinutes: vi.fn().mockResolvedValue(5), + getProTimeoutFallbackDurationMinutes: vi.fn().mockResolvedValue(60), modelConfigService: { getResolvedConfig: vi .fn() diff --git a/packages/core/src/core/baseLlmClient.ts b/packages/core/src/core/baseLlmClient.ts index 2b03f27b79..9bc1cc304a 100644 --- a/packages/core/src/core/baseLlmClient.ts +++ b/packages/core/src/core/baseLlmClient.ts @@ -325,11 +325,19 @@ export class BaseLlmClient { ); }; + const proTimeoutMinutes = await this.config.getProTimeoutMinutes(); + const proTimeoutFallbackDurationMinutes = + await this.config.getProTimeoutFallbackDurationMinutes(); + return await retryWithBackoff(apiCall, { shouldRetryOnContent, maxAttempts: availabilityMaxAttempts ?? maxAttempts ?? DEFAULT_MAX_ATTEMPTS, getAvailabilityContext, + timeoutFallback: { + timeoutMs: proTimeoutMinutes * 60 * 1000, + fallbackDurationMs: proTimeoutFallbackDurationMinutes * 60 * 1000, + }, onPersistent429: this.config.isInteractive() ? (authType, error) => handleFallback(this.config, currentModel, authType, error) diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index 4beb14ea06..e5b111221b 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -160,6 +160,8 @@ describe('GeminiChat', () => { authType: 'oauth-personal', model: currentModel, })), + getProTimeoutMinutes: vi.fn().mockResolvedValue(5), + getProTimeoutFallbackDurationMinutes: vi.fn().mockResolvedValue(60), getModel: vi.fn().mockImplementation(() => currentModel), setModel: vi.fn().mockImplementation((m: string) => { currentModel = m; diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index c480c3800b..aa17f11963 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -687,6 +687,10 @@ export class GeminiChat { ); }; + const proTimeoutMinutes = await this.context.config.getProTimeoutMinutes(); + const proTimeoutFallbackDurationMinutes = + await this.context.config.getProTimeoutFallbackDurationMinutes(); + const streamResponse = await retryWithBackoff(apiCall, { onPersistent429: onPersistent429Callback, onValidationRequired: onValidationRequiredCallback, @@ -696,6 +700,10 @@ export class GeminiChat { maxAttempts: availabilityMaxAttempts ?? this.context.config.getMaxAttempts(), getAvailabilityContext, + timeoutFallback: { + timeoutMs: proTimeoutMinutes * 60 * 1000, + fallbackDurationMs: proTimeoutFallbackDurationMinutes * 60 * 1000, + }, onRetry: (attempt, error, delayMs) => { coreEvents.emitRetryAttempt({ attempt, diff --git a/packages/core/src/core/geminiChat_network_retry.test.ts b/packages/core/src/core/geminiChat_network_retry.test.ts index 7d9bf67848..07a1731631 100644 --- a/packages/core/src/core/geminiChat_network_retry.test.ts +++ b/packages/core/src/core/geminiChat_network_retry.test.ts @@ -103,6 +103,8 @@ describe('GeminiChat Network Retries', () => { authType: 'oauth-personal', model: 'test-model', }), + getProTimeoutMinutes: vi.fn().mockResolvedValue(5), + getProTimeoutFallbackDurationMinutes: vi.fn().mockResolvedValue(60), getModel: vi.fn().mockReturnValue('gemini-pro'), getActiveModel: vi.fn().mockReturnValue('gemini-pro'), setActiveModel: vi.fn(), diff --git a/packages/core/src/routing/modelRouterService.test.ts b/packages/core/src/routing/modelRouterService.test.ts index 4e0c32c62f..c3f82cd5a1 100644 --- a/packages/core/src/routing/modelRouterService.test.ts +++ b/packages/core/src/routing/modelRouterService.test.ts @@ -32,6 +32,9 @@ vi.mock('./strategies/overrideStrategy.js'); vi.mock('./strategies/approvalModeStrategy.js'); vi.mock('./strategies/classifierStrategy.js'); vi.mock('./strategies/numericalClassifierStrategy.js'); +import { BestEffortProStrategy } from './strategies/bestEffortProStrategy.js'; + +vi.mock('./strategies/bestEffortProStrategy.js'); vi.mock('./strategies/gemmaClassifierStrategy.js'); vi.mock('../telemetry/loggers.js'); vi.mock('../telemetry/types.js'); @@ -74,6 +77,7 @@ describe('ModelRouterService', () => { [ new FallbackStrategy(), new OverrideStrategy(), + new BestEffortProStrategy(), new ApprovalModeStrategy(), new ClassifierStrategy(), new NumericalClassifierStrategy(), @@ -104,13 +108,14 @@ describe('ModelRouterService', () => { const compositeStrategyArgs = vi.mocked(CompositeStrategy).mock.calls[0]; const childStrategies = compositeStrategyArgs[0]; - expect(childStrategies.length).toBe(6); + expect(childStrategies.length).toBe(7); expect(childStrategies[0]).toBeInstanceOf(FallbackStrategy); expect(childStrategies[1]).toBeInstanceOf(OverrideStrategy); - expect(childStrategies[2]).toBeInstanceOf(ApprovalModeStrategy); - expect(childStrategies[3]).toBeInstanceOf(ClassifierStrategy); - expect(childStrategies[4]).toBeInstanceOf(NumericalClassifierStrategy); - expect(childStrategies[5]).toBeInstanceOf(DefaultStrategy); + expect(childStrategies[2]).toBeInstanceOf(BestEffortProStrategy); + expect(childStrategies[3]).toBeInstanceOf(ApprovalModeStrategy); + expect(childStrategies[4]).toBeInstanceOf(ClassifierStrategy); + expect(childStrategies[5]).toBeInstanceOf(NumericalClassifierStrategy); + expect(childStrategies[6]).toBeInstanceOf(DefaultStrategy); expect(compositeStrategyArgs[1]).toBe('agent-router'); }); @@ -133,14 +138,15 @@ describe('ModelRouterService', () => { const compositeStrategyArgs = vi.mocked(CompositeStrategy).mock.calls[0]; const childStrategies = compositeStrategyArgs[0]; - expect(childStrategies.length).toBe(7); + expect(childStrategies.length).toBe(8); expect(childStrategies[0]).toBeInstanceOf(FallbackStrategy); expect(childStrategies[1]).toBeInstanceOf(OverrideStrategy); - expect(childStrategies[2]).toBeInstanceOf(ApprovalModeStrategy); - expect(childStrategies[3]).toBeInstanceOf(GemmaClassifierStrategy); - expect(childStrategies[4]).toBeInstanceOf(ClassifierStrategy); - expect(childStrategies[5]).toBeInstanceOf(NumericalClassifierStrategy); - expect(childStrategies[6]).toBeInstanceOf(DefaultStrategy); + expect(childStrategies[2]).toBeInstanceOf(BestEffortProStrategy); + expect(childStrategies[3]).toBeInstanceOf(ApprovalModeStrategy); + expect(childStrategies[4]).toBeInstanceOf(GemmaClassifierStrategy); + expect(childStrategies[5]).toBeInstanceOf(ClassifierStrategy); + expect(childStrategies[6]).toBeInstanceOf(NumericalClassifierStrategy); + expect(childStrategies[7]).toBeInstanceOf(DefaultStrategy); expect(compositeStrategyArgs[1]).toBe('agent-router'); }); diff --git a/packages/core/src/routing/modelRouterService.ts b/packages/core/src/routing/modelRouterService.ts index 30e2bb9f8d..97fa9b7594 100644 --- a/packages/core/src/routing/modelRouterService.ts +++ b/packages/core/src/routing/modelRouterService.ts @@ -18,6 +18,7 @@ import { NumericalClassifierStrategy } from './strategies/numericalClassifierStr import { CompositeStrategy } from './strategies/compositeStrategy.js'; import { FallbackStrategy } from './strategies/fallbackStrategy.js'; import { OverrideStrategy } from './strategies/overrideStrategy.js'; +import { BestEffortProStrategy } from './strategies/bestEffortProStrategy.js'; import { ApprovalModeStrategy } from './strategies/approvalModeStrategy.js'; import { logModelRouting } from '../telemetry/loggers.js'; @@ -43,6 +44,9 @@ export class ModelRouterService { strategies.push(new FallbackStrategy()); strategies.push(new OverrideStrategy()); + // Best Effort Pro is next. + strategies.push(new BestEffortProStrategy()); + // Approval mode is next. strategies.push(new ApprovalModeStrategy()); diff --git a/packages/core/src/routing/strategies/bestEffortProStrategy.ts b/packages/core/src/routing/strategies/bestEffortProStrategy.ts new file mode 100644 index 0000000000..2c8a82827d --- /dev/null +++ b/packages/core/src/routing/strategies/bestEffortProStrategy.ts @@ -0,0 +1,83 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Config } from '../../config/config.js'; +import { isAutoModel, resolveModel } from '../../config/models.js'; +import type { + RoutingStrategy, + RoutingDecision, + RoutingContext, +} from '../routingStrategy.js'; + +/** + * A routing strategy that respects the "Best Effort Pro" setting. + * If the setting is enabled and the Pro model is available, it routes to Pro + * regardless of complexity. If Pro is unavailable, it routes to Flash. + */ +export class BestEffortProStrategy implements RoutingStrategy { + name = 'best-effort-pro'; + + async route( + context: RoutingContext, + config: Config, + ): Promise { + const requestedModel = config.getModel(); + if (!isAutoModel(requestedModel)) { + return null; + } + + const isBestEffortProEnabled = await config.getBestEffortProEnabled(); + if (!isBestEffortProEnabled) { + return null; + } + + const useGemini3_1 = (await config.getGemini31Launched?.()) ?? false; + const useGemini3_1FlashLite = + (await config.getGemini31FlashLiteLaunched?.()) ?? false; + const hasAccessToPreview = config.getHasAccessToPreviewModel?.() ?? true; + + const availabilityService = config.getModelAvailabilityService(); + const proModel = resolveModel( + 'gemini-3.1-pro', + useGemini3_1, + useGemini3_1FlashLite, + false, + hasAccessToPreview, + config, + ); + const flashModel = resolveModel( + 'gemini-3.1-flash', + useGemini3_1, + useGemini3_1FlashLite, + false, + hasAccessToPreview, + config, + ); + + const proSnapshot = availabilityService.snapshot(proModel); + + if (proSnapshot.available) { + return { + model: proModel, + metadata: { + source: this.name, + latencyMs: 0, + reasoning: + 'Best Effort Pro is enabled and the Pro model is available.', + }, + }; + } else { + return { + model: flashModel, + metadata: { + source: this.name, + latencyMs: 0, + reasoning: `Best Effort Pro is enabled, but Pro is unavailable (${proSnapshot.reason}). Falling back to Flash.`, + }, + }; + } + } +} diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts index 5b3ac4f113..2c387a13f5 100644 --- a/packages/core/src/utils/retry.ts +++ b/packages/core/src/utils/retry.ts @@ -37,6 +37,10 @@ export interface RetryOptions { signal?: AbortSignal; getAvailabilityContext?: () => RetryAvailabilityContext | undefined; onRetry?: (attempt: number, error: unknown, delayMs: number) => void; + timeoutFallback?: { + timeoutMs: number; + fallbackDurationMs: number; + }; } const DEFAULT_RETRY_OPTIONS: RetryOptions = { @@ -240,6 +244,7 @@ export async function retryWithBackoff( signal, getAvailabilityContext, onRetry, + timeoutFallback, } = { ...DEFAULT_RETRY_OPTIONS, shouldRetryOnError: isRetryableError, @@ -248,6 +253,7 @@ export async function retryWithBackoff( let attempt = 0; let currentDelay = initialDelayMs; + let startTime = Date.now(); const throwIfAborted = () => { if (signal?.aborted) { throw createAbortError(); @@ -294,6 +300,42 @@ export async function retryWithBackoff( const errorCode = getErrorStatus(error); + const isTimeout = + (error instanceof Error && + error.message.toLowerCase().includes('timeout')) || + getRetryErrorType(error) === 'ETIMEDOUT' || + getRetryErrorType(error) === 'FETCH_FAILED'; + + if (isTimeout && timeoutFallback) { + if (Date.now() - startTime >= timeoutFallback.timeoutMs) { + const successContext = getAvailabilityContext?.(); + if (successContext) { + successContext.service.markTemporarilyUnavailable( + successContext.policy.model, + 'timeout', + timeoutFallback.fallbackDurationMs, + ); + } + if (onPersistent429) { + try { + const fallbackModel = await onPersistent429( + authType, + new Error('Request timed out'), + ); + if (fallbackModel) { + attempt = 0; + currentDelay = initialDelayMs; + startTime = Date.now(); + continue; + } + } catch (fallbackError) { + debugLogger.warn('Model fallback failed:', fallbackError); + } + } + throw error; + } + } + if ( classifiedError instanceof TerminalQuotaError || classifiedError instanceof ModelNotFoundError