Remove auto-execution on Flash in the event of a 429/Quota failover (#3662)

Co-authored-by: Jenna Inouye <jinouye@google.com>
2026-04-05 02:40:55 -07:00 · 2025-07-09 13:55:56 -04:00
parent 01e756481f
commit 8a6509ffeb
14 changed files with 292 additions and 86 deletions
--- a/packages/cli/src/ui/utils/errorParsing.ts
+++ b/packages/cli/src/ui/utils/errorParsing.ts
@@ -19,7 +19,7 @@ import {
 const getRateLimitErrorMessageGoogleFree = (
  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
 ) =>
-  `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
+  `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;

 const getRateLimitErrorMessageGoogleProQuotaFree = (
  currentModel: string = DEFAULT_GEMINI_MODEL,
@@ -34,7 +34,7 @@ const getRateLimitErrorMessageGoogleGenericQuotaFree = () =>
 const getRateLimitErrorMessageGooglePaid = (
  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
 ) =>
-  `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`;
+  `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`;

 const getRateLimitErrorMessageGoogleProQuotaPaid = (
  currentModel: string = DEFAULT_GEMINI_MODEL,
@@ -53,7 +53,7 @@ const RATE_LIMIT_ERROR_MESSAGE_VERTEX =
 const getRateLimitErrorMessageDefault = (
  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
 ) =>
-  `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
+  `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;

 function getRateLimitMessage(
  authType?: AuthType,